summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/Makefile9
-rw-r--r--arch/arm64/kernel/acpi.c40
-rw-r--r--arch/arm64/kernel/alternative.c19
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c14
-rw-r--r--arch/arm64/kernel/asm-offsets.c3
-rw-r--r--arch/arm64/kernel/compat_alignment.c2
-rw-r--r--arch/arm64/kernel/cpu_errata.c166
-rw-r--r--arch/arm64/kernel/cpufeature.c595
-rw-r--r--arch/arm64/kernel/cpuinfo.c121
-rw-r--r--arch/arm64/kernel/debug-monitors.c263
-rw-r--r--arch/arm64/kernel/efi-header.S6
-rw-r--r--arch/arm64/kernel/efi.c56
-rw-r--r--arch/arm64/kernel/elfcore.c3
-rw-r--r--arch/arm64/kernel/entry-common.c607
-rw-r--r--arch/arm64/kernel/entry-ftrace.S2
-rw-r--r--arch/arm64/kernel/entry.S20
-rw-r--r--arch/arm64/kernel/fpsimd.c542
-rw-r--r--arch/arm64/kernel/ftrace.c22
-rw-r--r--arch/arm64/kernel/head.S8
-rw-r--r--arch/arm64/kernel/hibernate.c2
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c60
-rw-r--r--arch/arm64/kernel/hyp-stub.S8
-rw-r--r--arch/arm64/kernel/image-vars.h98
-rw-r--r--arch/arm64/kernel/irq.c15
-rw-r--r--arch/arm64/kernel/kaslr.c2
-rw-r--r--arch/arm64/kernel/kexec_image.c2
-rw-r--r--arch/arm64/kernel/kgdb.c39
-rw-r--r--arch/arm64/kernel/machine_kexec.c5
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c8
-rw-r--r--arch/arm64/kernel/module-plts.c12
-rw-r--r--arch/arm64/kernel/module.c133
-rw-r--r--arch/arm64/kernel/mpam.c62
-rw-r--r--arch/arm64/kernel/mte.c44
-rw-r--r--arch/arm64/kernel/paravirt.c11
-rw-r--r--arch/arm64/kernel/pi/Makefile4
-rw-r--r--arch/arm64/kernel/pi/idreg-override.c3
-rw-r--r--arch/arm64/kernel/pi/kaslr_early.c4
-rw-r--r--arch/arm64/kernel/pi/map_kernel.c84
-rw-r--r--arch/arm64/kernel/pi/map_range.c28
-rw-r--r--arch/arm64/kernel/pi/patch-scs.c18
-rw-r--r--arch/arm64/kernel/pi/pi.h12
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c7
-rw-r--r--arch/arm64/kernel/probes/kprobes.c46
-rw-r--r--arch/arm64/kernel/probes/kprobes_trampoline.S2
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c50
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.h3
-rw-r--r--arch/arm64/kernel/probes/uprobes.c68
-rw-r--r--arch/arm64/kernel/process.c181
-rw-r--r--arch/arm64/kernel/proton-pack.c275
-rw-r--r--arch/arm64/kernel/ptrace.c301
-rw-r--r--arch/arm64/kernel/relocate_kernel.S3
-rw-r--r--arch/arm64/kernel/rsi.c31
-rw-r--r--arch/arm64/kernel/sdei.c12
-rw-r--r--arch/arm64/kernel/setup.c14
-rw-r--r--arch/arm64/kernel/signal.c183
-rw-r--r--arch/arm64/kernel/signal32.c11
-rw-r--r--arch/arm64/kernel/smp.c148
-rw-r--r--arch/arm64/kernel/stacktrace.c59
-rw-r--r--arch/arm64/kernel/static_call.c23
-rw-r--r--arch/arm64/kernel/sys32.c2
-rw-r--r--arch/arm64/kernel/sys_compat.c4
-rw-r--r--arch/arm64/kernel/syscall.c15
-rw-r--r--arch/arm64/kernel/topology.c266
-rw-r--r--arch/arm64/kernel/traps.c117
-rw-r--r--arch/arm64/kernel/vdso.c103
-rw-r--r--arch/arm64/kernel/vdso/Makefile5
-rw-r--r--arch/arm64/kernel/vdso/vdso.lds.S7
-rw-r--r--arch/arm64/kernel/vdso32/Makefile18
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S8
-rw-r--r--arch/arm64/kernel/vdso32/vgettimeofday.c5
-rw-r--r--arch/arm64/kernel/vmcore_info.c2
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S48
-rw-r--r--arch/arm64/kernel/watchdog_hld.c58
73 files changed, 3020 insertions, 2207 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 71c29a2a2f19..74b76bb70452 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idle.o patching.o pi/ \
- rsi.o
+ rsi.o jump_label.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -46,8 +46,8 @@ obj-$(CONFIG_MODULES) += module.o module-plts.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
obj-$(CONFIG_PCI) += pci.o
@@ -68,6 +68,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_ARM64_MPAM) += mpam.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
@@ -78,10 +79,10 @@ $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
obj-y += probes/
obj-y += head.o
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
-AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(abspath vmlinux)\""
endif
# for cleaning
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index e6f66491fbe9..5891f92c2035 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -133,7 +133,7 @@ static int __init acpi_fadt_sanity_check(void)
/*
* FADT is required on arm64; retrieve it to check its presence
- * and carry out revision and ACPI HW reduced compliancy tests
+ * and carry out revision and ACPI HW reduced compliance tests
*/
status = acpi_get_table(ACPI_SIG_FADT, 0, &table);
if (ACPI_FAILURE(status)) {
@@ -252,8 +252,6 @@ done:
*/
acpi_parse_spcr(earlycon_acpi_spcr_enable,
!param_acpi_nospcr);
- pr_info("Use ACPI SPCR as default console: %s\n",
- param_acpi_nospcr ? "No" : "Yes");
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
@@ -379,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
- return ioremap_prot(phys, size, pgprot_val(prot));
+ return __ioremap_prot(phys, size, prot);
}
/*
@@ -403,7 +401,7 @@ int apei_claim_sea(struct pt_regs *regs)
return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags());
if (regs)
- return_to_irqs_enabled = interrupts_enabled(regs);
+ return_to_irqs_enabled = !regs_irqs_disabled(regs);
/*
* SEA can interrupt SError, mask it and describe this as an NMI so
@@ -425,7 +423,7 @@ int apei_claim_sea(struct pt_regs *regs)
irq_work_run();
__irq_exit();
} else {
- pr_warn_ratelimited("APEI work queued but not completed");
+ pr_warn_ratelimited("APEI work queued but not completed\n");
err = -EINPROGRESS;
}
}
@@ -460,3 +458,33 @@ int acpi_unmap_cpu(int cpu)
}
EXPORT_SYMBOL(acpi_unmap_cpu);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
+int acpi_get_cpu_uid(unsigned int cpu, u32 *uid)
+{
+ struct acpi_madt_generic_interrupt *gicc;
+
+ if (cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (!gicc)
+ return -ENODEV;
+
+ *uid = gicc->uid;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_get_cpu_uid);
+
+int get_cpu_for_acpi_id(u32 uid)
+{
+ u32 cpu_uid;
+ int ret;
+
+ for (int cpu = 0; cpu < nr_cpu_ids; cpu++) {
+ ret = acpi_get_cpu_uid(cpu, &cpu_uid);
+ if (ret == 0 && uid == cpu_uid)
+ return cpu;
+ }
+
+ return -EINVAL;
+}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8ff6610af496..f5ec7e7c1d3f 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
-static void __apply_alternatives(const struct alt_region *region,
- bool is_module,
- unsigned long *cpucap_mask)
+static int __apply_alternatives(const struct alt_region *region,
+ bool is_module,
+ unsigned long *cpucap_mask)
{
struct alt_instr *alt;
__le32 *origptr, *updptr;
@@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region,
updptr = is_module ? origptr : lm_alias(origptr);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- if (ALT_HAS_CB(alt))
+ if (ALT_HAS_CB(alt)) {
alt_cb = ALT_REPL_PTR(alt);
- else
+ if (is_module && !core_kernel_text((unsigned long)alt_cb))
+ return -ENOEXEC;
+ } else {
alt_cb = patch_alternative;
+ }
alt_cb(alt, origptr, updptr, nr_inst);
@@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region,
bitmap_and(applied_alternatives, applied_alternatives,
system_cpucaps, ARM64_NCAPS);
}
+
+ return 0;
}
static void __init apply_alternatives_vdso(void)
@@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void)
}
#ifdef CONFIG_MODULES
-void apply_alternatives_module(void *start, size_t length)
+int apply_alternatives_module(void *start, size_t length)
{
struct alt_region region = {
.begin = start,
@@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length)
bitmap_fill(all_capabilities, ARM64_NCAPS);
- __apply_alternatives(&region, true, &all_capabilities[0]);
+ return __apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index e737c6295ec7..b7a1f8b788bb 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -610,6 +610,20 @@ static int __init armv8_deprecated_init(void)
}
#endif
+
+#ifdef CONFIG_SWP_EMULATION
+ /*
+ * The purpose of supporting LSUI is to eliminate PAN toggling. CPUs
+ * that support LSUI are unlikely to support a 32-bit runtime. Rather
+ * than emulating the SWP instruction using LSUI instructions, simply
+ * disable SWP emulation.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_LSUI)) {
+ insn_swp.status = INSN_UNAVAILABLE;
+ pr_info("swp/swpb instruction emulation is not supported on this system\n");
+ }
+#endif
+
for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
struct insn_emulation *ie = insn_emulations[i];
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index eb1a840e4110..b6367ff3a49c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -6,6 +6,7 @@
* 2001-2002 Keith Owens
* Copyright (C) 2012 ARM Ltd.
*/
+#define COMPILE_OFFSETS
#include <linux/arm_sdei.h>
#include <linux/sched.h>
@@ -182,5 +183,7 @@ int main(void)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
#endif
+ DEFINE(PIE_E0_ASM, PIE_E0);
+ DEFINE(PIE_E1_ASM, PIE_E1);
return 0;
}
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
index deff21bfa680..b68e1d328d4c 100644
--- a/arch/arm64/kernel/compat_alignment.c
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -368,6 +368,8 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
return 1;
}
+ if (!handler)
+ return 1;
type = handler(addr, instr, regs);
if (type == TYPE_ERROR || type == TYPE_FAULT)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 7ce555862895..5c0ab6bfd44a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -14,31 +14,85 @@
#include <asm/kvm_asm.h>
#include <asm/smp_plat.h>
+static u64 target_impl_cpu_num;
+static struct target_impl_cpu *target_impl_cpus;
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus)
+{
+ if (target_impl_cpu_num || !num || !impl_cpus)
+ return false;
+
+ target_impl_cpu_num = num;
+ target_impl_cpus = impl_cpus;
+ return true;
+}
+
+static inline bool is_midr_in_range(struct midr_range const *range)
+{
+ int i;
+
+ if (!target_impl_cpu_num)
+ return midr_is_cpu_model_range(read_cpuid_id(), range->model,
+ range->rv_min, range->rv_max);
+
+ for (i = 0; i < target_impl_cpu_num; i++) {
+ if (midr_is_cpu_model_range(target_impl_cpus[i].midr,
+ range->model,
+ range->rv_min, range->rv_max))
+ return true;
+ }
+ return false;
+}
+
+bool is_midr_in_range_list(struct midr_range const *ranges)
+{
+ while (ranges->model)
+ if (is_midr_in_range(ranges++))
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(is_midr_in_range_list);
+
static bool __maybe_unused
-is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
+__is_affected_midr_range(const struct arm64_cpu_capabilities *entry,
+ u32 midr, u32 revidr)
{
const struct arm64_midr_revidr *fix;
- u32 midr = read_cpuid_id(), revidr;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- if (!is_midr_in_range(midr, &entry->midr_range))
+ if (!is_midr_in_range(&entry->midr_range))
return false;
midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
- revidr = read_cpuid(REVIDR_EL1);
for (fix = entry->fixed_revs; fix && fix->revidr_mask; fix++)
if (midr == fix->midr_rv && (revidr & fix->revidr_mask))
return false;
-
return true;
}
static bool __maybe_unused
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ int i;
+
+ if (!target_impl_cpu_num) {
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return __is_affected_midr_range(entry, read_cpuid_id(),
+ read_cpuid(REVIDR_EL1));
+ }
+
+ for (i = 0; i < target_impl_cpu_num; i++) {
+ if (__is_affected_midr_range(entry, target_impl_cpus[i].midr,
+ target_impl_cpus[i].midr))
+ return true;
+ }
+ return false;
+}
+
+static bool __maybe_unused
is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
int scope)
{
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
+ return is_midr_in_range_list(entry->midr_range_list);
}
static bool __maybe_unused
@@ -87,6 +141,30 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
return (ctr_real != sys) && (ctr_raw != sys);
}
+#ifdef CONFIG_ARM64_ERRATUM_4311569
+static DEFINE_STATIC_KEY_FALSE(arm_si_l1_workaround_4311569);
+static int __init early_arm_si_l1_workaround_4311569_cfg(char *arg)
+{
+ static_branch_enable(&arm_si_l1_workaround_4311569);
+ pr_info("Enabling cache maintenance workaround for ARM SI-L1 erratum 4311569\n");
+
+ return 0;
+}
+early_param("arm_si_l1_workaround_4311569", early_arm_si_l1_workaround_4311569_cfg);
+
+/*
+ * We have some earlier use cases to call cache maintenance operation functions, for example,
+ * dcache_inval_poc() and dcache_clean_poc() in head.S, before making decision to turn on this
+ * workaround. Since the scope of this workaround is limited to non-coherent DMA agents, its
+ * safe to have the workaround off by default.
+ */
+static bool
+need_arm_si_l1_workaround_4311569(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return static_branch_unlikely(&arm_si_l1_workaround_4311569);
+}
+#endif
+
static void
cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
{
@@ -186,12 +264,48 @@ static bool __maybe_unused
has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
int scope)
{
- u32 midr = read_cpuid_id();
bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT);
const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range(midr, &range) && has_dic;
+ return is_midr_in_range(&range) && has_dic;
+}
+
+static const struct midr_range impdef_pmuv3_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+};
+
+static bool has_impdef_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
+
+ if (!is_kernel_in_hyp_mode())
+ return false;
+
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
+
+ return is_midr_in_range_list(impdef_pmuv3_cpus);
+}
+
+static void cpu_enable_impdef_pmuv3_traps(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set_s(SYS_HACR_EL2, 0, BIT(56));
}
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
@@ -245,7 +359,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
-const struct midr_range cavium_erratum_27456_cpus[] = {
+static const struct midr_range cavium_erratum_27456_cpus[] = {
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
/* Cavium ThunderX, T81 pass 1.0 */
@@ -441,6 +555,7 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
@@ -455,6 +570,7 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3AE),
{}
};
#endif
@@ -467,6 +583,13 @@ static const struct midr_range erratum_ac03_cpu_38_list[] = {
};
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+static const struct midr_range erratum_ac04_cpu_23_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -771,6 +894,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_4311569
+ {
+ .capability = ARM64_WORKAROUND_4311569,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = need_arm_si_l1_workaround_4311569,
+ },
+#endif
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
{
.desc = "ARM errata 2966298, 3117295",
@@ -786,6 +916,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
},
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+ {
+ .desc = "AmpereOne erratum AC04_CPU_23",
+ .capability = ARM64_WORKAROUND_AMPERE_AC04_CPU_23,
+ ERRATA_MIDR_RANGE_LIST(erratum_ac04_cpu_23_list),
+ },
+#endif
{
.desc = "Broken CNTVOFF_EL2",
.capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
@@ -795,5 +932,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
})),
},
{
+ .desc = "Apple IMPDEF PMUv3 Traps",
+ .capability = ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_impdef_pmuv3,
+ .cpu_enable = cpu_enable_impdef_pmuv3_traps,
+ },
+ {
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d561cf3b8ac7..6d53bb15cf7b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -77,6 +77,7 @@
#include <linux/percpu.h>
#include <linux/sched/isolation.h>
+#include <asm/arm_pmuv3.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
@@ -84,8 +85,11 @@
#include <asm/hwcap.h>
#include <asm/insn.h>
#include <asm/kvm_host.h>
+#include <asm/mmu.h>
#include <asm/mmu_context.h>
+#include <asm/mpam.h>
#include <asm/mte.h>
+#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
@@ -93,6 +97,7 @@
#include <asm/vectors.h>
#include <asm/virt.h>
+#include <asm/spectre.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
@@ -113,7 +118,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC
DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
-bool arm64_use_ng_mappings = false;
+/*
+ * arm64_use_ng_mappings must be placed in the .data section, otherwise it
+ * ends up in the .bss section where it is initialized in early_map_kernel()
+ * after the MMU (with the idmap) was enabled. create_init_idmap() - which
+ * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG -
+ * may end up generating an incorrect idmap page table attributes.
+ */
+bool arm64_use_ng_mappings __read_mostly = false;
EXPORT_SYMBOL(arm64_use_ng_mappings);
DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
@@ -230,6 +242,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LS64_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
@@ -270,6 +283,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
static const struct arm64_ftr_bits ftr_id_aa64isar3[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FPRCVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_LSUI_SHIFT, 4, ID_AA64ISAR3_EL1_LSUI_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_LSFE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -295,8 +310,10 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
@@ -311,6 +328,9 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_GCIE_SHIFT, 4, ID_AA64PFR2_EL1_GCIE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT, 4, ID_AA64PFR2_EL1_MTESTOREONLY_NI),
ARM64_FTR_END,
};
@@ -491,12 +511,14 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_NV_frac_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -547,7 +569,7 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
* We can instantiate multiple PMU instances with different levels
* of support.
*/
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_EL1_DebugVer_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
@@ -691,7 +713,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = {
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
/* [31:28] TraceFilt */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_EL1_PerfMon_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_EL1_PerfMon_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MProfDbg_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapTrc_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopTrc_SHIFT, 4, 0),
@@ -756,17 +778,17 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
-struct arm64_ftr_override id_aa64mmfr0_override;
-struct arm64_ftr_override id_aa64mmfr1_override;
-struct arm64_ftr_override id_aa64mmfr2_override;
-struct arm64_ftr_override id_aa64pfr0_override;
-struct arm64_ftr_override id_aa64pfr1_override;
-struct arm64_ftr_override id_aa64zfr0_override;
-struct arm64_ftr_override id_aa64smfr0_override;
-struct arm64_ftr_override id_aa64isar1_override;
-struct arm64_ftr_override id_aa64isar2_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr2_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64zfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64smfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64isar1_override;
+struct arm64_ftr_override __read_mostly id_aa64isar2_override;
-struct arm64_ftr_override arm64_sw_feature_override;
+struct arm64_ftr_override __read_mostly arm64_sw_feature_override;
static const struct __ftr_reg_entry {
u32 sys_id;
@@ -986,7 +1008,7 @@ static void __init sort_ftr_regs(void)
/*
* Initialise the CPU feature register from Boot CPU values.
- * Also initiliases the strict_mask for the register.
+ * Also initialises the strict_mask for the register.
* Any bits that are not covered by an arm64_ftr_bits entry are considered
* RES0 for the system-wide value, and must strictly match.
*/
@@ -1189,8 +1211,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
cpacr_restore(cpacr);
}
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
+ }
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
@@ -1401,6 +1425,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu,
info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR4_EL1, cpu,
+ info->reg_id_aa64mmfr4, boot->reg_id_aa64mmfr4);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
@@ -1441,7 +1467,8 @@ void update_cpu_features(int cpu,
cpacr_restore(cpacr);
}
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) {
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
info->reg_mpamidr, boot->reg_mpamidr);
}
@@ -1647,7 +1674,7 @@ const struct cpumask *system_32bit_el0_cpumask(void)
const struct cpumask *task_cpu_fallback_mask(struct task_struct *p)
{
- return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK));
+ return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_DOMAIN));
}
static int __init parse_32bit_el0_param(char *str)
@@ -1792,7 +1819,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
char const *str = "kpti command line option";
bool meltdown_safe;
- meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list);
+ meltdown_safe = is_midr_in_range_list(kpti_safe_list);
/* Defer to CPU feature registers */
if (has_cpuid_feature(entry, scope))
@@ -1862,7 +1889,7 @@ static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) &&
!(has_cpuid_feature(entry, scope) ||
- is_midr_in_range_list(read_cpuid_id(), nv1_ni_list)));
+ is_midr_in_range_list(nv1_ni_list)));
}
#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
@@ -1898,102 +1925,18 @@ static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
}
#endif
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
-
-extern
-void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
- phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags);
-
-static phys_addr_t __initdata kpti_ng_temp_alloc;
-
-static phys_addr_t __init kpti_ng_pgd_alloc(int shift)
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
{
- kpti_ng_temp_alloc -= PAGE_SIZE;
- return kpti_ng_temp_alloc;
-}
-
-static int __init __kpti_install_ng_mappings(void *__unused)
-{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
- extern kpti_remap_fn idmap_kpti_install_ng_mappings;
- kpti_remap_fn *remap_fn;
-
- int cpu = smp_processor_id();
- int levels = CONFIG_PGTABLE_LEVELS;
- int order = order_base_2(levels);
- u64 kpti_ng_temp_pgd_pa = 0;
- pgd_t *kpti_ng_temp_pgd;
- u64 alloc = 0;
-
- if (levels == 5 && !pgtable_l5_enabled())
- levels = 4;
- else if (levels == 4 && !pgtable_l4_enabled())
- levels = 3;
-
- remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
-
- if (!cpu) {
- alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
- kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
- kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
-
- //
- // Create a minimal page table hierarchy that permits us to map
- // the swapper page tables temporarily as we traverse them.
- //
- // The physical pages are laid out as follows:
- //
- // +--------+-/-------+-/------ +-/------ +-\\\--------+
- // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
- // +--------+-\-------+-\------ +-\------ +-///--------+
- // ^
- // The first page is mapped into this hierarchy at a PMD_SHIFT
- // aligned virtual address, so that we can manipulate the PTE
- // level entries while the mapping is active. The first entry
- // covers the PTE[] page itself, the remaining entries are free
- // to be used as a ad-hoc fixmap.
- //
- create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
- KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
- kpti_ng_pgd_alloc, 0);
- }
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
- cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
- cpu_uninstall_idmap();
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
- if (!cpu) {
- free_pages(alloc, order);
- arm64_use_ng_mappings = true;
- }
-
- return 0;
+ return pmuv3_implemented(pmuver);
}
-
-static void __init kpti_install_ng_mappings(void)
-{
- /* Check whether KPTI is going to be used */
- if (!arm64_kernel_unmapped_at_el0())
- return;
-
- /*
- * We don't need to rewrite the page-tables if either we've done
- * it already or we have KASLR enabled and therefore have not
- * created any global mappings at all.
- */
- if (arm64_use_ng_mappings)
- return;
-
- stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
-}
-
-#else
-static inline void kpti_install_ng_mappings(void)
-{
-}
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+#endif
static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap)
{
@@ -2023,7 +1966,7 @@ static struct cpumask dbm_cpus __read_mostly;
static inline void __cpu_enable_hw_dbm(void)
{
- u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_HD;
write_sysreg(tcr, tcr_el1);
isb();
@@ -2045,7 +1988,7 @@ static bool cpu_has_broken_dbm(void)
{},
};
- return is_midr_in_range_list(read_cpuid_id(), cpus);
+ return is_midr_in_range_list(cpus);
}
static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
@@ -2162,7 +2105,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
if (kvm_get_mode() != KVM_MODE_NV)
return false;
- if (!has_cpuid_feature(cap, scope)) {
+ if (!cpucap_multi_entry_cap_matches(cap, scope)) {
pr_warn("unavailable: %s\n", cap->desc);
return false;
}
@@ -2176,7 +2119,47 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
-#ifdef CONFIG_ARM64_PAN
+bool cpu_supports_bbml2_noabort(void)
+{
+ /*
+ * We want to allow usage of BBML2 in as wide a range of kernel contexts
+ * as possible. This list is therefore an allow-list of known-good
+ * implementations that both support BBML2 and additionally, fulfill the
+ * extra constraint of never generating TLB conflict aborts when using
+ * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain
+ * kernel contexts difficult to prove safe against recursive aborts).
+ *
+ * Note that implementations can only be considered "known-good" if their
+ * implementors attest to the fact that the implementation never raises
+ * TLB conflict aborts for BBML2 mapping granularity changes.
+ */
+ static const struct midr_range supports_bbml2_noabort_list[] = {
+ MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3AE, 0, 2, 0xf),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {}
+ };
+
+ /* Does our cpu guarantee to never raise TLB conflict aborts? */
+ if (!is_midr_in_range_list(supports_bbml2_noabort_list))
+ return false;
+
+ /*
+ * We currently ignore the ID_AA64MMFR2_EL1 register, and only care
+ * about whether the MIDR check passes.
+ */
+
+ return true;
+}
+
+static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
+{
+ return cpu_supports_bbml2_noabort();
+}
+
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
/*
@@ -2188,7 +2171,6 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
set_pstate_pan(1);
}
-#endif /* CONFIG_ARM64_PAN */
#ifdef CONFIG_ARM64_RAS_EXTN
static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
@@ -2196,6 +2178,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
/* Firmware may have left a deferred SError in this register. */
write_sysreg_s(0, SYS_DISR_EL1);
}
+static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope)
+{
+ const struct arm64_cpu_capabilities rasv1p1_caps[] = {
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1)
+ },
+ };
+
+ return (has_cpuid_feature(&rasv1p1_caps[0], scope) ||
+ (has_cpuid_feature(&rasv1p1_caps[1], scope) &&
+ has_cpuid_feature(&rasv1p1_caps[2], scope)));
+}
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -2250,20 +2250,30 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
{
if (this_cpu_has_cap(ARM64_HAS_E0PD))
- sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+ sysreg_clear_set(tcr_el1, 0, TCR_EL1_E0PD1);
}
#endif /* CONFIG_ARM64_E0PD */
+static void cpu_enable_ls64(struct arm64_cpu_capabilities const *cap)
+{
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnALS, SCTLR_EL1_EnALS);
+}
+
+static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap)
+{
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, 0);
+}
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
/*
- * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU
* feature, so will be detected earlier.
*/
- BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
- if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF);
+ if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF))
return false;
return enable_pseudo_nmi;
@@ -2298,6 +2308,58 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
}
#endif
+static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ static const struct midr_range has_vgic_v3[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+ };
+ struct arm_smccc_res res = {};
+
+ BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
+ BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
+ if (!is_hyp_mode_available())
+ return false;
+
+ if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
+ return true;
+
+ if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
+ !is_midr_in_range_list(has_vgic_v3))
+ return false;
+
+ /*
+ * pKVM prevents late onlining of CPUs. This means that whatever
+ * state the capability is in after deprivilege cannot be affected
+ * by a new CPU booting -- this is garanteed to be a CPU we have
+ * already seen, and the cap is therefore unchanged.
+ */
+ if (system_capabilities_finalized() && is_protected_kvm_enabled())
+ return cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR);
+
+ if (is_kernel_in_hyp_mode())
+ res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
+ else
+ arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
+
+ if (res.a0 == HVC_STUB_ERR)
+ return false;
+
+ return res.a1 & ICH_VTR_EL2_TDS;
+}
+
#ifdef CONFIG_ARM64_BTI
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
{
@@ -2316,17 +2378,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ static bool cleared_zero_page = false;
+
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
mte_cpu_setup();
/*
* Clear the tags in the zero page. This needs to be done via the
- * linear map which has the Tagged attribute.
+ * linear map which has the Tagged attribute. Since this page is
+ * always mapped as pte_special(), set_pte_at() will not attempt to
+ * clear the tags or set PG_mte_tagged.
*/
- if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
mte_clear_page_tags(lm_alias(empty_zero_page));
- set_page_mte_tagged(ZERO_PAGE(0));
}
kasan_init_hw_tags_cpu();
@@ -2430,13 +2496,19 @@ test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope)
static void
cpu_enable_mpam(const struct arm64_cpu_capabilities *entry)
{
- /*
- * Access by the kernel (at EL1) should use the reserved PARTID
- * which is configured unrestricted. This avoids priority-inversion
- * where latency sensitive tasks have to wait for a task that has
- * been throttled to release the lock.
- */
- write_sysreg_s(0, SYS_MPAM1_EL1);
+ int cpu = smp_processor_id();
+ u64 regval = 0;
+
+ if (IS_ENABLED(CONFIG_ARM64_MPAM) && static_branch_likely(&mpam_enabled))
+ regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu));
+
+ write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1);
+ if (cpus_have_cap(ARM64_SME))
+ write_sysreg_s(regval & (MPAMSM_EL1_PARTID_D | MPAMSM_EL1_PMG_D), SYS_MPAMSM_EL1);
+ isb();
+
+ /* Synchronising the EL0 write is left until the ERET to EL0 */
+ write_sysreg_s(regval, SYS_MPAM0_EL1);
}
static bool
@@ -2447,6 +2519,15 @@ test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope)
return idr & MPAMIDR_EL1_HAS_HCR;
}
+static bool
+test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF))
+ return false;
+
+ return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY);
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.capability = ARM64_ALWAYS_BOOT,
@@ -2459,8 +2540,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_always,
},
{
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
+ .desc = "GICv3 CPU interface",
+ .capability = ARM64_HAS_GICV3_CPUIF,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
@@ -2479,7 +2560,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF)
},
-#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
.capability = ARM64_HAS_PAN,
@@ -2488,7 +2568,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_pan,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP)
},
-#endif /* CONFIG_ARM64_PAN */
#ifdef CONFIG_ARM64_EPAN
{
.desc = "Enhanced Privileged Access Never",
@@ -2498,7 +2577,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3)
},
#endif /* CONFIG_ARM64_EPAN */
-#ifdef CONFIG_ARM64_LSE_ATOMICS
{
.desc = "LSE atomic instructions",
.capability = ARM64_HAS_LSE_ATOMICS,
@@ -2506,7 +2584,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
},
-#endif /* CONFIG_ARM64_LSE_ATOMICS */
{
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
@@ -2519,7 +2596,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
- ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
+ .match_list = (const struct arm64_cpu_capabilities []){
+ {
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
+ },
+ {
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)
+ },
+ { /* Sentinel */ }
+ },
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
@@ -2604,6 +2691,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_clear_disr,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
},
+ {
+ .desc = "RASv1p1 Extension Support",
+ .capability = ARM64_HAS_RASV1P1_EXTN,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_rasv1p1,
+ },
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
@@ -2780,6 +2873,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_gic_prio_relaxed_sync,
},
#endif
+ {
+ /*
+ * Depends on having GICv3
+ */
+ .desc = "ICV_DIR_EL1 trapping",
+ .capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = can_trap_icv_dir_el1,
+ },
#ifdef CONFIG_ARM64_E0PD
{
.desc = "E0PD",
@@ -2827,6 +2929,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3)
},
+ {
+ .desc = "FAR on MTE Tag Check Fault",
+ .capability = ARM64_MTE_FAR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTEFAR, IMP)
+ },
+ {
+ .desc = "Store Only MTE Tag Check",
+ .capability = ARM64_MTE_STORE_ONLY,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTESTOREONLY, IMP)
+ },
#endif /* CONFIG_ARM64_MTE */
{
.desc = "RCpc load-acquire (LDAPR)",
@@ -2842,6 +2958,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
},
+ {
+ .desc = "Fine Grained Traps 2",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FGT2,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, FGT2)
+ },
#ifdef CONFIG_ARM64_SME
{
.desc = "Scalable Matrix Extension",
@@ -2927,6 +3050,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
{
+ .desc = "BBM Level 2 without TLB conflict abort",
+ .capability = ARM64_HAS_BBML2_NOABORT,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = has_bbml2_noabort,
+ },
+ {
.desc = "52-bit Virtual Addressing for KVM (LPA2)",
.capability = ARM64_HAS_LPA2,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
@@ -2999,6 +3128,66 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
},
#endif
+#ifdef CONFIG_HW_PERF_EVENTS
+ {
+ .desc = "PMUv3",
+ .capability = ARM64_HAS_PMUV3,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_pmuv3,
+ },
+#endif
+ {
+ .desc = "SCTLR2",
+ .capability = ARM64_HAS_SCTLR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP)
+ },
+ {
+ .desc = "GICv5 CPU interface",
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_CPUIF,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
+ },
+ {
+ .desc = "GICv5 Legacy vCPU interface",
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_LEGACY,
+ .matches = test_has_gicv5_legacy,
+ },
+ {
+ .desc = "XNX",
+ .capability = ARM64_HAS_XNX,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
+ },
+ {
+ .desc = "LS64",
+ .capability = ARM64_HAS_LS64,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_ls64,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64)
+ },
+ {
+ .desc = "LS64_V",
+ .capability = ARM64_HAS_LS64_V,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_ls64_v,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V)
+ },
+#ifdef CONFIG_ARM64_LSUI
+ {
+ .desc = "Unprivileged Load Store Instructions (LSUI)",
+ .capability = ARM64_HAS_LSUI,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR3_EL1, LSUI, IMP)
+ },
+#endif
{},
};
@@ -3073,6 +3262,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
}
#endif
+#ifdef CONFIG_ARM64_SME
+static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sme() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -3111,8 +3307,10 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64, CAP_HWCAP, KERNEL_HWCAP_LS64),
HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT),
HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, LSFE, IMP, CAP_HWCAP, KERNEL_HWCAP_LSFE),
HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
@@ -3149,6 +3347,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_MTE
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTEFAR, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_FAR),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTESTOREONLY, IMP, CAP_HWCAP , KERNEL_HWCAP_MTE_STORE_ONLY),
#endif /* CONFIG_ARM64_MTE */
HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
@@ -3161,31 +3361,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
- HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
#endif /* CONFIG_ARM64_SME */
HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
@@ -3308,18 +3508,49 @@ static void update_cpu_capabilities(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
+ bool match_all = false;
+ bool caps_set = false;
+ bool boot_cpu = false;
+
caps = cpucap_ptrs[i];
- if (!caps || !(caps->type & scope_mask) ||
- cpus_have_cap(caps->capability) ||
- !caps->matches(caps, cpucap_default_scope(caps)))
+ if (!caps || !(caps->type & scope_mask))
continue;
- if (caps->desc && !caps->cpus)
+ match_all = cpucap_match_all_early_cpus(caps);
+ caps_set = cpus_have_cap(caps->capability);
+ boot_cpu = scope_mask & SCOPE_BOOT_CPU;
+
+ /*
+ * Unless it's a match-all CPUs feature, avoid probing if
+ * already detected.
+ */
+ if (!match_all && caps_set)
+ continue;
+
+ /*
+ * A match-all CPUs capability is only set when probing the
+ * boot CPU. It may be cleared subsequently if not detected on
+ * secondary ones.
+ */
+ if (match_all && !caps_set && !boot_cpu)
+ continue;
+
+ if (!caps->matches(caps, cpucap_default_scope(caps))) {
+ if (match_all)
+ __clear_bit(caps->capability, system_cpucaps);
+ continue;
+ }
+
+ /*
+ * Match-all CPUs capabilities are logged later when the
+ * system capabilities are finalised.
+ */
+ if (!match_all && caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
- if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
+ if (boot_cpu && (caps->type & SCOPE_BOOT_CPU))
set_bit(caps->capability, boot_cpucaps);
}
}
@@ -3680,6 +3911,7 @@ unsigned long cpu_get_elf_hwcap3(void)
static void __init setup_boot_cpu_capabilities(void)
{
+ kvm_arm_target_impl_cpu_init();
/*
* The boot CPU's feature register values have been recorded. Detect
* boot cpucaps and local cpucaps for the boot CPU, then enable and
@@ -3719,17 +3951,24 @@ static void __init setup_system_capabilities(void)
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
apply_alternatives_all();
- /*
- * Log any cpucaps with a cpumask as these aren't logged by
- * update_cpu_capabilities().
- */
for (int i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
- if (caps && caps->cpus && caps->desc &&
- cpumask_any(caps->cpus) < nr_cpu_ids)
+ if (!caps || !caps->desc)
+ continue;
+
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ if (caps->cpus && cpumask_any(caps->cpus) < nr_cpu_ids)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
+
+ /* Log match-all CPUs capabilities */
+ if (cpucap_match_all_early_cpus(caps) &&
+ cpus_have_cap(caps->capability))
+ pr_info("detected: %s\n", caps->desc);
}
/*
@@ -3737,12 +3976,18 @@ static void __init setup_system_capabilities(void)
*/
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+
+ /*
+ * Report Spectre mitigations status.
+ */
+ spectre_print_disabled_mitigations();
}
void __init setup_system_features(void)
{
setup_system_capabilities();
+ linear_map_maybe_split_to_ptes();
kpti_install_ng_mappings();
sve_setup();
@@ -3783,8 +4028,8 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
bool cpu_32bit = false;
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
- if (!housekeeping_cpu(cpu, HK_TYPE_TICK))
- pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu);
+ if (!housekeeping_cpu(cpu, HK_TYPE_DOMAIN))
+ pr_info("Treating domain isolated CPU %u as 64-bit only\n", cpu);
else
cpu_32bit = true;
}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 285d7d538342..6149bc91251d 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -81,6 +81,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_PACA] = "paca",
[KERNEL_HWCAP_PACG] = "pacg",
[KERNEL_HWCAP_GCS] = "gcs",
+ [KERNEL_HWCAP_LS64] = "ls64",
[KERNEL_HWCAP_DCPODP] = "dcpodp",
[KERNEL_HWCAP_SVE2] = "sve2",
[KERNEL_HWCAP_SVEAES] = "sveaes",
@@ -160,6 +161,9 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa",
[KERNEL_HWCAP_SME_STMOP] = "smestmop",
[KERNEL_HWCAP_SME_SMOP4] = "smesmop4",
+ [KERNEL_HWCAP_MTE_FAR] = "mtefar",
+ [KERNEL_HWCAP_MTE_STORE_ONLY] = "mtestoreonly",
+ [KERNEL_HWCAP_LSFE] = "lsfe",
};
#ifdef CONFIG_COMPAT
@@ -209,80 +213,79 @@ static const char *const compat_hwcap2_str[] = {
static int c_show(struct seq_file *m, void *v)
{
- int i, j;
+ int j;
+ int cpu = m->index;
bool compat = personality(current->personality) == PER_LINUX32;
+ struct cpuinfo_arm64 *cpuinfo = v;
+ u32 midr = cpuinfo->reg_midr;
- for_each_online_cpu(i) {
- struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
- u32 midr = cpuinfo->reg_midr;
-
- /*
- * glibc reads /proc/cpuinfo to determine the number of
- * online processors, looking for lines beginning with
- * "processor". Give glibc what it expects.
- */
- seq_printf(m, "processor\t: %d\n", i);
- if (compat)
- seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
- MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
+ /*
+ * glibc reads /proc/cpuinfo to determine the number of
+ * online processors, looking for lines beginning with
+ * "processor". Give glibc what it expects.
+ */
+ seq_printf(m, "processor\t: %d\n", cpu);
+ if (compat)
+ seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
+ MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
- seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
- loops_per_jiffy / (500000UL/HZ),
- loops_per_jiffy / (5000UL/HZ) % 100);
+ seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+ loops_per_jiffy / (500000UL/HZ),
+ loops_per_jiffy / (5000UL/HZ) % 100);
- /*
- * Dump out the common processor features in a single line.
- * Userspace should read the hwcaps with getauxval(AT_HWCAP)
- * rather than attempting to parse this, but there's a body of
- * software which does already (at least for 32-bit).
- */
- seq_puts(m, "Features\t:");
- if (compat) {
+ /*
+ * Dump out the common processor features in a single line.
+ * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+ * rather than attempting to parse this, but there's a body of
+ * software which does already (at least for 32-bit).
+ */
+ seq_puts(m, "Features\t:");
+ if (compat) {
#ifdef CONFIG_COMPAT
- for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
- if (compat_elf_hwcap & (1 << j)) {
- /*
- * Warn once if any feature should not
- * have been present on arm64 platform.
- */
- if (WARN_ON_ONCE(!compat_hwcap_str[j]))
- continue;
-
- seq_printf(m, " %s", compat_hwcap_str[j]);
- }
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+ if (compat_elf_hwcap & (1 << j)) {
+ /*
+ * Warn once if any feature should not
+ * have been present on arm64 platform.
+ */
+ if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+ continue;
+
+ seq_printf(m, " %s", compat_hwcap_str[j]);
}
+ }
- for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
- if (compat_elf_hwcap2 & (1 << j))
- seq_printf(m, " %s", compat_hwcap2_str[j]);
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
+ if (compat_elf_hwcap2 & (1 << j))
+ seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
- } else {
- for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
- if (cpu_have_feature(j))
- seq_printf(m, " %s", hwcap_str[j]);
- }
- seq_puts(m, "\n");
-
- seq_printf(m, "CPU implementer\t: 0x%02x\n",
- MIDR_IMPLEMENTOR(midr));
- seq_printf(m, "CPU architecture: 8\n");
- seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
- seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
- seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+ } else {
+ for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
+ if (cpu_have_feature(j))
+ seq_printf(m, " %s", hwcap_str[j]);
}
+ seq_puts(m, "\n");
+
+ seq_printf(m, "CPU implementer\t: 0x%02x\n",
+ MIDR_IMPLEMENTOR(midr));
+ seq_puts(m, "CPU architecture: 8\n");
+ seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+ seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+ seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < 1 ? (void *)1 : NULL;
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ return *pos < nr_cpu_ids ? &per_cpu(cpu_data, *pos) : NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
- return NULL;
+ return c_start(m, pos);
}
static void c_stop(struct seq_file *m, void *v)
@@ -328,11 +331,13 @@ static const struct kobj_type cpuregs_kobj_type = {
CPUREGS_ATTR_RO(midr_el1, midr);
CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(aidr_el1, aidr);
CPUREGS_ATTR_RO(smidr_el1, smidr);
static struct attribute *cpuregs_id_attrs[] = {
&cpuregs_attr_midr_el1.attr,
&cpuregs_attr_revidr_el1.attr,
+ &cpuregs_attr_aidr_el1.attr,
NULL
};
@@ -469,6 +474,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_dczid = read_cpuid(DCZID_EL0);
info->reg_midr = read_cpuid_id();
info->reg_revidr = read_cpuid(REVIDR_EL1);
+ info->reg_aidr = read_cpuid(AIDR_EL1);
info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
@@ -494,8 +500,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
- if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
- info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ /*
+ * info->reg_mpamidr deferred to {init,update}_cpu_features because we
+ * don't want to read it (and trigger a trap on buggy firmware) if
+ * using an aa64pfr0_el1 override to unconditionally disable MPAM.
+ */
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 58f047de3e1c..29307642f4c9 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -21,8 +21,12 @@
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/exception.h>
+#include <asm/kgdb.h>
+#include <asm/kprobes.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
+#include <asm/uprobes.h>
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
@@ -34,7 +38,7 @@ u8 debug_monitors_arch(void)
/*
* MDSCR access routines.
*/
-static void mdscr_write(u32 mdscr)
+static void mdscr_write(u64 mdscr)
{
unsigned long flags;
flags = local_daif_save();
@@ -43,7 +47,7 @@ static void mdscr_write(u32 mdscr)
}
NOKPROBE_SYMBOL(mdscr_write);
-static u32 mdscr_read(void)
+static u64 mdscr_read(void)
{
return read_sysreg(mdscr_el1);
}
@@ -79,16 +83,16 @@ static DEFINE_PER_CPU(int, kde_ref_count);
void enable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, enable = 0;
+ u64 mdscr, enable = 0;
WARN_ON(preemptible());
if (this_cpu_inc_return(mde_ref_count) == 1)
- enable = DBG_MDSCR_MDE;
+ enable = MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_inc_return(kde_ref_count) == 1)
- enable |= DBG_MDSCR_KDE;
+ enable |= MDSCR_EL1_KDE;
if (enable && debug_enabled) {
mdscr = mdscr_read();
@@ -100,16 +104,16 @@ NOKPROBE_SYMBOL(enable_debug_monitors);
void disable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, disable = 0;
+ u64 mdscr, disable = 0;
WARN_ON(preemptible());
if (this_cpu_dec_return(mde_ref_count) == 0)
- disable = ~DBG_MDSCR_MDE;
+ disable = ~MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_dec_return(kde_ref_count) == 0)
- disable &= ~DBG_MDSCR_KDE;
+ disable &= ~MDSCR_EL1_KDE;
if (disable) {
mdscr = mdscr_read();
@@ -156,187 +160,124 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss);
#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs)
#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs)
-static DEFINE_SPINLOCK(debug_hook_lock);
-static LIST_HEAD(user_step_hook);
-static LIST_HEAD(kernel_step_hook);
-
-static void register_debug_hook(struct list_head *node, struct list_head *list)
+static void send_user_sigtrap(int si_code)
{
- spin_lock(&debug_hook_lock);
- list_add_rcu(node, list);
- spin_unlock(&debug_hook_lock);
-
-}
+ struct pt_regs *regs = current_pt_regs();
-static void unregister_debug_hook(struct list_head *node)
-{
- spin_lock(&debug_hook_lock);
- list_del_rcu(node);
- spin_unlock(&debug_hook_lock);
- synchronize_rcu();
-}
+ if (WARN_ON(!user_mode(regs)))
+ return;
-void register_user_step_hook(struct step_hook *hook)
-{
- register_debug_hook(&hook->node, &user_step_hook);
-}
+ if (!regs_irqs_disabled(regs))
+ local_irq_enable();
-void unregister_user_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
+ "User debug trap");
}
-void register_kernel_step_hook(struct step_hook *hook)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_softstep() from entry-common.c.
+ */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &kernel_step_hook);
-}
+ if (uprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_kernel_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ send_user_sigtrap(TRAP_TRACE);
+ /*
+ * ptrace will disable single step unless explicitly
+ * asked to re-enable it. For other clients, it makes
+ * sense to leave it enabled (i.e. rewind the controls
+ * to the active-not-pending state).
+ */
+ user_rewind_single_step(current);
}
-/*
- * Call registered single step handlers
- * There is no Syndrome info to check for determining the handler.
- * So we call all the registered handlers, until the right handler is
- * found which returns zero.
- */
-static int call_step_hook(struct pt_regs *regs, unsigned long esr)
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs)
{
- struct step_hook *hook;
- struct list_head *list;
- int retval = DBG_HOOK_ERROR;
-
- list = user_mode(regs) ? &user_step_hook : &kernel_step_hook;
+ if (kgdb_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
+ pr_warn("Unexpected kernel single-step exception at EL1\n");
/*
- * Since single-step exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
+ * Re-enable stepping since we know that we will be
+ * returning to regs.
*/
- list_for_each_entry_rcu(hook, list, node) {
- retval = hook->fn(regs, esr);
- if (retval == DBG_HOOK_HANDLED)
- break;
- }
-
- return retval;
+ set_regs_spsr_ss(regs);
}
-NOKPROBE_SYMBOL(call_step_hook);
+NOKPROBE_SYMBOL(do_el1_softstep);
-static void send_user_sigtrap(int si_code)
+static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr)
{
- struct pt_regs *regs = current_pt_regs();
+ if (esr_brk_comment(esr) == BUG_BRK_IMM)
+ return bug_brk_handler(regs, esr);
- if (WARN_ON(!user_mode(regs)))
- return;
+ if (IS_ENABLED(CONFIG_CFI) && esr_is_cfi_brk(esr))
+ return cfi_brk_handler(regs, esr);
- if (interrupts_enabled(regs))
- local_irq_enable();
+ if (esr_brk_comment(esr) == FAULT_BRK_IMM)
+ return reserved_fault_brk_handler(regs, esr);
- arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
- "User debug trap");
-}
+ if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) &&
+ (esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ return kasan_brk_handler(regs, esr);
-static int single_step_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
-{
- bool handler_found = false;
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP) && esr_is_ubsan_brk(esr))
+ return ubsan_brk_handler(regs, esr);
- /*
- * If we are stepping a pending breakpoint, call the hw_breakpoint
- * handler first.
- */
- if (!reinstall_suspended_bps(regs))
- return 0;
-
- if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
-
- if (!handler_found && user_mode(regs)) {
- send_user_sigtrap(TRAP_TRACE);
-
- /*
- * ptrace will disable single step unless explicitly
- * asked to re-enable it. For other clients, it makes
- * sense to leave it enabled (i.e. rewind the controls
- * to the active-not-pending state).
- */
- user_rewind_single_step(current);
- } else if (!handler_found) {
- pr_warn("Unexpected kernel single-step exception at EL1\n");
- /*
- * Re-enable stepping since we know that we will be
- * returning to regs.
- */
- set_regs_spsr_ss(regs);
+ if (IS_ENABLED(CONFIG_KGDB)) {
+ if (esr_brk_comment(esr) == KGDB_DYN_DBG_BRK_IMM)
+ return kgdb_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KGDB_COMPILED_DBG_BRK_IMM)
+ return kgdb_compiled_brk_handler(regs, esr);
}
- return 0;
-}
-NOKPROBE_SYMBOL(single_step_handler);
-
-static LIST_HEAD(user_break_hook);
-static LIST_HEAD(kernel_break_hook);
+ if (IS_ENABLED(CONFIG_KPROBES)) {
+ if (esr_brk_comment(esr) == KPROBES_BRK_IMM)
+ return kprobe_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KPROBES_BRK_SS_IMM)
+ return kprobe_ss_brk_handler(regs, esr);
+ }
-void register_user_break_hook(struct break_hook *hook)
-{
- register_debug_hook(&hook->node, &user_break_hook);
-}
+ if (IS_ENABLED(CONFIG_KRETPROBES) &&
+ esr_brk_comment(esr) == KRETPROBES_BRK_IMM)
+ return kretprobe_brk_handler(regs, esr);
-void unregister_user_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ return DBG_HOOK_ERROR;
}
+NOKPROBE_SYMBOL(call_el1_break_hook);
-void register_kernel_break_hook(struct break_hook *hook)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_brk64() from entry-common.c.
+ */
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &kernel_break_hook);
-}
+ if (IS_ENABLED(CONFIG_UPROBES) &&
+ esr_brk_comment(esr) == UPROBES_BRK_IMM &&
+ uprobe_brk_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_kernel_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ send_user_sigtrap(TRAP_BRKPT);
}
-static int call_break_hook(struct pt_regs *regs, unsigned long esr)
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs)
{
- struct break_hook *hook;
- struct list_head *list;
-
- list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
-
- /*
- * Since brk exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
- */
- list_for_each_entry_rcu(hook, list, node) {
- if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm)
- return hook->fn(regs, esr);
- }
+ if (call_el1_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return;
- return DBG_HOOK_ERROR;
+ die("Oops - BRK", regs, esr);
}
-NOKPROBE_SYMBOL(call_break_hook);
+NOKPROBE_SYMBOL(do_el1_brk64);
-static int brk_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+#ifdef CONFIG_COMPAT
+void do_bkpt32(unsigned long esr, struct pt_regs *regs)
{
- if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-
- if (user_mode(regs)) {
- send_user_sigtrap(TRAP_BRKPT);
- } else {
- pr_warn("Unexpected kernel BRK exception at EL1\n");
- return -EFAULT;
- }
-
- return 0;
+ arm64_notify_die("aarch32 BKPT", regs, SIGTRAP, TRAP_BRKPT, regs->pc, esr);
}
-NOKPROBE_SYMBOL(brk_handler);
+#endif /* CONFIG_COMPAT */
-int aarch32_break_handler(struct pt_regs *regs)
+bool try_handle_aarch32_break(struct pt_regs *regs)
{
u32 arm_instr;
u16 thumb_instr;
@@ -344,7 +285,7 @@ int aarch32_break_handler(struct pt_regs *regs)
void __user *pc = (void __user *)instruction_pointer(regs);
if (!compat_user_mode(regs))
- return -EFAULT;
+ return false;
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
@@ -368,20 +309,12 @@ int aarch32_break_handler(struct pt_regs *regs)
}
if (!bp)
- return -EFAULT;
+ return false;
send_user_sigtrap(TRAP_BRKPT);
- return 0;
-}
-NOKPROBE_SYMBOL(aarch32_break_handler);
-
-void __init debug_traps_init(void)
-{
- hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
- TRAP_TRACE, "single-step handler");
- hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
- TRAP_BRKPT, "BRK handler");
+ return true;
}
+NOKPROBE_SYMBOL(try_handle_aarch32_break);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
@@ -415,7 +348,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
{
WARN_ON(!irqs_disabled());
set_regs_spsr_ss(regs);
- mdscr_write(mdscr_read() | DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() | MDSCR_EL1_SS);
enable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_enable_single_step);
@@ -423,7 +356,7 @@ NOKPROBE_SYMBOL(kernel_enable_single_step);
void kernel_disable_single_step(void)
{
WARN_ON(!irqs_disabled());
- mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() & ~MDSCR_EL1_SS);
disable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_disable_single_step);
@@ -431,7 +364,7 @@ NOKPROBE_SYMBOL(kernel_disable_single_step);
int kernel_active_single_step(void)
{
WARN_ON(!irqs_disabled());
- return mdscr_read() & DBG_MDSCR_SS;
+ return mdscr_read() & MDSCR_EL1_SS;
}
NOKPROBE_SYMBOL(kernel_active_single_step);
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index 11d7f7de202d..329e8df9215f 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -28,7 +28,7 @@
.macro __EFI_PE_HEADER
#ifdef CONFIG_EFI
.set .Lpe_header_offset, . - .L_head
- .long PE_MAGIC
+ .long IMAGE_NT_SIGNATURE
.short IMAGE_FILE_MACHINE_ARM64 // Machine
.short .Lsection_count // NumberOfSections
.long 0 // TimeDateStamp
@@ -40,7 +40,7 @@
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
.Loptional_header:
- .short PE_OPT_MAGIC_PE32PLUS // PE32+ format
+ .short IMAGE_NT_OPTIONAL_HDR64_MAGIC // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __initdata_begin - .Lefi_header_end // SizeOfCode
@@ -66,7 +66,7 @@
.long .Lefi_header_end - .L_head // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
- .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
+ .short IMAGE_DLLCHARACTERISTICS_NX_COMPAT // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 1d25d8899dbf..a81cb4aa4738 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -10,11 +10,13 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/kmemleak.h>
+#include <linux/kthread.h>
#include <linux/screen_info.h>
#include <linux/vmalloc.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
+#include <asm/vmap_stack.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
@@ -29,7 +31,7 @@ static bool region_is_misaligned(const efi_memory_desc_t *md)
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
-static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
+static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
@@ -83,7 +85,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
- pteval_t prot_val = create_mapping_protection(md);
+ ptdesc_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
@@ -164,20 +166,53 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
return s;
}
-static DEFINE_RAW_SPINLOCK(efi_rt_lock);
-
void arch_efi_call_virt_setup(void)
{
- efi_virtmap_load();
+ efi_runtime_assert_lock_held();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ /*
+ * Disable migration to ensure that a preempted EFI runtime
+ * service call will be resumed on the same CPU. This avoids
+ * potential issues with EFI runtime calls that are preempted
+ * while polling for an asynchronous completion of a secure
+ * firmware call, which may not permit the CPU to change.
+ */
+ migrate_disable();
+ kthread_use_mm(&efi_mm);
+ } else {
+ efi_virtmap_load();
+ }
+
+ /*
+ * Enable access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from exception when
+ * invoking the EFI run-time services.
+ */
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
+
__efi_fpsimd_begin();
- raw_spin_lock(&efi_rt_lock);
}
void arch_efi_call_virt_teardown(void)
{
- raw_spin_unlock(&efi_rt_lock);
__efi_fpsimd_end();
- efi_virtmap_unload();
+
+ /*
+ * Defer the switch to the current thread's TTBR0_EL1 until
+ * uaccess_enable(). Do so before efi_virtmap_unload() updates the
+ * saved TTBR0 value, so the userland page tables are not activated
+ * inadvertently over the back of an exception.
+ */
+ uaccess_ttbr0_disable();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ kthread_unuse_mm(&efi_mm);
+ migrate_enable();
+ } else {
+ efi_virtmap_unload();
+ }
}
asmlinkage u64 *efi_rt_stack_top __ro_after_init;
@@ -214,9 +249,8 @@ static int __init arm64_efi_rt_init(void)
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
- p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
- NUMA_NO_NODE, &&l);
-l: if (!p) {
+ p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
+ if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 2e94d20c4ac7..b735f4c2fe5e 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
int ret = 1;
unsigned long addr;
void *tags = NULL;
+ int locked = 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
- struct page *page = get_dump_page(addr);
+ struct page *page = get_dump_page(addr, &locked);
/*
* get_dump_page() returns NULL when encountering an empty
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index b260ddc4d3e9..f42ce7b5c67f 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,8 +6,10 @@
*/
#include <linux/context_tracking.h>
+#include <linux/irq-entry-common.h>
#include <linux/kasan.h>
#include <linux/linkage.h>
+#include <linux/livepatch.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
#include <linux/resume_user_mode.h>
@@ -32,67 +34,31 @@
* Handle IRQ/context state management when entering from kernel mode.
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_enter(), handling the kernel
- * mode transitions only.
*/
-static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
+static noinstr irqentry_state_t arm64_enter_from_kernel_mode(struct pt_regs *regs)
{
- regs->exit_rcu = false;
-
- if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
- lockdep_hardirqs_off(CALLER_ADDR0);
- ct_irq_enter();
- trace_hardirqs_off_finish();
-
- regs->exit_rcu = true;
- return;
- }
-
- lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_irq_enter_check_tick();
- trace_hardirqs_off_finish();
-}
+ irqentry_state_t state;
-static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
-{
- __enter_from_kernel_mode(regs);
+ state = irqentry_enter_from_kernel_mode(regs);
mte_check_tfsr_entry();
mte_disable_tco_entry(current);
+
+ return state;
}
/*
* Handle IRQ/context state management when exiting to kernel mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_exit(), handling the kernel
- * mode transitions only, and with preemption handled elsewhere.
*/
-static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
-{
- lockdep_assert_irqs_disabled();
-
- if (interrupts_enabled(regs)) {
- if (regs->exit_rcu) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- ct_irq_exit();
- lockdep_hardirqs_on(CALLER_ADDR0);
- return;
- }
-
- trace_hardirqs_on();
- } else {
- if (regs->exit_rcu)
- ct_irq_exit();
- }
-}
-
-static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+static void noinstr arm64_exit_to_kernel_mode(struct pt_regs *regs,
+ irqentry_state_t state)
{
+ local_irq_disable();
+ irqentry_exit_to_kernel_mode_preempt(regs, state);
+ local_daif_mask();
mte_check_tfsr_exit();
- __exit_to_kernel_mode(regs);
+ irqentry_exit_to_kernel_mode_after_preempt(regs, state);
}
/*
@@ -100,129 +66,30 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __enter_from_user_mode(void)
+static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
{
- lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CT_STATE_USER);
- user_exit_irqoff();
- trace_hardirqs_off_finish();
+ enter_from_user_mode(regs);
mte_disable_tco_entry(current);
}
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
-{
- __enter_from_user_mode();
-}
-
/*
* Handle IRQ/context state management when exiting to user mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __exit_to_user_mode(void)
-{
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- user_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
-{
- do {
- local_irq_enable();
-
- if (thread_flags & _TIF_NEED_RESCHED)
- schedule();
-
- if (thread_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
- clear_thread_flag(TIF_MTE_ASYNC_FAULT);
- send_sig_fault(SIGSEGV, SEGV_MTEAERR,
- (void __user *)NULL, current);
- }
- if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
- do_signal(regs);
-
- if (thread_flags & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
-
- if (thread_flags & _TIF_FOREIGN_FPSTATE)
- fpsimd_restore_current_state();
-
- local_irq_disable();
- thread_flags = read_thread_flags();
- } while (thread_flags & _TIF_WORK_MASK);
-}
-
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
- unsigned long flags;
-
local_irq_disable();
-
- flags = read_thread_flags();
- if (unlikely(flags & _TIF_WORK_MASK))
- do_notify_resume(regs, flags);
-
+ exit_to_user_mode_prepare_legacy(regs);
local_daif_mask();
-
- lockdep_sys_exit();
-}
-
-static __always_inline void exit_to_user_mode(struct pt_regs *regs)
-{
- exit_to_user_mode_prepare(regs);
mte_check_tfsr_exit();
- __exit_to_user_mode();
+ exit_to_user_mode();
}
asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
{
- exit_to_user_mode(regs);
-}
-
-/*
- * Handle IRQ/context state management when entering an NMI from user/kernel
- * mode. Before this function is called it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_enter_nmi(struct pt_regs *regs)
-{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
-
- __nmi_enter();
- lockdep_hardirqs_off(CALLER_ADDR0);
- lockdep_hardirq_enter();
- ct_nmi_enter();
-
- trace_hardirqs_off_finish();
- ftrace_nmi_enter();
-}
-
-/*
- * Handle IRQ/context state management when exiting an NMI from user/kernel
- * mode. After this function returns it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_exit_nmi(struct pt_regs *regs)
-{
- bool restore = regs->lockdep_hardirqs;
-
- ftrace_nmi_exit();
- if (restore) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- }
-
- ct_nmi_exit();
- lockdep_hardirq_exit();
- if (restore)
- lockdep_hardirqs_on(CALLER_ADDR0);
- __nmi_exit();
+ arm64_exit_to_user_mode(regs);
}
/*
@@ -230,14 +97,18 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
* kernel mode. Before this function is called it is not safe to call regular
* kernel code, instrumentable code, or any code which may trigger an exception.
*/
-static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+static noinstr irqentry_state_t arm64_enter_el1_dbg(struct pt_regs *regs)
{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+ irqentry_state_t state;
+
+ state.lockdep = lockdep_hardirqs_enabled();
lockdep_hardirqs_off(CALLER_ADDR0);
ct_nmi_enter();
trace_hardirqs_off_finish();
+
+ return state;
}
/*
@@ -245,62 +116,19 @@ static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
* kernel mode. After this function returns it is not safe to call regular
* kernel code, instrumentable code, or any code which may trigger an exception.
*/
-static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs,
+ irqentry_state_t state)
{
- bool restore = regs->lockdep_hardirqs;
-
- if (restore) {
+ if (state.lockdep) {
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
}
ct_nmi_exit();
- if (restore)
+ if (state.lockdep)
lockdep_hardirqs_on(CALLER_ADDR0);
}
-#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-#define need_irq_preemption() \
- (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
-#else
-#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
-#endif
-
-static void __sched arm64_preempt_schedule_irq(void)
-{
- if (!need_irq_preemption())
- return;
-
- /*
- * Note: thread_info::preempt_count includes both thread_info::count
- * and thread_info::need_resched, and is not equivalent to
- * preempt_count().
- */
- if (READ_ONCE(current_thread_info()->preempt_count) != 0)
- return;
-
- /*
- * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
- * priority masking is used the GIC irqchip driver will clear DAIF.IF
- * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
- * DAIF we must have handled an NMI, so skip preemption.
- */
- if (system_uses_irq_prio_masking() && read_sysreg(daif))
- return;
-
- /*
- * Preempting a task from an IRQ means we leave copies of PSTATE
- * on the stack. cpufeature's enable calls may modify PSTATE, but
- * resuming one of these preempted tasks would undo those changes.
- *
- * Only allow a task to be preempted once cpufeatures have been
- * enabled.
- */
- if (system_capabilities_finalized())
- preempt_schedule_irq();
-}
-
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
@@ -320,7 +148,7 @@ extern void (*handle_arch_fiq)(struct pt_regs *);
static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
unsigned long esr)
{
- arm64_enter_nmi(regs);
+ irqentry_nmi_enter(regs);
console_verbose();
@@ -344,7 +172,7 @@ static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static void cortex_a76_erratum_1463225_svc_handler(void)
{
- u32 reg, val;
+ u64 reg, val;
if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
return;
@@ -354,7 +182,7 @@ static void cortex_a76_erratum_1463225_svc_handler(void)
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ val = reg | MDSCR_EL1_SS | MDSCR_EL1_KDE;
write_sysreg(val, mdscr_el1);
asm volatile("msr daifclr, #8");
isb();
@@ -393,20 +221,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
* As per the ABI exit SME streaming mode and clear the SVE state not
* shared with FPSIMD on syscall entry.
*/
-static inline void fp_user_discard(void)
+static inline void fpsimd_syscall_enter(void)
{
- /*
- * If SME is active then exit streaming mode. If ZA is active
- * then flush the SVE registers but leave userspace access to
- * both SVE and SME enabled, otherwise disable SME for the
- * task and fall through to disabling SVE too. This means
- * that after a syscall we never have any streaming mode
- * register state to track, if this changes the KVM code will
- * need updating.
- */
+ /* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */
if (system_supports_sme())
sme_smstop_sm();
+ /*
+ * The CPU is not in streaming mode. If non-streaming SVE is not
+ * supported, there is no SVE state that needs to be discarded.
+ */
if (!system_supports_sve())
return;
@@ -416,7 +240,56 @@ static inline void fp_user_discard(void)
sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_flush_live(true, sve_vq_minus_one);
}
+
+ /*
+ * Any live non-FPSIMD SVE state has been zeroed. Allow
+ * fpsimd_save_user_state() to lazily discard SVE state until either
+ * the live state is unbound or fpsimd_syscall_exit() is called.
+ */
+ __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
+}
+
+static __always_inline void fpsimd_syscall_exit(void)
+{
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * The current task's user FPSIMD/SVE/SME state is now bound to this
+ * CPU. The fpsimd_last_state.to_save value is either:
+ *
+ * - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU
+ * since fpsimd_syscall_enter().
+ *
+ * - FP_STATE_CURRENT, if the state has been reloaded on this CPU at
+ * any point.
+ *
+ * Reset this to FP_STATE_CURRENT to stop lazy discarding.
+ */
+ __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
+}
+
+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+ preempt_disable();
+
+ /* This code is a bit fragile. Test it. */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+ preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
UNHANDLED(el1t, 64, sync)
UNHANDLED(el1t, 64, irq)
@@ -426,78 +299,128 @@ UNHANDLED(el1t, 64, error)
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- enter_from_kernel_mode(regs);
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_mem_abort(far, esr, regs);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- enter_from_kernel_mode(regs);
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_sp_pc_abort(far, esr, regs);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_undef(regs, esr);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_bti(regs, esr);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_gcs(regs, esr);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_mops(regs, esr);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
+}
+
+static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs)) {
+ debug_exception_enter(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+ * If we are stepping a suspended breakpoint there's nothing more to do:
+ * the single-step is complete.
+ */
+ if (!try_step_suspended_breakpoints(regs))
+ do_el1_softstep(esr, regs);
+ debug_exception_exit(regs);
+ }
+ arm64_exit_el1_dbg(regs, state);
}
-static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr)
{
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
+}
- arm64_enter_el1_dbg(regs);
- if (!cortex_a76_erratum_1463225_debug_handler(regs))
- do_debug_exception(far, esr, regs);
- arm64_exit_el1_dbg(regs);
+static void noinstr el1_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_el1_brk64(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
}
static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = arm64_enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_fpac(regs, esr);
- local_daif_mask();
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
@@ -530,10 +453,16 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
el1_mops(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_CUR:
+ el1_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_CUR:
+ el1_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_CUR:
+ el1_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el1_dbg(regs, esr);
+ el1_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el1_fpac(regs, esr);
@@ -546,30 +475,32 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
static __always_inline void __el1_pnmi(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- arm64_enter_nmi(regs);
+ irqentry_state_t state;
+
+ state = irqentry_nmi_enter(regs);
do_interrupt_handler(regs, handler);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
}
static __always_inline void __el1_irq(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = arm64_enter_from_kernel_mode(regs);
irq_enter_rcu();
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- arm64_preempt_schedule_irq();
-
- exit_to_kernel_mode(regs);
+ arm64_exit_to_kernel_mode(regs, state);
}
static void noinstr el1_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
- if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && regs_irqs_disabled(regs))
__el1_pnmi(regs, handler);
else
__el1_irq(regs, handler);
@@ -588,21 +519,22 @@ asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
+ irqentry_state_t state;
local_daif_restore(DAIF_ERRCTX);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
do_serror(regs, esr);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
}
static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
@@ -617,50 +549,50 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening();
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sve_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sme_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_exc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_sys(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
@@ -670,87 +602,132 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(instruction_pointer(regs)))
arm64_apply_bp_hardening();
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_undef(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_bti(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_bti(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_mops(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_gcs(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
bad_el0_sync(regs, 0, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ arm64_enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
+{
+ bool step_done;
+
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ arm64_enter_from_user_mode(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+ * If we are stepping a suspended breakpoint there's nothing more to do:
+ * the single-step is complete.
+ */
+ step_done = try_step_suspended_breakpoints(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ if (!step_done)
+ do_el0_softstep(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
-static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr)
{
- /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode(regs);
- do_debug_exception(far, esr, regs);
+ arm64_enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
- exit_to_user_mode(regs);
+ do_el0_brk64(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_svc(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
- fp_user_discard();
+ fpsimd_syscall_enter();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+ fpsimd_syscall_exit();
}
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_fpac(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
@@ -802,10 +779,16 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
el0_gcs(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el0_dbg(regs, esr);
+ el0_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el0_fpac(regs, esr);
@@ -818,7 +801,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
static void noinstr el0_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
@@ -829,7 +812,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
@@ -855,14 +838,15 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
static void noinstr __el0_error_handler_common(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
+ irqentry_state_t state;
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_ERRCTX);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
do_serror(regs, esr);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
local_daif_restore(DAIF_PROCCTX);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
@@ -873,19 +857,27 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_cp15(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc_compat(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_bkpt32(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
@@ -922,10 +914,16 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
el0_cp15(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BKPT32:
- el0_dbg(regs, esr);
+ el0_bkpt32(regs, esr);
break;
default:
el0_inv(regs, esr);
@@ -953,21 +951,20 @@ UNHANDLED(el0t, 32, fiq)
UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */
-#ifdef CONFIG_VMAP_STACK
asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
unsigned long far = read_sysreg(far_el1);
- arm64_enter_nmi(regs);
+ irqentry_nmi_enter(regs);
panic_bad_stack(regs, esr, far);
}
-#endif /* CONFIG_VMAP_STACK */
#ifdef CONFIG_ARM_SDE_INTERFACE
asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
+ irqentry_state_t state;
unsigned long ret;
/*
@@ -992,9 +989,9 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
else if (cpu_has_pan())
set_pstate_pan(0);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
ret = do_sdei_event(regs, arg);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
return ret;
}
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 169ccf600066..025140caafe7 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -94,7 +94,7 @@ SYM_CODE_START(ftrace_caller)
stp x29, x30, [sp, #FREGS_SIZE]
add x29, sp, #FREGS_SIZE
- /* Prepare arguments for the the tracer func */
+ /* Prepare arguments for the tracer func */
sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
mov x1, x9 // parent_ip (callsite's LR)
mov x3, sp // regs
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5ae2a34b50bd..e0db14e9c843 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -55,7 +55,6 @@
.endif
sub sp, sp, #PT_REGS_SIZE
-#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
@@ -97,7 +96,6 @@
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
-#endif
b el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
@@ -275,7 +273,7 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
alternative_else_nop_endif
1:
- scs_load_current
+ scs_load_current_base
.else
add x21, sp, #PT_REGS_SIZE
get_current_task tsk
@@ -380,8 +378,6 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
3:
- scs_save tsk
-
/* Ignore asynchronous tag check faults in the uaccess routines */
ldr x0, [tsk, THREAD_SCTLR_USER]
clear_mte_async_tcf x0
@@ -475,7 +471,7 @@ alternative_else_nop_endif
*/
SYM_CODE_START_LOCAL(__swpan_entry_el1)
mrs x21, ttbr0_el1
- tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
+ tst x21, #TTBRx_EL1_ASID_MASK // Check for the reserved ASID
orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
b.eq 1f // TTBR0 access already disabled
and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
@@ -540,7 +536,6 @@ SYM_CODE_START(vectors)
kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)
-#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(__bad_stack)
/*
* We detected an overflow in kernel_ventry, which switched to the
@@ -568,7 +563,6 @@ SYM_CODE_START_LOCAL(__bad_stack)
bl handle_bad_stack
ASM_BUG()
SYM_CODE_END(__bad_stack)
-#endif /* CONFIG_VMAP_STACK */
.macro entry_handler el:req, ht:req, regsize:req, label:req
@@ -614,7 +608,7 @@ SYM_CODE_END(ret_to_kernel)
SYM_CODE_START_LOCAL(ret_to_user)
ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
enable_step_tsk x19, x2
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#ifdef CONFIG_KSTACK_ERASE
bl stackleak_erase_on_task_stack
#endif
kernel_exit 0
@@ -825,6 +819,7 @@ SYM_CODE_END(__bp_harden_el1_vectors)
*
*/
SYM_FUNC_START(cpu_switch_to)
+ save_and_disable_daif x11
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -848,6 +843,7 @@ SYM_FUNC_START(cpu_switch_to)
ptrauth_keys_install_kernel x1, x8, x9, x10
scs_save x0
scs_load_current
+ restore_irq x11
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -874,6 +870,7 @@ NOKPROBE(ret_from_fork)
* Calls func(regs) using this CPU's irq stack and shadow irq stack.
*/
SYM_FUNC_START(call_on_irq_stack)
+ save_and_disable_daif x9
#ifdef CONFIG_SHADOW_CALL_STACK
get_current_task x16
scs_save x16
@@ -888,8 +885,10 @@ SYM_FUNC_START(call_on_irq_stack)
/* Move to the new stack and call the function there */
add sp, x16, #IRQ_STACK_SIZE
+ restore_irq x9
blr x1
+ save_and_disable_daif x9
/*
* Restore the SP from the FP, and restore the FP and LR from the frame
* record.
@@ -897,6 +896,7 @@ SYM_FUNC_START(call_on_irq_stack)
mov sp, x29
ldp x29, x30, [sp], #16
scs_load_current
+ restore_irq x9
ret
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
@@ -1003,7 +1003,6 @@ SYM_CODE_START(__sdei_asm_handler)
1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
2: str x19, [x5]
-#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
@@ -1016,7 +1015,6 @@ SYM_CODE_START(__sdei_asm_handler)
2: mov x6, #SDEI_STACK_SIZE
add x5, x5, x6
mov sp, x5
-#endif
#ifdef CONFIG_SHADOW_CALL_STACK
/* Use a separate shadow call stack for normal and critical events */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 8370d55f0353..9de1d8a604cb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -119,7 +119,7 @@
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
@@ -180,13 +180,6 @@ static inline void set_sve_default_vl(int val)
set_default_vl(ARM64_VEC_SVE, val);
}
-static void __percpu *efi_sve_state;
-
-#else /* ! CONFIG_ARM64_SVE */
-
-/* Dummy declaration for code that will be optimised out: */
-extern void __percpu *efi_sve_state;
-
#endif /* ! CONFIG_ARM64_SVE */
#ifdef CONFIG_ARM64_SME
@@ -225,10 +218,21 @@ static void fpsimd_bind_task_to_cpu(void);
*/
static void get_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_disable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ /*
+ * The softirq subsystem lacks a true unmask/mask API, and
+ * re-enabling softirq processing using local_bh_enable() will
+ * not only unmask softirqs, it will also result in immediate
+ * delivery of any pending softirqs.
+ * This is undesirable when running with IRQs disabled, but in
+ * that case, there is no need to mask softirqs in the first
+ * place, so only bother doing so when IRQs are enabled.
+ */
+ if (!irqs_disabled())
+ local_bh_disable();
+ } else {
preempt_disable();
+ }
}
/*
@@ -240,10 +244,12 @@ static void get_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_enable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (!irqs_disabled())
+ local_bh_enable();
+ } else {
preempt_enable();
+ }
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
@@ -359,20 +365,15 @@ static void task_fpsimd_load(void)
WARN_ON(preemptible());
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
- if (system_supports_fpmr())
- write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
-
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
- if (test_and_clear_thread_flag(TIF_SVE))
- sve_user_disable();
+ clear_thread_flag(TIF_SVE);
break;
case FP_STATE_SVE:
- if (!thread_sm_enabled(&current->thread) &&
- !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
- sve_user_enable();
+ if (!thread_sm_enabled(&current->thread))
+ WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));
if (test_thread_flag(TIF_SVE))
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
@@ -413,6 +414,9 @@ static void task_fpsimd_load(void)
restore_ffr = system_supports_fa64();
}
+ if (system_supports_fpmr())
+ write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
+
if (restore_sve_regs) {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
@@ -453,12 +457,15 @@ static void fpsimd_save_user_state(void)
*(last->fpmr) = read_sysreg_s(SYS_FPMR);
/*
- * If a task is in a syscall the ABI allows us to only
- * preserve the state shared with FPSIMD so don't bother
- * saving the full SVE state in that case.
+ * Save SVE state if it is live.
+ *
+ * The syscall ABI discards live SVE state at syscall entry. When
+ * entering a syscall, fpsimd_syscall_enter() sets to_save to
+ * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
+ * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
+ * called prior to a return to userspace.
*/
- if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
- !in_syscall(current_pt_regs())) ||
+ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
save_ffr = true;
@@ -651,7 +658,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
* task->thread.uw.fpsimd_state must be up to date before calling this
* function.
*/
-static void fpsimd_to_sve(struct task_struct *task)
+static inline void fpsimd_to_sve(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
@@ -675,7 +682,7 @@ static void fpsimd_to_sve(struct task_struct *task)
* bytes of allocated kernel memory.
* task->thread.sve_state must be up to date before calling this function.
*/
-static void sve_to_fpsimd(struct task_struct *task)
+static inline void sve_to_fpsimd(struct task_struct *task)
{
unsigned int vq, vl;
void const *sst = task->thread.sve_state;
@@ -694,44 +701,39 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
-void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
+static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
- write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
- SYS_SCTLR_EL1);
+ memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}
-#ifdef CONFIG_ARM64_SVE
/*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
+ * Simulate the effects of an SMSTOP SM instruction.
*/
-static void __sve_free(struct task_struct *task)
+void task_smstop_sm(struct task_struct *task)
{
- kfree(task->thread.sve_state);
- task->thread.sve_state = NULL;
-}
+ if (!thread_sm_enabled(&task->thread))
+ return;
-static void sve_free(struct task_struct *task)
-{
- WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+ __fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
+ task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
+ if (system_supports_fpmr())
+ task->thread.uw.fpmr = 0;
- __sve_free(task);
+ task->thread.svcr &= ~SVCR_SM_MASK;
+ task->thread.fp_type = FP_STATE_FPSIMD;
}
-/*
- * Return how many bytes of memory are required to store the full SVE
- * state for task, given task's currently configured vector length.
- */
-size_t sve_state_size(struct task_struct const *task)
+void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
- unsigned int vl = 0;
-
- if (system_supports_sve())
- vl = task_get_sve_vl(task);
- if (system_supports_sme())
- vl = max(vl, task_get_sme_vl(task));
+ write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
+ SYS_SCTLR_EL1);
+}
- return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+#ifdef CONFIG_ARM64_SVE
+static void sve_free(struct task_struct *task)
+{
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = NULL;
}
/*
@@ -758,69 +760,34 @@ void sve_alloc(struct task_struct *task, bool flush)
kzalloc(sve_state_size(task), GFP_KERNEL);
}
-
-/*
- * Force the FPSIMD state shared with SVE to be updated in the SVE state
- * even if the SVE state is the current active state.
- *
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void fpsimd_force_sync_to_sve(struct task_struct *task)
-{
- fpsimd_to_sve(task);
-}
-
/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the user task, irrespective of when SVE is in use or not.
+ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
+ * task's currently effective FPSIMD/SVE state.
*
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
*/
-void fpsimd_sync_to_sve(struct task_struct *task)
-{
- if (!test_tsk_thread_flag(task, TIF_SVE) &&
- !thread_sm_enabled(&task->thread))
- fpsimd_to_sve(task);
-}
-
-/*
- * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
- * the user task, irrespective of whether SVE is in use or not.
- *
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void sve_sync_to_fpsimd(struct task_struct *task)
+void fpsimd_sync_from_effective_state(struct task_struct *task)
{
if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}
/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the task->thread.uw.fpsimd_state.
+ * Ensure that the task's currently effective FPSIMD/SVE state is up to date
+ * with respect to task->thread.uw.fpsimd_state, zeroing any effective
+ * non-FPSIMD (S)SVE state.
*
- * This should only be called by ptrace to merge new FPSIMD register
- * values into a task for which SVE is currently active.
- * task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- * task->thread.uw.fpsimd_state must already have been initialised with
- * the new FPSIMD register values to be merged in.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
*/
-void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE) &&
- !thread_sm_enabled(&task->thread))
+ if (task->thread.fp_type != FP_STATE_SVE)
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -829,10 +796,73 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
__fpsimd_to_sve(sst, fst, vq);
}
+static int change_live_vector_length(struct task_struct *task,
+ enum vec_type type,
+ unsigned long vl)
+{
+ unsigned int sve_vl = task_get_sve_vl(task);
+ unsigned int sme_vl = task_get_sme_vl(task);
+ void *sve_state = NULL, *sme_state = NULL;
+
+ if (type == ARM64_VEC_SME)
+ sme_vl = vl;
+ else
+ sve_vl = vl;
+
+ /*
+ * Allocate the new sve_state and sme_state before freeing the old
+ * copies so that allocation failure can be handled without needing to
+ * mutate the task's state in any way.
+ *
+ * Changes to the SVE vector length must not discard live ZA state or
+ * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
+ * ZA lazy saving scheme may attempt to change the SVE vector length
+ * while unsaved/dormant ZA state exists.
+ */
+ sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
+ if (!sve_state)
+ goto out_mem;
+
+ if (type == ARM64_VEC_SME) {
+ sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
+ if (!sme_state)
+ goto out_mem;
+ }
+
+ if (task == current)
+ fpsimd_save_and_flush_current_state();
+ else
+ fpsimd_flush_task_state(task);
+
+ /*
+ * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
+ * other SVE state.
+ */
+ fpsimd_sync_from_effective_state(task);
+ task_set_vl(task, type, vl);
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = sve_state;
+ fpsimd_sync_to_effective_state_zeropad(task);
+
+ if (type == ARM64_VEC_SME) {
+ task->thread.svcr &= ~SVCR_ZA_MASK;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = sme_state;
+ }
+
+ return 0;
+
+out_mem:
+ kfree(sve_state);
+ kfree(sme_state);
+ return -ENOMEM;
+}
+
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
- bool free_sme = false;
+ bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
+ bool inherit = flags & PR_SVE_VL_INHERIT;
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
@@ -852,71 +882,17 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
vl = find_supported_vector_length(type, vl);
- if (flags & (PR_SVE_VL_INHERIT |
- PR_SVE_SET_VL_ONEXEC))
+ if (!onexec && vl != task_get_vl(task, type)) {
+ if (change_live_vector_length(task, type, vl))
+ return -ENOMEM;
+ }
+
+ if (onexec || inherit)
task_set_vl_onexec(task, type, vl);
else
/* Reset VL to system default on next exec: */
task_set_vl_onexec(task, type, 0);
- /* Only actually set the VL if not deferred: */
- if (flags & PR_SVE_SET_VL_ONEXEC)
- goto out;
-
- if (vl == task_get_vl(task, type))
- goto out;
-
- /*
- * To ensure the FPSIMD bits of the SVE vector registers are preserved,
- * write any live register state back to task_struct, and convert to a
- * regular FPSIMD thread.
- */
- if (task == current) {
- get_cpu_fpsimd_context();
-
- fpsimd_save_user_state();
- }
-
- fpsimd_flush_task_state(task);
- if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
- thread_sm_enabled(&task->thread)) {
- sve_to_fpsimd(task);
- task->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- if (system_supports_sme()) {
- if (type == ARM64_VEC_SME ||
- !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
- /*
- * We are changing the SME VL or weren't using
- * SME anyway, discard the state and force a
- * reallocation.
- */
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_tsk_thread_flag(task, TIF_SME);
- free_sme = true;
- }
- }
-
- if (task == current)
- put_cpu_fpsimd_context();
-
- task_set_vl(task, type, vl);
-
- /*
- * Free the changed states if they are not in use, SME will be
- * reallocated to the correct size on next use and we just
- * allocate SVE now in case it is needed for use in streaming
- * mode.
- */
- sve_free(task);
- sve_alloc(task, true);
-
- if (free_sme)
- sme_free(task);
-
-out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
flags & PR_SVE_VL_INHERIT);
@@ -1112,36 +1088,6 @@ int vec_verify_vq_map(enum vec_type type)
return 0;
}
-static void __init sve_efi_setup(void)
-{
- int max_vl = 0;
- int i;
-
- if (!IS_ENABLED(CONFIG_EFI))
- return;
-
- for (i = 0; i < ARRAY_SIZE(vl_info); i++)
- max_vl = max(vl_info[i].max_vl, max_vl);
-
- /*
- * alloc_percpu() warns and prints a backtrace if this goes wrong.
- * This is evidence of a crippled system and we are returning void,
- * so no attempt is made to handle this situation here.
- */
- if (!sve_vl_valid(max_vl))
- goto fail;
-
- efi_sve_state = __alloc_percpu(
- SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
- if (!efi_sve_state)
- goto fail;
-
- return;
-
-fail:
- panic("Cannot allocate percpu memory for EFI SVE save/restore");
-}
-
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1202,8 +1148,6 @@ void __init sve_setup(void)
if (sve_max_virtualisable_vl() < sve_max_vl())
pr_warn("%s: unvirtualisable vector lengths present\n",
info->name);
-
- sve_efi_setup();
}
/*
@@ -1212,7 +1156,7 @@ void __init sve_setup(void)
*/
void fpsimd_release_task(struct task_struct *dead_task)
{
- __sve_free(dead_task);
+ sve_free(dead_task);
sme_free(dead_task);
}
@@ -1295,6 +1239,8 @@ void __init sme_setup(void)
if (!system_supports_sme())
return;
+ min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
+
/*
* SME doesn't require any particular vector length be
* supported but it does require at least one. We should have
@@ -1302,9 +1248,8 @@ void __init sme_setup(void)
* let's double check here. The bitmap is SVE_VQ_MAP sized for
* sharing with SVE.
*/
- WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
+ WARN_ON(min_bit >= SVE_VQ_MAX);
- min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
@@ -1436,7 +1381,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
* If this not a trap due to SME being disabled then something
* is being used in the wrong mode, report as SIGILL.
*/
- if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
+ if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
@@ -1460,6 +1405,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
sme_set_vq(vq_minus_one);
fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_flush_task_state(current);
}
put_cpu_fpsimd_context();
@@ -1516,21 +1463,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1573,8 +1522,8 @@ void fpsimd_thread_switch(struct task_struct *next)
fpsimd_save_user_state();
if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
- fpsimd_load_kernel_state(next);
fpsimd_flush_cpu_state();
+ fpsimd_load_kernel_state(next);
} else {
/*
* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
@@ -1661,6 +1610,9 @@ void fpsimd_flush_thread(void)
current->thread.svcr = 0;
}
+ if (system_supports_fpmr())
+ current->thread.uw.fpmr = 0;
+
current->thread.fp_type = FP_STATE_FPSIMD;
put_cpu_fpsimd_context();
@@ -1683,18 +1635,6 @@ void fpsimd_preserve_current_state(void)
}
/*
- * Like fpsimd_preserve_current_state(), but ensure that
- * current->thread.uw.fpsimd_state is updated so that it can be copied to
- * the signal frame.
- */
-void fpsimd_signal_preserve_current_state(void)
-{
- fpsimd_preserve_current_state();
- if (current->thread.fp_type == FP_STATE_SVE)
- sve_to_fpsimd(current);
-}
-
-/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
@@ -1786,30 +1726,14 @@ void fpsimd_restore_current_state(void)
put_cpu_fpsimd_context();
}
-/*
- * Load an updated userland FPSIMD state for 'current' from memory and set the
- * flag that indicates that the FPSIMD register contents are the most recent
- * FPSIMD state of 'current'. This is used by the signal code to restore the
- * register state when returning from a signal handler in FPSIMD only cases,
- * any SVE context will be discarded.
- */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
- get_cpu_fpsimd_context();
-
current->thread.uw.fpsimd_state = *state;
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
fpsimd_to_sve(current);
-
- task_fpsimd_load();
- fpsimd_bind_task_to_cpu();
-
- clear_thread_flag(TIF_FOREIGN_FPSTATE);
-
- put_cpu_fpsimd_context();
}
/*
@@ -1826,6 +1750,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1839,6 +1764,17 @@ void fpsimd_flush_task_state(struct task_struct *t)
barrier();
}
+void fpsimd_save_and_flush_current_state(void)
+{
+ if (!system_supports_fpsimd())
+ return;
+
+ get_cpu_fpsimd_context();
+ fpsimd_save_user_state();
+ fpsimd_flush_task_state(current);
+ put_cpu_fpsimd_context();
+}
+
/*
* Save the FPSIMD state to memory and invalidate cpu view.
* This function must be called with preemption disabled.
@@ -1874,12 +1810,19 @@ void fpsimd_save_and_flush_cpu_state(void)
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
+ WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
@@ -1887,7 +1830,7 @@ void kernel_neon_begin(void)
/* Save unsaved fpsimd state, if any: */
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
- fpsimd_save_kernel_state(current);
+ fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
@@ -1908,8 +1851,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+ current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1927,31 +1878,36 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
+ if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+ return;
+
/*
* If we are returning from a nested use of kernel mode FPSIMD, restore
* the task context kernel mode FPSIMD state. This can only happen when
* running in softirq context on non-PREEMPT_RT.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
- test_thread_flag(TIF_KERNEL_FPSTATE))
- fpsimd_load_kernel_state(current);
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+ fpsimd_load_state(state);
+ } else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
+ WARN_ON(current->thread.kernel_fpsimd_state != state);
+ current->thread.kernel_fpsimd_state = NULL;
+ }
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
-static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
-static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
-static DEFINE_PER_CPU(bool, efi_sve_state_used);
-static DEFINE_PER_CPU(bool, efi_sm_state);
+static struct user_fpsimd_state efi_fpsimd_state;
/*
* EFI runtime services support functions
@@ -1975,49 +1931,29 @@ void __efi_fpsimd_begin(void)
if (!system_supports_fpsimd())
return;
- WARN_ON(preemptible());
-
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
/*
- * If !efi_sve_state, SVE can't be in use yet and doesn't need
- * preserving:
+ * We are running in hardirq or NMI context, and the only
+ * legitimate case where this might happen is when EFI pstore
+ * is attempting to record the system's dying gasps into EFI
+ * variables. This could be due to an oops, a panic or a call
+ * to emergency_restart(), and in none of those cases, we can
+ * expect the current task to ever return to user space again,
+ * or for the kernel to resume any normal execution, for that
+ * matter (an oops in hardirq context triggers a panic too).
+ *
+ * Therefore, there is no point in attempting to preserve any
+ * SVE/SME state here. On the off chance that we might have
+ * ended up here for a different reason inadvertently, kill the
+ * task and preserve/restore the base FP/SIMD state, which
+ * might belong to kernel mode FP/SIMD.
*/
- if (system_supports_sve() && likely(efi_sve_state)) {
- char *sve_state = this_cpu_ptr(efi_sve_state);
- bool ffr = true;
- u64 svcr;
-
- __this_cpu_write(efi_sve_state_used, true);
-
- if (system_supports_sme()) {
- svcr = read_sysreg_s(SYS_SVCR);
-
- __this_cpu_write(efi_sm_state,
- svcr & SVCR_SM_MASK);
-
- /*
- * Unless we have FA64 FFR does not
- * exist in streaming mode.
- */
- if (!system_supports_fa64())
- ffr = !(svcr & SVCR_SM_MASK);
- }
-
- sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- ffr);
-
- if (system_supports_sme())
- sysreg_clear_set_s(SYS_SVCR,
- SVCR_SM_MASK, 0);
-
- } else {
- fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
- }
-
- __this_cpu_write(efi_fpsimd_state_used, true);
+ pr_warn_ratelimited("Calling EFI runtime from %s context\n",
+ in_nmi() ? "NMI" : "hardirq");
+ force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+ fpsimd_save_state(&efi_fpsimd_state);
}
}
@@ -2029,42 +1965,10 @@ void __efi_fpsimd_end(void)
if (!system_supports_fpsimd())
return;
- if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
- kernel_neon_end();
+ if (may_use_simd()) {
+ kernel_neon_end(&efi_fpsimd_state);
} else {
- if (system_supports_sve() &&
- likely(__this_cpu_read(efi_sve_state_used))) {
- char const *sve_state = this_cpu_ptr(efi_sve_state);
- bool ffr = true;
-
- /*
- * Restore streaming mode; EFI calls are
- * normal function calls so should not return in
- * streaming mode.
- */
- if (system_supports_sme()) {
- if (__this_cpu_read(efi_sm_state)) {
- sysreg_clear_set_s(SYS_SVCR,
- 0,
- SVCR_SM_MASK);
-
- /*
- * Unless we have FA64 FFR does not
- * exist in streaming mode.
- */
- if (!system_supports_fa64())
- ffr = false;
- }
- }
-
- sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- ffr);
-
- __this_cpu_write(efi_sve_state_used, false);
- } else {
- fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
- }
+ fpsimd_load_state(&efi_fpsimd_state);
}
}
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index d7c0d023dfe5..5a1554a44162 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -258,10 +258,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ftrace_modify_code(pc, 0, new, false);
}
-static struct plt_entry *get_ftrace_plt(struct module *mod)
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
{
#ifdef CONFIG_MODULES
- struct plt_entry *plt = mod->arch.ftrace_trampolines;
+ struct plt_entry *plt = NULL;
+
+ if (within_module_mem_type(addr, mod, MOD_INIT_TEXT))
+ plt = mod->arch.init_ftrace_trampolines;
+ else if (within_module_mem_type(addr, mod, MOD_TEXT))
+ plt = mod->arch.ftrace_trampolines;
+ else
+ return NULL;
return &plt[FTRACE_PLT_IDX];
#else
@@ -320,20 +327,19 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
* dealing with an out-of-range condition, we can assume it
* is due to a module being loaded far away from the kernel.
*
- * NOTE: __module_text_address() must be called with preemption
- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * NOTE: __module_text_address() must be called within a RCU read
+ * section, but we can rely on ftrace_lock to ensure that 'mod'
* retains its validity throughout the remainder of this code.
*/
if (!mod) {
- preempt_disable();
+ guard(rcu)();
mod = __module_text_address(pc);
- preempt_enable();
}
if (WARN_ON(!mod))
return false;
- plt = get_ftrace_plt(mod);
+ plt = get_ftrace_plt(mod, pc);
if (!plt) {
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
return false;
@@ -486,7 +492,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return ret;
/*
- * When using mcount, callsites in modules may have been initalized to
+ * When using mcount, callsites in modules may have been initialized to
* call an arbitrary module PLT (which redirects to the _mcount stub)
* rather than the ftrace PLT we'll use at runtime (which redirects to
* the ftrace trampoline). We can ignore the old PLT when initializing
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2ce73525de2c..87a822e5c4ca 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -89,7 +89,7 @@ SYM_CODE_START(primary_entry)
adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
- adrp x0, init_idmap_pg_dir
+ adrp x0, __pi_init_idmap_pg_dir
mov x1, xzr
bl __pi_create_init_idmap
@@ -101,7 +101,7 @@ SYM_CODE_START(primary_entry)
cbnz x19, 0f
dmb sy
mov x1, x0 // end of used region
- adrp x0, init_idmap_pg_dir
+ adrp x0, __pi_init_idmap_pg_dir
adr_l x2, dcache_inval_poc
blr x2
b 1f
@@ -299,7 +299,7 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
isb
0:
- init_el2_hcr HCR_HOST_NVHE_FLAGS
+ init_el2_hcr HCR_HOST_NVHE_FLAGS | HCR_ATA
init_el2_state
/* Hypervisor stub */
@@ -507,7 +507,7 @@ SYM_FUNC_END(__no_granule_support)
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
- adrp x2, init_idmap_pg_dir
+ adrp x2, __pi_init_idmap_pg_dir
bl __enable_mmu
adrp x1, early_init_stack
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 18749e9a6c2d..9717568518ba 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -402,7 +402,7 @@ int swsusp_arch_suspend(void)
* Memory allocated by get_safe_page() will be dealt with by the hibernate code,
* we don't need to free it here.
*/
-int swsusp_arch_resume(void)
+int __nocfi swsusp_arch_resume(void)
{
int rc;
void *zero_page;
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 722ac45f9f7b..ab76b36dce82 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -22,6 +22,7 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
@@ -618,8 +619,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
*/
-static int breakpoint_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+void do_breakpoint(unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step;
u32 ctrl_reg;
@@ -662,7 +662,7 @@ unlock:
}
if (!step)
- return 0;
+ return;
if (user_mode(regs)) {
debug_info->bps_disabled = 1;
@@ -670,7 +670,7 @@ unlock:
/* If we're already stepping a watchpoint, just return. */
if (debug_info->wps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -681,7 +681,7 @@ unlock:
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -690,10 +690,8 @@ unlock:
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(breakpoint_handler);
+NOKPROBE_SYMBOL(do_breakpoint);
/*
* Arm64 hardware does not always report a watchpoint hit address that matches
@@ -752,8 +750,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
return step;
}
-static int watchpoint_handler(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
+void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step, access, closest_match = 0;
u64 min_dist = -1, dist;
@@ -808,7 +805,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
rcu_read_unlock();
if (!step)
- return 0;
+ return;
/*
* We always disable EL0 watchpoints because the kernel can
@@ -821,7 +818,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
/* If we're already stepping a breakpoint, just return. */
if (debug_info->bps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -832,7 +829,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -841,44 +838,41 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(watchpoint_handler);
+NOKPROBE_SYMBOL(do_watchpoint);
/*
* Handle single-step exception.
*/
-int reinstall_suspended_bps(struct pt_regs *regs)
+bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
struct debug_info *debug_info = &current->thread.debug;
- int handled_exception = 0, *kernel_step;
-
- kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ int *kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ bool handled_exception = false;
/*
- * Called from single-step exception handler.
- * Return 0 if execution can resume, 1 if a SIGTRAP should be
- * reported.
+ * Called from single-step exception entry.
+ * Return true if we stepped a breakpoint and can resume execution,
+ * false if we need to handle a single-step.
*/
if (user_mode(regs)) {
if (debug_info->bps_disabled) {
debug_info->bps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (debug_info->wps_disabled) {
debug_info->wps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (handled_exception) {
if (debug_info->suspended_step) {
debug_info->suspended_step = 0;
/* Allow exception handling to fall-through. */
- handled_exception = 0;
+ handled_exception = false;
} else {
user_disable_single_step(current);
}
@@ -892,17 +886,17 @@ int reinstall_suspended_bps(struct pt_regs *regs)
if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
kernel_disable_single_step();
- handled_exception = 1;
+ handled_exception = true;
} else {
- handled_exception = 0;
+ handled_exception = false;
}
*kernel_step = ARM_KERNEL_STEP_NONE;
}
- return !handled_exception;
+ return handled_exception;
}
-NOKPROBE_SYMBOL(reinstall_suspended_bps);
+NOKPROBE_SYMBOL(try_step_suspended_breakpoints);
/*
* Context-switcher for restoring suspended breakpoints.
@@ -987,12 +981,6 @@ static int __init arch_hw_breakpoint_init(void)
pr_info("found %d breakpoint and %d watchpoint registers.\n",
core_num_brps, core_num_wrps);
- /* Register debug fault handlers. */
- hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-breakpoint handler");
- hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-watchpoint handler");
-
/*
* Reset the breakpoint resources. We assume that a halting
* debugger will leave the world in a nice state for us.
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index ae990da1eae5..634ddc904244 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -54,6 +54,11 @@ SYM_CODE_START_LOCAL(elx_sync)
1: cmp x0, #HVC_FINALISE_EL2
b.eq __finalise_el2
+ cmp x0, #HVC_GET_ICH_VTR_EL2
+ b.ne 2f
+ mrs_s x1, SYS_ICH_VTR_EL2
+ b 9f
+
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@@ -97,8 +102,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
2:
// Engage the VHE magic!
mov_q x0, HCR_HOST_VHE_FLAGS
- msr hcr_el2, x0
- isb
+ msr_hcr_el2 x0
// Use the EL1 allocated stack, per-cpu offset
mrs x0, sp_el1
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index ef3a69cc398e..d4c7d45ae6bc 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,6 +10,16 @@
#error This file should only be included in vmlinux.lds.S
#endif
+#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 210000
+#define ASSERT(...)
+#endif
+
+#define PI_EXPORT_SYM(sym) \
+ __PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code)
+#define __PI_EXPORT_SYM(sym, pisym, msg)\
+ PROVIDE(pisym = sym); \
+ ASSERT((sym - KIMAGE_VADDR) < (__bss_start - KIMAGE_VADDR), #msg)
+
PROVIDE(__efistub_primary_entry = primary_entry);
/*
@@ -28,7 +38,7 @@ PROVIDE(__efistub__end = _end);
PROVIDE(__efistub___inittext_end = __inittext_end);
PROVIDE(__efistub__edata = _edata);
#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB)
-PROVIDE(__efistub_screen_info = screen_info);
+PROVIDE(__efistub_sysfb_primary_display = sysfb_primary_display);
#endif
PROVIDE(__efistub__ctype = _ctype);
@@ -36,40 +46,30 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
-PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override);
-PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override);
-PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override);
-PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override);
-PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override);
-PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override);
-PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override);
-PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
-PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
-PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
-PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
-#ifdef CONFIG_CAVIUM_ERRATUM_27456
-PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
-#endif
-PROVIDE(__pi__ctype = _ctype);
-PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
-
-PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
-PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
-PROVIDE(__pi_init_pg_dir = init_pg_dir);
-PROVIDE(__pi_init_pg_end = init_pg_end);
-PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
-
-PROVIDE(__pi__text = _text);
-PROVIDE(__pi__stext = _stext);
-PROVIDE(__pi__etext = _etext);
-PROVIDE(__pi___start_rodata = __start_rodata);
-PROVIDE(__pi___inittext_begin = __inittext_begin);
-PROVIDE(__pi___inittext_end = __inittext_end);
-PROVIDE(__pi___initdata_begin = __initdata_begin);
-PROVIDE(__pi___initdata_end = __initdata_end);
-PROVIDE(__pi__data = _data);
-PROVIDE(__pi___bss_start = __bss_start);
-PROVIDE(__pi__end = _end);
+PI_EXPORT_SYM(id_aa64isar1_override);
+PI_EXPORT_SYM(id_aa64isar2_override);
+PI_EXPORT_SYM(id_aa64mmfr0_override);
+PI_EXPORT_SYM(id_aa64mmfr1_override);
+PI_EXPORT_SYM(id_aa64mmfr2_override);
+PI_EXPORT_SYM(id_aa64pfr0_override);
+PI_EXPORT_SYM(id_aa64pfr1_override);
+PI_EXPORT_SYM(id_aa64smfr0_override);
+PI_EXPORT_SYM(id_aa64zfr0_override);
+PI_EXPORT_SYM(arm64_sw_feature_override);
+PI_EXPORT_SYM(arm64_use_ng_mappings);
+PI_EXPORT_SYM(_ctype);
+
+PI_EXPORT_SYM(swapper_pg_dir);
+
+PI_EXPORT_SYM(_text);
+PI_EXPORT_SYM(_stext);
+PI_EXPORT_SYM(_etext);
+PI_EXPORT_SYM(__start_rodata);
+PI_EXPORT_SYM(__inittext_begin);
+PI_EXPORT_SYM(__inittext_end);
+PI_EXPORT_SYM(__initdata_begin);
+PI_EXPORT_SYM(__initdata_end);
+PI_EXPORT_SYM(_data);
#ifdef CONFIG_KVM
@@ -91,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
KVM_NVHE_ALIAS(alt_cb_patch_nops);
+KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);
@@ -105,6 +106,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
+/* Static key indicating whether GICv3 has GICv2 compatibility */
+KVM_NVHE_ALIAS(vgic_v3_has_v2_compat);
+
/* Static key which is set if CNTVOFF_EL2 is unusable */
KVM_NVHE_ALIAS(broken_cntvoff_key);
@@ -112,11 +116,6 @@ KVM_NVHE_ALIAS(broken_cntvoff_key);
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
-/* PMU available static key */
-#ifdef CONFIG_HW_PERF_EVENTS
-KVM_NVHE_ALIAS(kvm_arm_pmu_available);
-#endif
-
/* Position-independent library routines */
KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
@@ -135,8 +134,14 @@ KVM_NVHE_ALIAS(__hyp_text_start);
KVM_NVHE_ALIAS(__hyp_text_end);
KVM_NVHE_ALIAS(__hyp_bss_start);
KVM_NVHE_ALIAS(__hyp_bss_end);
+KVM_NVHE_ALIAS(__hyp_data_start);
+KVM_NVHE_ALIAS(__hyp_data_end);
KVM_NVHE_ALIAS(__hyp_rodata_start);
KVM_NVHE_ALIAS(__hyp_rodata_end);
+#ifdef CONFIG_NVHE_EL2_TRACING
+KVM_NVHE_ALIAS(__hyp_event_ids_start);
+KVM_NVHE_ALIAS(__hyp_event_ids_end);
+#endif
/* pKVM static key */
KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
@@ -147,4 +152,17 @@ KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
_kernel_codesize = ABSOLUTE(__inittext_end - _text);
#endif
+/*
+ * LLD will occasionally error out with a '__init_end does not converge' error
+ * if INIT_IDMAP_DIR_SIZE is defined in terms of _end, as this results in a
+ * circular dependency. Counter this by dimensioning the initial IDMAP page
+ * tables based on kimage_limit, which is defined such that its value should
+ * not change as a result of the initdata segment being pushed over a 64k
+ * segment boundary due to changes in INIT_IDMAP_DIR_SIZE, provided that its
+ * value doesn't change by more than 2M between linker passes.
+ */
+kimage_limit = ALIGN(ABSOLUTE(_end + SZ_64K), SZ_2M);
+
+#undef ASSERT
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 85087e2df564..15dedb385b9e 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -51,7 +51,6 @@ static void init_irq_scs(void)
scs_alloc(early_cpu_to_node(cpu));
}
-#ifdef CONFIG_VMAP_STACK
static void __init init_irq_stacks(void)
{
int cpu;
@@ -62,20 +61,8 @@ static void __init init_irq_stacks(void)
per_cpu(irq_stack_ptr, cpu) = p;
}
}
-#else
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
-DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
-static void init_irq_stacks(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
-}
-#endif
-
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static void ____do_softirq(struct pt_regs *regs)
{
__do_softirq();
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 1da3e25f9d9e..c9503ed45a6c 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -10,8 +10,6 @@
#include <asm/cpufeature.h>
#include <asm/memory.h>
-u16 __initdata memstart_offset_seed;
-
bool __ro_after_init __kaslr_is_enabled = false;
void __init kaslr_init(void)
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 532d72ea42ee..b70f4df15a1a 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -41,7 +41,7 @@ static void *image_load(struct kimage *image,
struct arm64_image_header *h;
u64 flags, value;
bool be_image, be_kernel;
- struct kexec_buf kbuf;
+ struct kexec_buf kbuf = {};
unsigned long text_offset, kernel_segment_number;
struct kexec_segment *kernel_segment;
int ret;
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index f3c4d3a8a20f..968324a79a89 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -234,23 +234,23 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
return err;
}
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_brk_fn)
+NOKPROBE_SYMBOL(kgdb_brk_handler)
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr)
{
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
+NOKPROBE_SYMBOL(kgdb_compiled_brk_handler);
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr)
{
if (!kgdb_single_step)
return DBG_HOOK_ERROR;
@@ -258,21 +258,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
kgdb_handle_exception(0, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_step_brk_fn);
-
-static struct break_hook kgdb_brkpt_hook = {
- .fn = kgdb_brk_fn,
- .imm = KGDB_DYN_DBG_BRK_IMM,
-};
-
-static struct break_hook kgdb_compiled_brkpt_hook = {
- .fn = kgdb_compiled_brk_fn,
- .imm = KGDB_COMPILED_DBG_BRK_IMM,
-};
-
-static struct step_hook kgdb_step_hook = {
- .fn = kgdb_step_brk_fn
-};
+NOKPROBE_SYMBOL(kgdb_single_step_handler);
static int __kgdb_notify(struct die_args *args, unsigned long cmd)
{
@@ -311,15 +297,7 @@ static struct notifier_block kgdb_notifier = {
*/
int kgdb_arch_init(void)
{
- int ret = register_die_notifier(&kgdb_notifier);
-
- if (ret != 0)
- return ret;
-
- register_kernel_break_hook(&kgdb_brkpt_hook);
- register_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- register_kernel_step_hook(&kgdb_step_hook);
- return 0;
+ return register_die_notifier(&kgdb_notifier);
}
/*
@@ -329,9 +307,6 @@ int kgdb_arch_init(void)
*/
void kgdb_arch_exit(void)
{
- unregister_kernel_break_hook(&kgdb_brkpt_hook);
- unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- unregister_kernel_step_hook(&kgdb_step_hook);
unregister_die_notifier(&kgdb_notifier);
}
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 6f121a0164a4..c5693a32e49b 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -129,9 +129,6 @@ int machine_kexec_post_load(struct kimage *kimage)
}
/* Create a copy of the linear map */
- trans_pgd = kexec_page_alloc(kimage);
- if (!trans_pgd)
- return -ENOMEM;
rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
if (rc)
return rc;
@@ -251,7 +248,7 @@ void crash_post_resume(void)
* marked as Reserved as memory was allocated via memblock_reserve().
*
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
- * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * from the hibernation image. crash_is_nosave() does thich check for crash
* dump kernel and will reduce the total size of hibernation image.
*/
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index af1ca875c52c..e31fabed378a 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -52,7 +52,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
for_each_mem_range(i, &start, &end)
nr_ranges++;
- cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ cmem = kmalloc_flex(*cmem, ranges, nr_ranges);
if (!cmem)
return -ENOMEM;
@@ -94,7 +94,7 @@ int load_other_segments(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline)
{
- struct kexec_buf kbuf;
+ struct kexec_buf kbuf = {};
void *dtb = NULL;
unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
@@ -134,6 +134,10 @@ int load_other_segments(struct kimage *image,
kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+
+ ret = crash_load_dm_crypt_keys(image);
+ if (ret)
+ goto out_err;
}
#endif
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index bde32979c06a..7afd370da9f4 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -283,7 +283,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
- Elf_Shdr *pltsec, *tramp = NULL;
+ Elf_Shdr *pltsec, *tramp = NULL, *init_tramp = NULL;
int i;
/*
@@ -298,6 +298,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
else if (!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
+ else if (!strcmp(secstrings + sechdrs[i].sh_name,
+ ".init.text.ftrace_trampoline"))
+ init_tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
@@ -363,5 +366,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}
+ if (init_tramp) {
+ init_tramp->sh_type = SHT_NOBITS;
+ init_tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ init_tramp->sh_addralign = __alignof__(struct plt_entry);
+ init_tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
+ }
+
return 0;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 06bb680bfe97..24adb581af0e 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -23,6 +23,7 @@
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
+#include <asm/text-patching.h>
enum aarch64_reloc_op {
RELOC_OP_NONE,
@@ -48,7 +49,17 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
return 0;
}
-static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+#define WRITE_PLACE(place, val, mod) do { \
+ __typeof__(val) __val = (val); \
+ \
+ if (mod->state == MODULE_STATE_UNFORMED) \
+ *(place) = __val; \
+ else \
+ aarch64_insn_copy(place, &(__val), sizeof(*place)); \
+} while (0)
+
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
+ struct module *me)
{
s64 sval = do_reloc(op, place, val);
@@ -66,7 +77,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
switch (len) {
case 16:
- *(s16 *)place = sval;
+ WRITE_PLACE((s16 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U16_MAX)
@@ -82,7 +93,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 32:
- *(s32 *)place = sval;
+ WRITE_PLACE((s32 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U32_MAX)
@@ -98,7 +109,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 64:
- *(s64 *)place = sval;
+ WRITE_PLACE((s64 *)place, sval, me);
break;
default:
pr_err("Invalid length (%d) for data relocation\n", len);
@@ -113,7 +124,8 @@ enum aarch64_insn_movw_imm_type {
};
static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, enum aarch64_insn_movw_imm_type imm_type)
+ int lsb, enum aarch64_insn_movw_imm_type imm_type,
+ struct module *me)
{
u64 imm;
s64 sval;
@@ -145,7 +157,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
if (imm > U16_MAX)
return -ERANGE;
@@ -154,7 +166,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, int len, enum aarch64_insn_imm_type imm_type)
+ int lsb, int len, enum aarch64_insn_imm_type imm_type,
+ struct module *me)
{
u64 imm, imm_mask;
s64 sval;
@@ -170,7 +183,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
/*
* Extract the upper value bits (including the sign bit) and
@@ -189,17 +202,17 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
- __le32 *place, u64 val)
+ __le32 *place, u64 val, struct module *me)
{
u32 insn;
if (!is_forbidden_offset_for_adrp(place))
return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
/* patch ADRP to ADR if it is in range */
if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
- AARCH64_INSN_IMM_ADR)) {
+ AARCH64_INSN_IMM_ADR, me)) {
insn = le32_to_cpu(*place);
insn &= ~BIT(31);
} else {
@@ -211,7 +224,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
AARCH64_INSN_BRANCH_NOLINK);
}
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
return 0;
}
@@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Data relocations. */
case R_AARCH64_ABS64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, me);
break;
case R_AARCH64_ABS32:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, me);
break;
case R_AARCH64_ABS16:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, me);
break;
case R_AARCH64_PREL64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, me);
break;
case R_AARCH64_PREL32:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, me);
break;
case R_AARCH64_PREL16:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, me);
break;
/* MOVW instruction relocations. */
@@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
fallthrough;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_SABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G0_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G0:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G1_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G1:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G2_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G2:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
/* Immediate instruction relocations. */
case R_AARCH64_LD_PREL_LO19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_ADR_PREL_LO21:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_ADR_PREL_PG_HI21:
- ovf = reloc_insn_adrp(me, sechdrs, loc, val);
+ ovf = reloc_insn_adrp(me, sechdrs, loc, val, me);
if (ovf && ovf != -ERANGE)
return ovf;
break;
@@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST16_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST32_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST64_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST128_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_TSTBR14:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
- AARCH64_INSN_IMM_14);
+ AARCH64_INSN_IMM_14, me);
break;
case R_AARCH64_CONDBR19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
- AARCH64_INSN_IMM_26);
+ AARCH64_INSN_IMM_26, me);
if (ovf == -ERANGE) {
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
- 26, AARCH64_INSN_IMM_26);
+ 26, AARCH64_INSN_IMM_26, me);
}
break;
@@ -453,6 +466,17 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
mod->arch.ftrace_trampolines = plts;
+
+ s = find_section(hdr, sechdrs, ".init.text.ftrace_trampoline");
+ if (!s)
+ return -ENOEXEC;
+
+ plts = (void *)s->sh_addr;
+
+ __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+ mod->arch.init_ftrace_trampolines = plts;
+
#endif
return 0;
}
@@ -465,16 +489,29 @@ int module_finalize(const Elf_Ehdr *hdr,
int ret;
s = find_section(hdr, sechdrs, ".altinstructions");
- if (s)
- apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (s) {
+ ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (ret < 0) {
+ pr_err("module %s: error occurred when applying alternatives\n", me->name);
+ return ret;
+ }
+ }
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
if (s) {
- ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size);
- if (ret)
+ /*
+ * Because we can reject modules that are malformed
+ * so SCS patching fails, skip dry run and try to patch
+ * it in place. If patching fails, the module would not
+ * be loaded anyway.
+ */
+ ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true);
+ if (ret) {
pr_err("module %s: error occurred during dynamic SCS patching (%d)\n",
me->name, ret);
+ return -ENOEXEC;
+ }
}
}
diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c
new file mode 100644
index 000000000000..3a490de4fa12
--- /dev/null
+++ b/arch/arm64/kernel/mpam.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Arm Ltd. */
+
+#include <asm/mpam.h>
+
+#include <linux/arm_mpam.h>
+#include <linux/cpu_pm.h>
+#include <linux/jump_label.h>
+#include <linux/percpu.h>
+
+DEFINE_STATIC_KEY_FALSE(mpam_enabled);
+DEFINE_PER_CPU(u64, arm64_mpam_default);
+DEFINE_PER_CPU(u64, arm64_mpam_current);
+
+u64 arm64_mpam_global_default;
+
+static int mpam_pm_notifier(struct notifier_block *self,
+ unsigned long cmd, void *v)
+{
+ u64 regval;
+ int cpu = smp_processor_id();
+
+ switch (cmd) {
+ case CPU_PM_EXIT:
+ /*
+ * Don't use mpam_thread_switch() as the system register
+ * value has changed under our feet.
+ */
+ regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu));
+ write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1);
+ if (system_supports_sme()) {
+ write_sysreg_s(regval & (MPAMSM_EL1_PARTID_D | MPAMSM_EL1_PMG_D),
+ SYS_MPAMSM_EL1);
+ }
+ isb();
+
+ write_sysreg_s(regval, SYS_MPAM0_EL1);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block mpam_pm_nb = {
+ .notifier_call = mpam_pm_notifier,
+};
+
+static int __init arm64_mpam_register_cpus(void)
+{
+ u64 mpamidr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+ u16 partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, mpamidr);
+ u8 pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, mpamidr);
+
+ if (!system_supports_mpam())
+ return 0;
+
+ cpu_pm_register_notifier(&mpam_pm_nb);
+ return mpam_register_requestor(partid_max, pmg_max);
+}
+/* Must occur before mpam_msc_driver_init() from subsys_initcall() */
+arch_initcall(arm64_mpam_register_cpus)
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 2fbfd27ff5f2..6874b16d0657 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -157,6 +157,24 @@ void mte_enable_kernel_asymm(void)
mte_enable_kernel_sync();
}
}
+
+int mte_enable_kernel_store_only(void)
+{
+ /*
+ * If the CPU does not support MTE store only,
+ * the kernel checks all operations.
+ */
+ if (!cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ return -EINVAL;
+
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCSO_MASK,
+ SYS_FIELD_PREP(SCTLR_EL1, TCSO, 1));
+ isb();
+
+ pr_info_once("MTE: enabled store only mode at EL1\n");
+
+ return 0;
+}
#endif
#ifdef CONFIG_KASAN_HW_TAGS
@@ -200,7 +218,7 @@ static void mte_update_sctlr_user(struct task_struct *task)
* program requested values go with what was requested.
*/
resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
- sctlr &= ~SCTLR_EL1_TCF0_MASK;
+ sctlr &= ~(SCTLR_EL1_TCF0_MASK | SCTLR_EL1_TCSO0_MASK);
/*
* Pick an actual setting. The order in which we check for
* set bits and map into register values determines our
@@ -212,6 +230,10 @@ static void mte_update_sctlr_user(struct task_struct *task)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
+
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ sctlr |= SYS_FIELD_PREP(SCTLR_EL1, TCSO0, 1);
+
task->thread.sctlr_user = sctlr;
}
@@ -269,6 +291,9 @@ void mte_thread_switch(struct task_struct *next)
/* TCO may not have been disabled on exception entry for the current task. */
mte_disable_tco_entry(next);
+ if (!system_uses_mte_async_or_asymm_mode())
+ return;
+
/*
* Check if an async tag exception occurred at EL1.
*
@@ -293,8 +318,8 @@ void mte_cpu_setup(void)
* CnP is not a boot feature so MTE gets enabled before CnP, but let's
* make sure that is the case.
*/
- BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT);
- BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT);
+ BUG_ON(read_sysreg(ttbr0_el1) & TTBRx_EL1_CnP);
+ BUG_ON(read_sysreg(ttbr1_el1) & TTBRx_EL1_CnP);
/* Normal Tagged memory type at the corresponding MAIR index */
sysreg_clear_set(mair_el1,
@@ -328,6 +353,9 @@ void mte_suspend_enter(void)
if (!system_supports_mte())
return;
+ if (!system_uses_mte_async_or_asymm_mode())
+ return;
+
/*
* The barriers are required to guarantee that the indirect writes
* to TFSR_EL1 are synchronized before we report the state.
@@ -371,6 +399,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
(arg & PR_MTE_TCF_SYNC))
mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+ if (arg & PR_MTE_STORE_ONLY)
+ mte_ctrl |= MTE_CTRL_STORE_ONLY;
+
task->thread.mte_ctrl = mte_ctrl;
if (task == current) {
preempt_disable();
@@ -398,6 +429,8 @@ long get_mte_ctrl(struct task_struct *task)
ret |= PR_MTE_TCF_ASYNC;
if (mte_ctrl & MTE_CTRL_TCF_SYNC)
ret |= PR_MTE_TCF_SYNC;
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ ret |= PR_MTE_STORE_ONLY;
return ret;
}
@@ -449,9 +482,10 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
folio = page_folio(page);
if (folio_test_hugetlb(folio))
- WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
+ WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
+ !is_huge_zero_folio(folio));
else
- WARN_ON_ONCE(!page_mte_tagged(page));
+ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index aa718d6a9274..572efb96b23f 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -19,21 +19,12 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/static_call.h>
+#include <linux/sched/cputime.h>
#include <asm/paravirt.h>
#include <asm/pvclock-abi.h>
#include <asm/smp_plat.h>
-struct static_key paravirt_steal_enabled;
-struct static_key paravirt_steal_rq_enabled;
-
-static u64 native_steal_clock(int cpu)
-{
- return 0;
-}
-
-DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
-
struct pv_time_stolen_time_region {
struct pvclock_vcpu_stolen_time __rcu *kaddr;
};
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 4d11a8c29181..be92d73c25b2 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -2,7 +2,7 @@
# Copyright 2022 Google LLC
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
- -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_KSTACK_ERASE) \
$(DISABLE_LATENT_ENTROPY_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
@@ -41,4 +41,4 @@ obj-y := idreg-override.pi.o \
obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
-extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+targets := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index c6b185b885f7..bc57b290e5e7 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -127,6 +127,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = {
.fields = {
FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL),
+ FIELD("mpam", ID_AA64PFR0_EL1_MPAM_SHIFT, NULL),
{}
},
};
@@ -154,6 +155,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ FIELD("mpam_frac", ID_AA64PFR1_EL1_MPAM_frac_SHIFT, NULL),
{}
},
};
@@ -246,6 +248,7 @@ static const struct {
{ "rodata=off", "arm64_sw.rodataoff=1" },
{ "arm64.nolva", "id_aa64mmfr2.varange=0" },
{ "arm64.no32bit_el0", "id_aa64pfr0.el0=1" },
+ { "arm64.nompam", "id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
index 0257b43819db..e0e018046a46 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -18,8 +18,6 @@
#include "pi.h"
-extern u16 memstart_offset_seed;
-
static u64 __init get_kaslr_seed(void *fdt, int node)
{
static char const seed_str[] __initconst = "kaslr-seed";
@@ -53,8 +51,6 @@ u64 __init kaslr_early_init(void *fdt, int chosen)
return 0;
}
- memstart_offset_seed = seed & U16_MAX;
-
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index e57b043f324b..a852264958c3 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -18,9 +18,9 @@
extern const u8 __eh_frame_start[], __eh_frame_end[];
-extern void idmap_cpu_replace_ttbr1(void *pgdir);
+extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);
-static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
+static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
void *start, void *end, pgprot_t prot,
bool may_use_cont, int root_level)
{
@@ -40,7 +40,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
{
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
- u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
+ phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_t prot;
@@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
+ /*
+ * [_stext, _text) isn't executed after boot and contains some
+ * non-executable, unpredictable data, so map it non-executable.
+ */
+ map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
+ false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
@@ -90,7 +96,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
true, root_level);
dsb(ishst);
- idmap_cpu_replace_ttbr1(init_pg_dir);
+ idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);
if (twopass) {
if (IS_ENABLED(CONFIG_RELOCATABLE))
@@ -98,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
if (enable_scs) {
scs_patch(__eh_frame_start + va_offset,
- __eh_frame_end - __eh_frame_start);
+ __eh_frame_end - __eh_frame_start, false);
asm("ic ialluis");
dynamic_scs_is_enabled = true;
@@ -129,19 +135,19 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
/* Copy the root page table to its final location */
memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
dsb(ishst);
- idmap_cpu_replace_ttbr1(swapper_pg_dir);
+ idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
}
-static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
+static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
{
u64 sctlr = read_sysreg(sctlr_el1);
- u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_DS;
u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
- tcr &= ~TCR_IPS_MASK;
- tcr |= parange << TCR_IPS_SHIFT;
+ tcr &= ~TCR_EL1_IPS_MASK;
+ tcr |= parange << TCR_EL1_IPS_SHIFT;
asm(" msr sctlr_el1, %0 ;"
" isb ;"
@@ -159,7 +165,7 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
static void __init remap_idmap_for_lpa2(void)
{
/* clear the bits that change meaning once LPA2 is turned on */
- pteval_t mask = PTE_SHARED;
+ ptdesc_t mask = PTE_SHARED;
/*
* We have to clear bits [9:8] in all block or page descriptors in the
@@ -172,30 +178,30 @@ static void __init remap_idmap_for_lpa2(void)
*/
create_init_idmap(init_pg_dir, mask);
dsb(ishst);
- set_ttbr0_for_lpa2((u64)init_pg_dir);
+ set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);
/*
* Recreate the initial ID map with the same granularity as before.
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
- (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
+ (char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
/* switch back to the updated initial ID map */
- set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
+ set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);
/* wipe the temporary ID map from memory */
- memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
+ memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
}
-static void __init map_fdt(u64 fdt)
+static void *__init map_fdt(phys_addr_t fdt)
{
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
- u64 efdt = fdt + MAX_FDT_SIZE;
- u64 ptep = (u64)ptes;
+ phys_addr_t efdt = fdt + MAX_FDT_SIZE;
+ phys_addr_t ptep = (phys_addr_t)ptes; /* We're idmapped when called */
/*
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
@@ -205,9 +211,34 @@ static void __init map_fdt(u64 fdt)
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
(pte_t *)init_idmap_pg_dir, false, 0);
dsb(ishst);
+
+ return (void *)fdt;
+}
+
+/*
+ * PI version of the Cavium Eratum 27456 detection, which makes it
+ * impossible to use non-global mappings.
+ */
+static bool __init ng_mappings_allowed(void)
+{
+ static const struct midr_range cavium_erratum_27456_cpus[] __initconst = {
+ /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
+ /* Cavium ThunderX, T81 pass 1.0 */
+ MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+ {},
+ };
+
+ for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) {
+ if (midr_is_cpu_model_range(read_cpuid_id(), r->model,
+ r->rv_min, r->rv_max))
+ return false;
+ }
+
+ return true;
}
-asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
+asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
{
static char const chosen_str[] __initconst = "/chosen";
u64 va_base, pa_base = (u64)&_text;
@@ -215,15 +246,14 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
int va_bits = VA_BITS;
int chosen;
-
- map_fdt((u64)fdt);
+ void *fdt_mapped = map_fdt(fdt);
/* Clear BSS and the initial page tables */
- memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
+ memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);
/* Parse the command line for CPU feature overrides */
- chosen = fdt_path_offset(fdt, chosen_str);
- init_feature_override(boot_status, fdt, chosen);
+ chosen = fdt_path_offset(fdt_mapped, chosen_str);
+ init_feature_override(boot_status, fdt_mapped, chosen);
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
va_bits = VA_BITS_MIN;
@@ -233,7 +263,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
}
if (va_bits > VA_BITS_MIN)
- sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
+ sysreg_clear_set(tcr_el1, TCR_EL1_T1SZ_MASK, TCR_T1SZ(va_bits));
/*
* The virtual KASLR displacement modulo 2MiB is decided by the
@@ -243,10 +273,10 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
* fill in the high bits from the seed.
*/
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- u64 kaslr_seed = kaslr_early_init(fdt, chosen);
+ u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);
if (kaslr_seed && kaslr_requires_kpti())
- arm64_use_ng_mappings = true;
+ arm64_use_ng_mappings = ng_mappings_allowed();
kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
}
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 2b69e3beeef8..de52cd85c691 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -26,12 +26,13 @@
* @va_offset: Offset between a physical page and its current mapping
* in the VA space
*/
-void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
- int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
+void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset)
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
- pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
- int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
u64 lmask = (PAGE_SIZE << lshift) - 1;
start &= PAGE_MASK;
@@ -45,12 +46,12 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
* clearing the mapping
*/
if (protval)
- protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+ protval |= (level == 2) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
while (start < end) {
u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
- if (level < 3 && (start | next | pa) & lmask) {
+ if (level < 2 || (level == 2 && (start | next | pa) & lmask)) {
/*
* This chunk needs a finer grained mapping. Create a
* table mapping if necessary and recurse.
@@ -87,19 +88,22 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
}
}
-asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
+asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
{
- u64 ptep = (u64)pg_dir + PAGE_SIZE;
+ phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_val(text_prot) &= ~clrmask;
pgprot_val(data_prot) &= ~clrmask;
- map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
- text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
- map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
- data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ /* MMU is off; pointer casts to phys_addr_t are safe */
+ map_range(&ptep, (u64)_stext, (u64)__initdata_begin,
+ (phys_addr_t)_stext, text_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
+ map_range(&ptep, (u64)__initdata_begin, (u64)_end,
+ (phys_addr_t)__initdata_begin, data_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
return ptep;
}
diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c
index 55d0cd64ef71..dac568e4a54f 100644
--- a/arch/arm64/kernel/pi/patch-scs.c
+++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -192,6 +192,14 @@ static int scs_handle_fde_frame(const struct eh_frame *frame,
size -= 2;
break;
+ case DW_CFA_advance_loc4:
+ loc += *opcode++ * code_alignment_factor;
+ loc += (*opcode++ << 8) * code_alignment_factor;
+ loc += (*opcode++ << 16) * code_alignment_factor;
+ loc += (*opcode++ << 24) * code_alignment_factor;
+ size -= 4;
+ break;
+
case DW_CFA_def_cfa:
case DW_CFA_offset_extended:
size = skip_xleb128(&opcode, size);
@@ -225,7 +233,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame,
return 0;
}
-int scs_patch(const u8 eh_frame[], int size)
+int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run)
{
int code_alignment_factor = 1;
bool fde_use_sdata8 = false;
@@ -277,11 +285,13 @@ int scs_patch(const u8 eh_frame[], int size)
}
} else {
ret = scs_handle_fde_frame(frame, code_alignment_factor,
- fde_use_sdata8, true);
+ fde_use_sdata8, !skip_dry_run);
if (ret)
return ret;
- scs_handle_fde_frame(frame, code_alignment_factor,
- fde_use_sdata8, false);
+
+ if (!skip_dry_run)
+ scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, false);
}
p += sizeof(frame->size) + frame->size;
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index c91e5e965cd3..aec3172d4003 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -22,15 +22,17 @@ static inline void *prel64_to_pointer(const prel64_t *offset)
extern bool dynamic_scs_is_enabled;
extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
+extern pgd_t init_pg_dir[], init_pg_end[];
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
-int scs_patch(const u8 eh_frame[], int size);
+int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
-void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
- int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
+void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset);
-asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
+asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt);
-asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
+asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 6438bf62e753..4137cc5ef031 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -108,9 +108,10 @@ arm_probe_decode_insn(u32 insn, struct arch_probe_insn *api)
aarch64_insn_is_bl(insn)) {
api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
- aarch64_insn_is_blr(insn) ||
- aarch64_insn_is_ret(insn)) {
- api->handler = simulate_br_blr_ret;
+ aarch64_insn_is_blr(insn)) {
+ api->handler = simulate_br_blr;
+ } else if (aarch64_insn_is_ret(insn)) {
+ api->handler = simulate_ret;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d9e462eafb95..43a0361a8bf0 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -41,6 +42,20 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ if (set_memory_rox((unsigned long)addr, 1)) {
+ execmem_free(addr);
+ return NULL;
+ }
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
kprobe_opcode_t *addr = p->ainsn.xol_insn;
@@ -292,8 +307,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
return 0;
}
-static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe *p, *cur_kprobe;
struct kprobe_ctlblk *kcb;
@@ -336,13 +351,8 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kprobes_break_hook = {
- .imm = KPROBES_BRK_IMM,
- .fn = kprobe_breakpoint_handler,
-};
-
-static int __kprobes
-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_ss_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long addr = instruction_pointer(regs);
@@ -360,13 +370,8 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook kprobes_break_ss_hook = {
- .imm = KPROBES_BRK_SS_IMM,
- .fn = kprobe_breakpoint_ss_handler,
-};
-
-static int __kprobes
-kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kretprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
if (regs->pc != (unsigned long)__kretprobe_trampoline)
return DBG_HOOK_ERROR;
@@ -375,11 +380,6 @@ kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kretprobes_break_hook = {
- .imm = KRETPROBES_BRK_IMM,
- .fn = kretprobe_breakpoint_handler,
-};
-
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
@@ -422,9 +422,5 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
- register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_break_hook(&kprobes_break_ss_hook);
- register_kernel_break_hook(&kretprobes_break_hook);
-
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index a362f3dbb3d1..b60739d3983f 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -12,7 +12,7 @@
SYM_CODE_START(__kretprobe_trampoline)
/*
* Trigger a breakpoint exception. The PC will be adjusted by
- * kretprobe_breakpoint_handler(), and no subsequent instructions will
+ * kretprobe_brk_handler(), and no subsequent instructions will
* be executed from the trampoline.
*/
brk #KRETPROBES_BRK_IMM
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 4c6d2d712fbd..89fbeb32107e 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,6 +13,7 @@
#include <asm/traps.h>
#include "simulate-insn.h"
+#include "asm/gcs.h"
#define bbl_displacement(insn) \
sign_extend32(((insn) & 0x3ffffff) << 2, 27)
@@ -49,6 +50,21 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
return lower_32_bits(pt_regs_read_reg(regs, reg));
}
+static inline int update_lr(struct pt_regs *regs, long addr)
+{
+ int err = 0;
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ push_user_gcs(addr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ return err;
+ }
+ }
+ procedure_link_pointer_set(regs, addr);
+ return err;
+}
+
static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
{
int xn = opcode & 0x1f;
@@ -107,9 +123,9 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
{
int disp = bbl_displacement(opcode);
- /* Link register is x30 */
if (opcode & (1 << 31))
- set_x_reg(regs, 30, addr + 4);
+ if (update_lr(regs, addr + 4))
+ return;
instruction_pointer_set(regs, addr + disp);
}
@@ -126,16 +142,34 @@ simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs)
}
void __kprobes
-simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs)
+simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
{
int xn = (opcode >> 5) & 0x1f;
+ u64 b_target = get_x_reg(regs, xn);
- /* update pc first in case we're doing a "blr lr" */
- instruction_pointer_set(regs, get_x_reg(regs, xn));
-
- /* Link register is x30 */
if (((opcode >> 21) & 0x3) == 1)
- set_x_reg(regs, 30, addr + 4);
+ if (update_lr(regs, addr + 4))
+ return;
+
+ instruction_pointer_set(regs, b_target);
+}
+
+void __kprobes
+simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
+{
+ u64 ret_addr;
+ int err = 0;
+ int xn = (opcode >> 5) & 0x1f;
+ u64 r_target = get_x_reg(regs, xn);
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ ret_addr = pop_user_gcs(&err);
+ if (err || ret_addr != r_target) {
+ force_sig(SIGSEGV);
+ return;
+ }
+ }
+ instruction_pointer_set(regs, r_target);
}
void __kprobes
diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h
index efb2803ec943..9e772a292d56 100644
--- a/arch/arm64/kernel/probes/simulate-insn.h
+++ b/arch/arm64/kernel/probes/simulate-insn.h
@@ -11,7 +11,8 @@
void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs);
void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs);
void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs);
-void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ret(u32 opcode, long addr, struct pt_regs *regs);
void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs);
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index cb3d05af36e3..2a9b0bc083a3 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -6,6 +6,7 @@
#include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <asm/cacheflush.h>
+#include <asm/gcs.h>
#include "decode-insn.h"
@@ -14,7 +15,7 @@
void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *src, unsigned long len)
{
- void *xol_page_kaddr = kmap_atomic(page);
+ void *xol_page_kaddr = kmap_local_page(page);
void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
/*
@@ -31,7 +32,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
done:
- kunmap_atomic(xol_page_kaddr);
+ kunmap_local(xol_page_kaddr);
}
unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
@@ -102,10 +103,7 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
* insn itself is trapped, then detect the case with the help of
* invalid fault code which is being set in arch_uprobe_pre_xol
*/
- if (t->thread.fault_code != UPROBE_INV_FAULT_CODE)
- return true;
-
- return false;
+ return t->thread.fault_code != UPROBE_INV_FAULT_CODE;
}
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -130,7 +128,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
/*
- * Task has received a fatal signal, so reset back to probbed
+ * Task has received a fatal signal, so reset back to probed
* address.
*/
instruction_pointer_set(regs, utask->vaddr);
@@ -159,11 +157,43 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
struct pt_regs *regs)
{
unsigned long orig_ret_vaddr;
+ unsigned long gcs_ret_vaddr;
+ int err = 0;
+ u64 gcspr;
orig_ret_vaddr = procedure_link_pointer(regs);
+
+ if (task_gcs_el0_enabled(current)) {
+ gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+ gcs_ret_vaddr = get_user_gcs((__force unsigned long __user *)gcspr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ goto out;
+ }
+
+ /*
+ * If the LR and GCS return addr don't match, then some kind of PAC
+ * signing or control flow occurred since entering the probed function.
+ * Likely because the user is attempting to retprobe on an instruction
+ * that isn't a function boundary or inside a leaf function. Explicitly
+ * abort this retprobe because it will generate a GCS exception.
+ */
+ if (gcs_ret_vaddr != orig_ret_vaddr) {
+ orig_ret_vaddr = -1;
+ goto out;
+ }
+
+ put_user_gcs(trampoline_vaddr, (__force unsigned long __user *)gcspr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ goto out;
+ }
+ }
+
/* Replace the return addr with trampoline addr */
procedure_link_pointer_set(regs, trampoline_vaddr);
+out:
return orig_ret_vaddr;
}
@@ -173,7 +203,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
return NOTIFY_DONE;
}
-static int uprobe_breakpoint_handler(struct pt_regs *regs,
+int uprobe_brk_handler(struct pt_regs *regs,
unsigned long esr)
{
if (uprobe_pre_sstep_notifier(regs))
@@ -182,7 +212,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-static int uprobe_single_step_handler(struct pt_regs *regs,
+int uprobe_single_step_handler(struct pt_regs *regs,
unsigned long esr)
{
struct uprobe_task *utask = current->utask;
@@ -194,23 +224,3 @@ static int uprobe_single_step_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-/* uprobe breakpoint handler hook */
-static struct break_hook uprobes_break_hook = {
- .imm = UPROBES_BRK_IMM,
- .fn = uprobe_breakpoint_handler,
-};
-
-/* uprobe single step handler hook */
-static struct step_hook uprobes_step_hook = {
- .fn = uprobe_single_step_handler,
-};
-
-static int __init arch_init_uprobes(void)
-{
- register_user_break_hook(&uprobes_break_hook);
- register_user_step_hook(&uprobes_step_hook);
-
- return 0;
-}
-
-device_initcall(arch_init_uprobes);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 42faebb7b712..c0bf1f46cdc6 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -51,6 +51,7 @@
#include <asm/fpsimd.h>
#include <asm/gcs.h>
#include <asm/mmu_context.h>
+#include <asm/mpam.h>
#include <asm/mte.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
@@ -288,8 +289,11 @@ static void flush_gcs(void)
if (!system_supports_gcs())
return;
- gcs_free(current);
+ current->thread.gcspr_el0 = 0;
+ current->thread.gcs_base = 0;
+ current->thread.gcs_size = 0;
current->thread.gcs_el0_mode = 0;
+ current->thread.gcs_el0_locked = 0;
write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
write_sysreg_s(0, SYS_GCSPR_EL0);
}
@@ -305,13 +309,13 @@ static int copy_thread_gcs(struct task_struct *p,
p->thread.gcs_base = 0;
p->thread.gcs_size = 0;
+ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
+ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
+
gcs = gcs_alloc_thread_stack(p, args);
if (IS_ERR_VALUE(gcs))
return PTR_ERR((void *)gcs);
- p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
- p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
-
return 0;
}
@@ -339,55 +343,38 @@ void flush_thread(void)
void arch_release_task_struct(struct task_struct *tsk)
{
fpsimd_release_task(tsk);
- gcs_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- if (current->mm)
- fpsimd_preserve_current_state();
+ /*
+ * The current/src task's FPSIMD state may or may not be live, and may
+ * have been altered by ptrace after entry to the kernel. Save the
+ * effective FPSIMD state so that this will be copied into dst.
+ */
+ fpsimd_save_and_flush_current_state();
+ fpsimd_sync_from_effective_state(src);
+
*dst = *src;
/*
- * Detach src's sve_state (if any) from dst so that it does not
- * get erroneously used or freed prematurely. dst's copies
- * will be allocated on demand later on if dst uses SVE.
- * For consistency, also clear TIF_SVE here: this could be done
- * later in copy_process(), but to avoid tripping up future
- * maintainers it is best not to leave TIF flags and buffers in
- * an inconsistent state, even temporarily.
+ * Drop stale reference to src's sve_state and convert dst to
+ * non-streaming FPSIMD mode.
*/
+ dst->thread.fp_type = FP_STATE_FPSIMD;
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
+ task_smstop_sm(dst);
/*
- * In the unlikely event that we create a new thread with ZA
- * enabled we should retain the ZA and ZT state so duplicate
- * it here. This may be shortly freed if we exec() or if
- * CLONE_SETTLS but it's simpler to do it here. To avoid
- * confusing the rest of the code ensure that we have a
- * sve_state allocated whenever sme_state is allocated.
+ * Drop stale reference to src's sme_state and ensure dst has ZA
+ * disabled.
+ *
+ * When necessary, ZA will be inherited later in copy_thread_za().
*/
- if (thread_za_enabled(&src->thread)) {
- dst->thread.sve_state = kzalloc(sve_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sve_state)
- return -ENOMEM;
-
- dst->thread.sme_state = kmemdup(src->thread.sme_state,
- sme_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sme_state) {
- kfree(dst->thread.sve_state);
- dst->thread.sve_state = NULL;
- return -ENOMEM;
- }
- } else {
- dst->thread.sme_state = NULL;
- clear_tsk_thread_flag(dst, TIF_SME);
- }
-
- dst->thread.fp_type = FP_STATE_FPSIMD;
+ dst->thread.sme_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr &= ~SVCR_ZA_MASK;
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
@@ -395,11 +382,36 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+static int copy_thread_za(struct task_struct *dst, struct task_struct *src)
+{
+ if (!thread_za_enabled(&src->thread))
+ return 0;
+
+ dst->thread.sve_state = kzalloc(sve_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sve_state)
+ return -ENOMEM;
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
+ kfree(dst->thread.sve_state);
+ dst->thread.sve_state = NULL;
+ return -ENOMEM;
+ }
+
+ set_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr |= SVCR_ZA_MASK;
+
+ return 0;
+}
+
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
+ u64 clone_flags = args->flags;
unsigned long stack_start = args->stack;
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
@@ -427,8 +439,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
- if (system_supports_tpidr2())
- p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
if (system_supports_poe())
p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
@@ -441,13 +451,39 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
}
/*
+ * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and
+ * TPIDR2 need to be manipulated as a pair, and either both
+ * need to be inherited or both need to be reset.
+ *
+ * Within a process, child threads must not inherit their
+ * parent's TPIDR2 value or they may clobber their parent's
+ * stack at some later point.
+ *
+ * When a process is fork()'d, the child must inherit ZA and
+ * TPIDR2 from its parent in case there was dormant ZA state.
+ *
+ * Use CLONE_VM to determine when the child will share the
+ * address space with the parent, and cannot safely inherit the
+ * state.
+ */
+ if (system_supports_sme()) {
+ if (!(clone_flags & CLONE_VM)) {
+ p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ ret = copy_thread_za(p, current);
+ if (ret)
+ return ret;
+ } else {
+ p->thread.tpidr2_el0 = 0;
+ WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK);
+ }
+ }
+
+ /*
* If a TLS pointer was passed to clone, use it for the new
- * thread. We also reset TPIDR2 if it's in use.
+ * thread.
*/
- if (clone_flags & CLONE_SETTLS) {
+ if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
- p->thread.tpidr2_el0 = 0;
- }
ret = copy_thread_gcs(p, args);
if (ret != 0)
@@ -638,6 +674,11 @@ static void permission_overlay_switch(struct task_struct *next)
current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (current->thread.por_el0 != next->thread.por_el0) {
write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
}
}
@@ -659,6 +700,29 @@ void update_sctlr_el1(u64 sctlr)
isb();
}
+static inline void debug_switch_state(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ unsigned long daif_expected = 0;
+ unsigned long daif_actual = read_sysreg(daif);
+ unsigned long pmr_expected = GIC_PRIO_IRQOFF;
+ unsigned long pmr_actual = read_sysreg_s(SYS_ICC_PMR_EL1);
+
+ WARN_ONCE(daif_actual != daif_expected ||
+ pmr_actual != pmr_expected,
+ "Unexpected DAIF + PMR: 0x%lx + 0x%lx (expected 0x%lx + 0x%lx)\n",
+ daif_actual, pmr_actual,
+ daif_expected, pmr_expected);
+ } else {
+ unsigned long daif_expected = DAIF_PROCCTX_NOIRQ;
+ unsigned long daif_actual = read_sysreg(daif);
+
+ WARN_ONCE(daif_actual != daif_expected,
+ "Unexpected DAIF value: 0x%lx (expected 0x%lx)\n",
+ daif_actual, daif_expected);
+ }
+}
+
/*
* Thread switching.
*/
@@ -668,6 +732,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
{
struct task_struct *last;
+ debug_switch_state();
+
fpsimd_thread_switch(next);
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
@@ -680,10 +746,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
gcs_thread_switch(next);
/*
- * Complete any pending TLB or cache maintenance on this CPU in case
- * the thread migrates to a different CPU.
- * This full barrier is also required by the membarrier system
- * call.
+ * Complete any pending TLB or cache maintenance on this CPU in case the
+ * thread migrates to a different CPU. This full barrier is also
+ * required by the membarrier system call. Additionally it makes any
+ * in-progress pgtable writes visible to the table walker; See
+ * emit_pte_barriers().
*/
dsb(ish);
@@ -697,6 +764,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (prev->thread.sctlr_user != next->thread.sctlr_user)
update_sctlr_el1(next->thread.sctlr_user);
+ /*
+ * MPAM thread switch happens after the DSB to ensure prev's accesses
+ * use prev's MPAM settings.
+ */
+ mpam_thread_switch(next);
+
/* the actual thread switch */
last = cpu_switch_to(prev, next);
@@ -815,10 +888,14 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
if (is_compat_thread(ti))
return -EINVAL;
- if (system_supports_mte())
+ if (system_supports_mte()) {
valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
| PR_MTE_TAG_MASK;
+ if (cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ valid_mask |= PR_MTE_STORE_ONLY;
+ }
+
if (arg & ~valid_mask)
return -EINVAL;
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index da53722f95d4..b3801f532b10 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param);
static bool spectre_v2_mitigations_off(void)
{
- bool ret = __nospectre_v2 || cpu_mitigations_off();
-
- if (ret)
- pr_info_once("spectre-v2 mitigation disabled by command line option\n");
-
- return ret;
+ return __nospectre_v2 || cpu_mitigations_off();
}
static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
@@ -172,7 +167,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
return SPECTRE_UNAFFECTED;
/* Alternatively, we have a list of unaffected CPUs */
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+ if (is_midr_in_range_list(spectre_v2_safe_list))
return SPECTRE_UNAFFECTED;
return SPECTRE_VULNERABLE;
@@ -331,7 +326,7 @@ bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope)
};
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list);
+ return is_midr_in_range_list(spectre_v3a_unsafe_list);
}
void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
@@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param);
*/
static bool spectre_v4_mitigations_off(void)
{
- bool ret = cpu_mitigations_off() ||
- __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
-
- if (ret)
- pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
-
- return ret;
+ return cpu_mitigations_off() ||
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
}
/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
@@ -475,7 +465,7 @@ static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void)
{ /* sentinel */ },
};
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list))
+ if (is_midr_in_range_list(spectre_v4_safe_list))
return SPECTRE_UNAFFECTED;
/* CPU features are detected first */
@@ -845,52 +835,92 @@ static unsigned long system_bhb_mitigations;
* This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
* SCOPE_SYSTEM call will give the right answer.
*/
-u8 spectre_bhb_loop_affected(int scope)
+static bool is_spectre_bhb_safe(int scope)
+{
+ static const struct midr_range spectre_bhb_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A520),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ {},
+ };
+ static bool all_safe = true;
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return all_safe;
+
+ if (is_midr_in_range_list(spectre_bhb_safe_list))
+ return true;
+
+ all_safe = false;
+
+ return false;
+}
+
+static u8 spectre_bhb_loop_affected(void)
{
u8 k = 0;
- static u8 max_bhb_k;
-
- if (scope == SCOPE_LOCAL_CPU) {
- static const struct midr_range spectre_bhb_k32_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
- {},
- };
- static const struct midr_range spectre_bhb_k24_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
- {},
- };
- static const struct midr_range spectre_bhb_k11_list[] = {
- MIDR_ALL_VERSIONS(MIDR_AMPERE1),
- {},
- };
- static const struct midr_range spectre_bhb_k8_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- {},
- };
-
- if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
- k = 32;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
- k = 24;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
- k = 11;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
- k = 8;
-
- max_bhb_k = max(max_bhb_k, k);
- } else {
- k = max_bhb_k;
- }
+
+ static const struct midr_range spectre_bhb_k132_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_HISI_HIP09),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(spectre_bhb_k132_list))
+ k = 132;
+ else if (is_midr_in_range_list(spectre_bhb_k38_list))
+ k = 38;
+ else if (is_midr_in_range_list(spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(spectre_bhb_k8_list))
+ k = 8;
return k;
}
@@ -916,29 +946,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
}
}
-static bool is_spectre_bhb_fw_affected(int scope)
+static bool has_spectre_bhb_fw_mitigation(void)
{
- static bool system_affected;
enum mitigation_state fw_state;
bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
- static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- {},
- };
- bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
- spectre_bhb_firmware_mitigated_list);
-
- if (scope != SCOPE_LOCAL_CPU)
- return system_affected;
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
- system_affected = true;
- return true;
- }
-
- return false;
+ return has_smccc && fw_state == SPECTRE_MITIGATED;
}
static bool supports_ecbhb(int scope)
@@ -954,6 +968,8 @@ static bool supports_ecbhb(int scope)
ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
+static u8 max_bhb_k;
+
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -962,16 +978,23 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
if (supports_csv2p3(scope))
return false;
- if (supports_clearbhb(scope))
- return true;
+ if (is_spectre_bhb_safe(scope))
+ return false;
- if (spectre_bhb_loop_affected(scope))
- return true;
+ /*
+ * At this point the core isn't known to be "safe" so we're going to
+ * assume it's vulnerable. We still need to update `max_bhb_k` though,
+ * but only if we aren't mitigating with clearbhb though.
+ */
+ if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU))
+ max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected());
- if (is_spectre_bhb_fw_affected(scope))
- return true;
+ return true;
+}
- return false;
+u8 get_spectre_bhb_loop_value(void)
+{
+ return max_bhb_k;
}
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
@@ -991,7 +1014,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
isb();
}
-static bool __read_mostly __nospectre_bhb;
+bool __read_mostly __nospectre_bhb;
static int __init parse_spectre_bhb_param(char *str)
{
__nospectre_bhb = true;
@@ -1002,7 +1025,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cpu_cb;
- enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ enum mitigation_state state = SPECTRE_VULNERABLE;
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
@@ -1011,9 +1034,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
/* No point mitigating Spectre-BHB alone. */
} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
- pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
- } else if (cpu_mitigations_off() || __nospectre_bhb) {
- pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+ /* Do nothing */
} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
state = SPECTRE_MITIGATED;
set_bit(BHB_HW, &system_bhb_mitigations);
@@ -1028,7 +1049,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
state = SPECTRE_MITIGATED;
set_bit(BHB_INSN, &system_bhb_mitigations);
- } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ } else if (spectre_bhb_loop_affected()) {
/*
* Ensure KVM uses the indirect vector which will have the
* branchy-loop added. A57/A72-r0 will already have selected
@@ -1041,37 +1062,39 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
state = SPECTRE_MITIGATED;
set_bit(BHB_LOOP, &system_bhb_mitigations);
- } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
- fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (fw_state == SPECTRE_MITIGATED) {
- /*
- * Ensure KVM uses one of the spectre bp_hardening
- * vectors. The indirect vector doesn't include the EL3
- * call, so needs upgrading to
- * HYP_VECTOR_SPECTRE_INDIRECT.
- */
- if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
- data->slot += 1;
-
- this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
-
- /*
- * The WA3 call in the vectors supersedes the WA1 call
- * made during context-switch. Uninstall any firmware
- * bp_hardening callback.
- */
- cpu_cb = spectre_v2_get_sw_mitigation_cb();
- if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
- __this_cpu_write(bp_hardening_data.fn, NULL);
-
- state = SPECTRE_MITIGATED;
- set_bit(BHB_FW, &system_bhb_mitigations);
- }
+ } else if (has_spectre_bhb_fw_mitigation()) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
}
update_mitigation_state(&spectre_bhb_state, state);
}
+bool is_spectre_bhb_fw_mitigated(void)
+{
+ return test_bit(BHB_FW, &system_bhb_mitigations);
+}
+
/* Patched to NOP when enabled */
void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
__le32 *origptr,
@@ -1100,7 +1123,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
{
u8 rd;
u32 insn;
- u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
BUG_ON(nr_inst != 1); /* MOV -> MOV */
@@ -1109,7 +1131,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
insn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
- insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr++ = cpu_to_le32(insn);
@@ -1166,3 +1188,18 @@ void unpriv_ebpf_notify(int new_state)
pr_err("WARNING: %s", EBPF_WARN);
}
#endif
+
+void spectre_print_disabled_mitigations(void)
+{
+ /* Keep a single copy of the common message suffix to avoid duplication. */
+ const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n";
+
+ if (spectre_v2_mitigations_off())
+ pr_info("spectre-v2 %s", spectre_disabled_suffix);
+
+ if (spectre_v4_mitigations_off())
+ pr_info("spectre-v4 %s", spectre_disabled_suffix);
+
+ if (__nospectre_bhb || cpu_mitigations_off())
+ pr_info("spectre-bhb %s", spectre_disabled_suffix);
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index f79b0d5f71ac..ba5eab23fd90 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
+ return READ_ONCE_NOCHECK(*addr);
else
return 0;
}
@@ -594,7 +594,7 @@ static int __fpr_get(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
- sve_sync_to_fpsimd(target);
+ fpsimd_sync_from_effective_state(target);
uregs = &target->thread.uw.fpsimd_state;
@@ -626,7 +626,7 @@ static int __fpr_set(struct task_struct *target,
* Ensure target->thread.uw.fpsimd_state is up to date, so that a
* short copyin can't resurrect stale data.
*/
- sve_sync_to_fpsimd(target);
+ fpsimd_sync_from_effective_state(target);
newstate = target->thread.uw.fpsimd_state;
@@ -653,7 +653,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- sve_sync_from_fpsimd_zeropad(target);
+ fpsimd_sync_to_effective_state_zeropad(target);
fpsimd_flush_task_state(target);
return ret;
@@ -775,6 +775,11 @@ static void sve_init_header_from_task(struct user_sve_header *header,
task_type = ARM64_VEC_SVE;
active = (task_type == type);
+ if (active && target->thread.fp_type == FP_STATE_SVE)
+ header->flags = SVE_PT_REGS_SVE;
+ else
+ header->flags = SVE_PT_REGS_FPSIMD;
+
switch (type) {
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
@@ -789,19 +794,14 @@ static void sve_init_header_from_task(struct user_sve_header *header,
return;
}
- if (active) {
- if (target->thread.fp_type == FP_STATE_FPSIMD) {
- header->flags |= SVE_PT_REGS_FPSIMD;
- } else {
- header->flags |= SVE_PT_REGS_SVE;
- }
- }
-
header->vl = task_get_vl(target, type);
vq = sve_vq_from_vl(header->vl);
header->max_vl = vec_max_vl(type);
- header->size = SVE_PT_SIZE(vq, header->flags);
+ if (active)
+ header->size = SVE_PT_SIZE(vq, header->flags);
+ else
+ header->size = sizeof(header);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
}
@@ -820,18 +820,25 @@ static int sve_get_common(struct task_struct *target,
unsigned int vq;
unsigned long start, end;
+ if (target == current)
+ fpsimd_preserve_current_state();
+
/* Header */
sve_init_header_from_task(&header, target, type);
vq = sve_vq_from_vl(header.vl);
membuf_write(&to, &header, sizeof(header));
- if (target == current)
- fpsimd_preserve_current_state();
-
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+ /*
+ * When the requested vector type is not active, do not present data
+ * from the other mode to userspace.
+ */
+ if (header.size == sizeof(header))
+ return 0;
+
switch ((header.flags & SVE_PT_REGS_MASK)) {
case SVE_PT_REGS_FPSIMD:
return __fpr_get(target, regset, to);
@@ -859,7 +866,7 @@ static int sve_get_common(struct task_struct *target,
return membuf_zero(&to, end - start);
default:
- return 0;
+ BUILD_BUG();
}
}
@@ -883,6 +890,9 @@ static int sve_set_common(struct task_struct *target,
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
+ bool fpsimd;
+
+ fpsimd_flush_task_state(target);
/* Header */
if (count < sizeof(header))
@@ -890,16 +900,65 @@ static int sve_set_common(struct task_struct *target,
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
0, sizeof(header));
if (ret)
- goto out;
+ return ret;
/*
- * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
- * vec_set_vector_length(), which will also validate them for us:
+ * Streaming SVE data is always stored and presented in SVE format.
+ * Require the user to provide SVE formatted data for consistency, and
+ * to avoid the risk that we configure the task into an invalid state.
*/
- ret = vec_set_vector_length(target, type, header.vl,
- ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
- if (ret)
- goto out;
+ fpsimd = (header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD;
+ if (fpsimd && type == ARM64_VEC_SME)
+ return -EINVAL;
+
+ /*
+ * On systems without SVE we accept FPSIMD format writes with
+ * a VL of 0 to allow exiting streaming mode, otherwise a VL
+ * is required.
+ */
+ if (header.vl) {
+ /*
+ * If the system does not support SVE we can't
+ * configure a SVE VL.
+ */
+ if (!system_supports_sve() && type == ARM64_VEC_SVE)
+ return -EINVAL;
+
+ /*
+ * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are
+ * consumed by vec_set_vector_length(), which will
+ * also validate them for us:
+ */
+ ret = vec_set_vector_length(target, type, header.vl,
+ ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
+ if (ret)
+ return ret;
+ } else {
+ /* If the system supports SVE we require a VL. */
+ if (system_supports_sve())
+ return -EINVAL;
+
+ /*
+ * Only FPSIMD formatted data with no flags set is
+ * supported.
+ */
+ if (header.flags != SVE_PT_REGS_FPSIMD)
+ return -EINVAL;
+ }
+
+ /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */
+ if (type == ARM64_VEC_SME) {
+ sme_alloc(target, false);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ /* Allocate SVE storage if necessary, zeroing any existing SVE state */
+ if (!fpsimd) {
+ sve_alloc(target, true);
+ if (!target->thread.sve_state)
+ return -ENOMEM;
+ }
/*
* Actual VL set may be different from what the user asked
@@ -909,82 +968,46 @@ static int sve_set_common(struct task_struct *target,
vq = sve_vq_from_vl(task_get_vl(target, type));
/* Enter/exit streaming mode */
- if (system_supports_sme()) {
- u64 old_svcr = target->thread.svcr;
-
- switch (type) {
- case ARM64_VEC_SVE:
- target->thread.svcr &= ~SVCR_SM_MASK;
- break;
- case ARM64_VEC_SME:
- target->thread.svcr |= SVCR_SM_MASK;
-
- /*
- * Disable traps and ensure there is SME storage but
- * preserve any currently set values in ZA/ZT.
- */
- sme_alloc(target, false);
- set_tsk_thread_flag(target, TIF_SME);
- break;
- default:
- WARN_ON_ONCE(1);
- ret = -EINVAL;
- goto out;
- }
-
- /*
- * If we switched then invalidate any existing SVE
- * state and ensure there's storage.
- */
- if (target->thread.svcr != old_svcr)
- sve_alloc(target, true);
+ switch (type) {
+ case ARM64_VEC_SVE:
+ target->thread.svcr &= ~SVCR_SM_MASK;
+ set_tsk_thread_flag(target, TIF_SVE);
+ break;
+ case ARM64_VEC_SME:
+ target->thread.svcr |= SVCR_SM_MASK;
+ set_tsk_thread_flag(target, TIF_SME);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
+ /* Always zero V regs, FPSR, and FPCR */
+ memset(&current->thread.uw.fpsimd_state, 0,
+ sizeof(current->thread.uw.fpsimd_state));
+
/* Registers: FPSIMD-only case */
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
- if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
- ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
- SVE_PT_FPSIMD_OFFSET);
+ if (fpsimd) {
clear_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_FPSIMD;
- goto out;
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+ return ret;
}
- /*
- * Otherwise: no registers or full SVE case. For backwards
- * compatibility reasons we treat empty flags as SVE registers.
- */
+ /* Otherwise: no registers or full SVE case. */
+
+ target->thread.fp_type = FP_STATE_SVE;
/*
* If setting a different VL from the requested VL and there is
* register data, the data layout will be wrong: don't even
* try to set the registers in this case.
*/
- if (count && vq != sve_vq_from_vl(header.vl)) {
- ret = -EIO;
- goto out;
- }
-
- sve_alloc(target, true);
- if (!target->thread.sve_state) {
- ret = -ENOMEM;
- clear_tsk_thread_flag(target, TIF_SVE);
- target->thread.fp_type = FP_STATE_FPSIMD;
- goto out;
- }
-
- /*
- * Ensure target->thread.sve_state is up to date with target's
- * FPSIMD regs, so that a short copyin leaves trailing
- * registers unmodified. Only enable SVE if we are
- * configuring normal SVE, a system with streaming SVE may not
- * have normal SVE.
- */
- fpsimd_sync_to_sve(target);
- if (type == ARM64_VEC_SVE)
- set_tsk_thread_flag(target, TIF_SVE);
- target->thread.fp_type = FP_STATE_SVE;
+ if (count && vq != sve_vq_from_vl(header.vl))
+ return -EIO;
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
@@ -993,7 +1016,7 @@ static int sve_set_common(struct task_struct *target,
target->thread.sve_state,
start, end);
if (ret)
- goto out;
+ return ret;
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
@@ -1009,8 +1032,6 @@ static int sve_set_common(struct task_struct *target,
&target->thread.uw.fpsimd_state.fpsr,
start, end);
-out:
- fpsimd_flush_task_state(target);
return ret;
}
@@ -1019,7 +1040,7 @@ static int sve_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
return sve_set_common(target, regset, pos, count, kbuf, ubuf,
@@ -1463,6 +1484,9 @@ static int poe_get(struct task_struct *target,
if (!system_supports_poe())
return -EINVAL;
+ if (target == current)
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+
return membuf_write(&to, &target->thread.por_el0,
sizeof(target->thread.por_el0));
}
@@ -1589,7 +1613,7 @@ enum aarch64_regset {
static const struct user_regset aarch64_regsets[] = {
[REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = sizeof(struct user_pt_regs) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1597,7 +1621,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = gpr_set
},
[REGSET_FPR] = {
- .core_note_type = NT_PRFPREG,
+ USER_REGSET_NOTE_TYPE(PRFPREG),
.n = sizeof(struct user_fpsimd_state) / sizeof(u32),
/*
* We pretend we have 32-bit registers because the fpsr and
@@ -1610,7 +1634,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = fpr_set
},
[REGSET_TLS] = {
- .core_note_type = NT_ARM_TLS,
+ USER_REGSET_NOTE_TYPE(ARM_TLS),
.n = 2,
.size = sizeof(void *),
.align = sizeof(void *),
@@ -1619,7 +1643,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
- .core_note_type = NT_ARM_HW_BREAK,
+ USER_REGSET_NOTE_TYPE(ARM_HW_BREAK),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1627,7 +1651,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
- .core_note_type = NT_ARM_HW_WATCH,
+ USER_REGSET_NOTE_TYPE(ARM_HW_WATCH),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1636,7 +1660,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#endif
[REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_ARM_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL),
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
@@ -1644,7 +1668,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = system_call_set,
},
[REGSET_FPMR] = {
- .core_note_type = NT_ARM_FPMR,
+ USER_REGSET_NOTE_TYPE(ARM_FPMR),
.n = 1,
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1653,7 +1677,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
- .core_note_type = NT_ARM_SVE,
+ USER_REGSET_NOTE_TYPE(ARM_SVE),
.n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX,
SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
@@ -1665,7 +1689,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_SME
[REGSET_SSVE] = { /* Streaming mode SVE */
- .core_note_type = NT_ARM_SSVE,
+ USER_REGSET_NOTE_TYPE(ARM_SSVE),
.n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
@@ -1674,7 +1698,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = ssve_set,
},
[REGSET_ZA] = { /* SME ZA */
- .core_note_type = NT_ARM_ZA,
+ USER_REGSET_NOTE_TYPE(ARM_ZA),
/*
* ZA is a single register but it's variably sized and
* the ptrace core requires that the size of any data
@@ -1690,7 +1714,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = za_set,
},
[REGSET_ZT] = { /* SME ZT */
- .core_note_type = NT_ARM_ZT,
+ USER_REGSET_NOTE_TYPE(ARM_ZT),
.n = 1,
.size = ZT_SIG_REG_BYTES,
.align = sizeof(u64),
@@ -1700,7 +1724,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
- .core_note_type = NT_ARM_PAC_MASK,
+ USER_REGSET_NOTE_TYPE(ARM_PAC_MASK),
.n = sizeof(struct user_pac_mask) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1708,7 +1732,7 @@ static const struct user_regset aarch64_regsets[] = {
/* this cannot be set dynamically */
},
[REGSET_PAC_ENABLED_KEYS] = {
- .core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PAC_ENABLED_KEYS),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1717,7 +1741,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_CHECKPOINT_RESTORE
[REGSET_PACA_KEYS] = {
- .core_note_type = NT_ARM_PACA_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PACA_KEYS),
.n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
@@ -1725,7 +1749,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = pac_address_keys_set,
},
[REGSET_PACG_KEYS] = {
- .core_note_type = NT_ARM_PACG_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PACG_KEYS),
.n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
@@ -1736,7 +1760,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
[REGSET_TAGGED_ADDR_CTRL] = {
- .core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+ USER_REGSET_NOTE_TYPE(ARM_TAGGED_ADDR_CTRL),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1746,7 +1770,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_POE
[REGSET_POE] = {
- .core_note_type = NT_ARM_POE,
+ USER_REGSET_NOTE_TYPE(ARM_POE),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1756,7 +1780,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_GCS
[REGSET_GCS] = {
- .core_note_type = NT_ARM_GCS,
+ USER_REGSET_NOTE_TYPE(ARM_GCS),
.n = sizeof(struct user_gcs) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1946,7 +1970,7 @@ static int compat_tls_set(struct task_struct *target,
static const struct user_regset aarch32_regsets[] = {
[REGSET_COMPAT_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
@@ -1954,7 +1978,7 @@ static const struct user_regset aarch32_regsets[] = {
.set = compat_gpr_set
},
[REGSET_COMPAT_VFP] = {
- .core_note_type = NT_ARM_VFP,
+ USER_REGSET_NOTE_TYPE(ARM_VFP),
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1971,7 +1995,7 @@ static const struct user_regset_view user_aarch32_view = {
static const struct user_regset aarch32_ptrace_regsets[] = {
[REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
@@ -1979,7 +2003,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = compat_gpr_set
},
[REGSET_FPR] = {
- .core_note_type = NT_ARM_VFP,
+ USER_REGSET_NOTE_TYPE(ARM_VFP),
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1987,7 +2011,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = compat_vfp_set
},
[REGSET_TLS] = {
- .core_note_type = NT_ARM_TLS,
+ USER_REGSET_NOTE_TYPE(ARM_TLS),
.n = 1,
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1996,7 +2020,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
- .core_note_type = NT_ARM_HW_BREAK,
+ USER_REGSET_NOTE_TYPE(ARM_HW_BREAK),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -2004,7 +2028,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
- .core_note_type = NT_ARM_HW_WATCH,
+ USER_REGSET_NOTE_TYPE(ARM_HW_WATCH),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -2013,7 +2037,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
},
#endif
[REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_ARM_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL),
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
@@ -2320,9 +2344,10 @@ enum ptrace_syscall_dir {
PTRACE_SYSCALL_EXIT,
};
-static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir)
+static __always_inline unsigned long ptrace_save_reg(struct pt_regs *regs,
+ enum ptrace_syscall_dir dir,
+ int *regno)
{
- int regno;
unsigned long saved_reg;
/*
@@ -2341,15 +2366,34 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir)
* - Syscall stops behave differently to seccomp and pseudo-step traps
* (the latter do not nobble any registers).
*/
- regno = (is_compat_task() ? 12 : 7);
- saved_reg = regs->regs[regno];
- regs->regs[regno] = dir;
+ *regno = (is_compat_task() ? 12 : 7);
+ saved_reg = regs->regs[*regno];
+ regs->regs[*regno] = dir;
- if (dir == PTRACE_SYSCALL_ENTER) {
- if (ptrace_report_syscall_entry(regs))
- forget_syscall(regs);
- regs->regs[regno] = saved_reg;
- } else if (!test_thread_flag(TIF_SINGLESTEP)) {
+ return saved_reg;
+}
+
+static int report_syscall_entry(struct pt_regs *regs)
+{
+ unsigned long saved_reg;
+ int regno, ret;
+
+ saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_ENTER, &regno);
+ ret = ptrace_report_syscall_entry(regs);
+ if (ret)
+ forget_syscall(regs);
+ regs->regs[regno] = saved_reg;
+
+ return ret;
+}
+
+static void report_syscall_exit(struct pt_regs *regs)
+{
+ unsigned long saved_reg;
+ int regno;
+
+ saved_reg = ptrace_save_reg(regs, PTRACE_SYSCALL_EXIT, &regno);
+ if (!test_thread_flag(TIF_SINGLESTEP)) {
ptrace_report_syscall_exit(regs, 0);
regs->regs[regno] = saved_reg;
} else {
@@ -2367,10 +2411,11 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir)
int syscall_trace_enter(struct pt_regs *regs)
{
unsigned long flags = read_thread_flags();
+ int ret;
if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
- report_syscall(regs, PTRACE_SYSCALL_ENTER);
- if (flags & _TIF_SYSCALL_EMU)
+ ret = report_syscall_entry(regs);
+ if (ret || (flags & _TIF_SYSCALL_EMU))
return NO_SYSCALL;
}
@@ -2397,7 +2442,7 @@ void syscall_trace_exit(struct pt_regs *regs)
trace_sys_exit(regs, syscall_get_return_value(current, regs));
if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
- report_syscall(regs, PTRACE_SYSCALL_EXIT);
+ report_syscall_exit(regs);
rseq_syscall(regs);
}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 413f899e4ac6..6cb4209f5dab 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -64,7 +64,8 @@ SYM_CODE_START(arm64_relocate_new_kernel)
mov x19, x13
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
add x1, x19, #PAGE_SIZE
- dcache_by_myline_op civac, sy, x19, x1, x15, x20
+ dcache_by_myline_op_nosync civac, x19, x1, x15, x20
+ dsb sy
b .Lnext
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
index ce4778141ec7..92160f2e57ff 100644
--- a/arch/arm64/kernel/rsi.c
+++ b/arch/arm64/kernel/rsi.c
@@ -12,6 +12,7 @@
#include <asm/io.h>
#include <asm/mem_encrypt.h>
+#include <asm/pgtable.h>
#include <asm/rsi.h>
static struct realm_config config;
@@ -84,7 +85,25 @@ static void __init arm64_rsi_setup_memory(void)
}
}
-bool __arm64_is_protected_mmio(phys_addr_t base, size_t size)
+/*
+ * Check if a given PA range is Trusted (e.g., Protected memory, a Trusted Device
+ * mapping, or an MMIO emulated in the Realm world).
+ *
+ * We can rely on the RIPAS value of the region to detect if a given region is
+ * protected.
+ *
+ * RIPAS_DEV - A trusted device memory or a trusted emulated MMIO (in the Realm
+ * world
+ * RIPAS_RAM - Memory (RAM), protected by the RMM guarantees. (e.g., Firmware
+ * reserved regions for data sharing).
+ *
+ * RIPAS_DESTROYED is a special case of one of the above, where the host did
+ * something without our permission and as such we can't do anything about it.
+ *
+ * The only case where something is emulated by the untrusted hypervisor or is
+ * backed by shared memory is indicated by RSI_RIPAS_EMPTY.
+ */
+bool arm64_rsi_is_protected(phys_addr_t base, size_t size)
{
enum ripas ripas;
phys_addr_t end, top;
@@ -101,18 +120,18 @@ bool __arm64_is_protected_mmio(phys_addr_t base, size_t size)
break;
if (WARN_ON(top <= base))
break;
- if (ripas != RSI_RIPAS_DEV)
+ if (ripas == RSI_RIPAS_EMPTY)
break;
base = top;
}
return base >= end;
}
-EXPORT_SYMBOL(__arm64_is_protected_mmio);
+EXPORT_SYMBOL(arm64_rsi_is_protected);
static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot)
{
- if (__arm64_is_protected_mmio(phys, size))
+ if (arm64_rsi_is_protected(phys, size))
*prot = pgprot_encrypted(*prot);
else
*prot = pgprot_decrypted(*prot);
@@ -126,9 +145,9 @@ void __init arm64_rsi_init(void)
return;
if (!rsi_version_matches())
return;
- if (WARN_ON(rsi_get_realm_config(&config)))
+ if (WARN_ON(rsi_get_realm_config(lm_alias(&config))))
return;
- prot_ns_shared = BIT(config.ipa_bits - 1);
+ prot_ns_shared = __phys_to_pte_val(BIT(config.ipa_bits - 1));
if (arm64_ioremap_prot_hook_register(realm_ioremap_hook))
return;
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 255d12f881c2..778f2a1faac8 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -34,10 +34,8 @@ unsigned long sdei_exit_mode;
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#ifdef CONFIG_VMAP_STACK
DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#endif
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
@@ -65,9 +63,6 @@ static void free_sdei_stacks(void)
{
int cpu;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return;
-
for_each_possible_cpu(cpu) {
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -91,9 +86,6 @@ static int init_sdei_stacks(void)
int cpu;
int err = 0;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return 0;
-
for_each_possible_cpu(cpu) {
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
if (err)
@@ -206,7 +198,7 @@ out_err:
/*
* do_sdei_event() returns one of:
* SDEI_EV_HANDLED - success, return to the interrupted context.
- * SDEI_EV_FAILED - failure, return this error code to firmare.
+ * SDEI_EV_FAILED - failure, return this error code to firmware.
* virtual-address - success, return to this address.
*/
unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
@@ -247,7 +239,7 @@ unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
* If we interrupted the kernel with interrupts masked, we always go
* back to wherever we came from.
*/
- if (mode == kernel_mode && !interrupts_enabled(regs))
+ if (mode == kernel_mode && regs_irqs_disabled(regs))
return SDEI_EV_HANDLED;
/*
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 85104587f849..23c05dc7a8f2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -169,7 +169,7 @@ static void __init smp_build_mpidr_hash(void)
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
- int size;
+ int size = 0;
void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
const char *name;
@@ -182,10 +182,10 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
*/
if (!early_init_dt_scan(dt_virt, dt_phys)) {
pr_crit("\n"
- "Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n"
- "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
- "\nPlease check your bootloader.",
- &dt_phys, dt_virt);
+ "Error: invalid device tree blob: PA=%pa, VA=%px, size=%d bytes\n"
+ "The dtb must be 8-byte aligned and must not exceed 2 MB in size.\n"
+ "\nPlease check your bootloader.\n",
+ &dt_phys, dt_virt, size);
/*
* Note that in this _really_ early stage we cannot even BUG()
@@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_stext);
+ kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -280,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- setup_initial_init_mm(_stext, _etext, _edata, _end);
+ setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 99ea26d400ff..08ffc5a5aea4 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -9,6 +9,7 @@
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/errno.h>
+#include <linux/irq-entry-common.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/freezer.h>
@@ -91,12 +92,15 @@ static void save_reset_user_access_state(struct user_access_state *ua_state)
u64 por_enable_all = 0;
for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
- por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY);
+ por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX);
ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
write_sysreg_s(por_enable_all, SYS_POR_EL0);
- /* Ensure that any subsequent uaccess observes the updated value */
- isb();
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
}
}
@@ -250,6 +254,8 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
&current->thread.uw.fpsimd_state;
int err;
+ fpsimd_sync_from_effective_state(current);
+
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
@@ -262,37 +268,46 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
-static int restore_fpsimd_context(struct user_ctxs *user)
+static int read_fpsimd_context(struct user_fpsimd_state *fpsimd,
+ struct user_ctxs *user)
{
- struct user_fpsimd_state fpsimd;
- int err = 0;
+ int err;
/* check the size information */
if (user->fpsimd_size != sizeof(struct fpsimd_context))
return -EINVAL;
/* copy the FP and status/control registers */
- err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
- sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
- __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
+ err = __copy_from_user(fpsimd->vregs, &(user->fpsimd->vregs),
+ sizeof(fpsimd->vregs));
+ __get_user_error(fpsimd->fpsr, &(user->fpsimd->fpsr), err);
+ __get_user_error(fpsimd->fpcr, &(user->fpsimd->fpcr), err);
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_fpsimd_context(struct user_ctxs *user)
+{
+ struct user_fpsimd_state fpsimd;
+ int err;
+
+ err = read_fpsimd_context(&fpsimd, user);
+ if (err)
+ return err;
clear_thread_flag(TIF_SVE);
+ current->thread.svcr &= ~SVCR_SM_MASK;
current->thread.fp_type = FP_STATE_FPSIMD;
/* load the hardware registers from the fpsimd_state structure */
- if (!err)
- fpsimd_update_current_state(&fpsimd);
-
- return err ? -EFAULT : 0;
+ fpsimd_update_current_state(&fpsimd);
+ return 0;
}
static int preserve_fpmr_context(struct fpmr_context __user *ctx)
{
int err = 0;
- current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR);
-
__put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
__put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
@@ -310,7 +325,7 @@ static int restore_fpmr_context(struct user_ctxs *user)
__get_user_error(fpmr, &user->fpmr->fpmr, err);
if (!err)
- write_sysreg_s(fpmr, SYS_FPMR);
+ current->thread.uw.fpmr = fpmr;
return err;
}
@@ -372,11 +387,6 @@ static int preserve_sve_context(struct sve_context __user *ctx)
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
- /*
- * This assumes that the SVE state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
current->thread.sve_state,
SVE_SIG_REGS_SIZE(vq));
@@ -391,6 +401,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
u16 user_vl, flags;
+ bool sm;
if (user->sve_size < sizeof(*user->sve))
return -EINVAL;
@@ -400,7 +411,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return err;
- if (flags & SVE_SIG_FLAG_SM) {
+ sm = flags & SVE_SIG_FLAG_SM;
+ if (sm) {
if (!system_supports_sme())
return -EINVAL;
@@ -420,27 +432,28 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (user_vl != vl)
return -EINVAL;
- if (user->sve_size == sizeof(*user->sve)) {
- clear_thread_flag(TIF_SVE);
- current->thread.svcr &= ~SVCR_SM_MASK;
- current->thread.fp_type = FP_STATE_FPSIMD;
- goto fpsimd_only;
- }
+ /*
+ * Non-streaming SVE state may be preserved without an SVE payload, in
+ * which case the SVE context only has a header with VL==0, and all
+ * state can be restored from the FPSIMD context.
+ *
+ * Streaming SVE state is always preserved with an SVE payload. For
+ * consistency and robustness, reject restoring streaming SVE state
+ * without an SVE payload.
+ */
+ if (!sm && user->sve_size == sizeof(*user->sve))
+ return restore_fpsimd_context(user);
vq = sve_vq_from_vl(vl);
if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.sve_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
+ if (sm) {
+ sme_alloc(current, false);
+ if (!current->thread.sme_state)
+ return -ENOMEM;
+ }
sve_alloc(current, true);
if (!current->thread.sve_state) {
@@ -448,6 +461,16 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
return -ENOMEM;
}
+ if (sm) {
+ current->thread.svcr |= SVCR_SM_MASK;
+ set_thread_flag(TIF_SME);
+ } else {
+ current->thread.svcr &= ~SVCR_SM_MASK;
+ set_thread_flag(TIF_SVE);
+ }
+
+ current->thread.fp_type = FP_STATE_SVE;
+
err = __copy_from_user(current->thread.sve_state,
(char __user const *)user->sve +
SVE_SIG_REGS_OFFSET,
@@ -455,25 +478,14 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return -EFAULT;
- if (flags & SVE_SIG_FLAG_SM)
- current->thread.svcr |= SVCR_SM_MASK;
- else
- set_thread_flag(TIF_SVE);
- current->thread.fp_type = FP_STATE_SVE;
+ err = read_fpsimd_context(&fpsimd, user);
+ if (err)
+ return err;
-fpsimd_only:
- /* copy the FP and status/control registers */
- /* restore_sigframe() already checked that user->fpsimd != NULL. */
- err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
- sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
- __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+ /* Merge the FPSIMD registers into the SVE state */
+ fpsimd_update_current_state(&fpsimd);
- /* load the hardware registers from the fpsimd_state structure */
- if (!err)
- fpsimd_update_current_state(&fpsimd);
-
- return err ? -EFAULT : 0;
+ return 0;
}
#else /* ! CONFIG_ARM64_SVE */
@@ -493,13 +505,12 @@ extern int preserve_sve_context(void __user *ctx);
static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
{
+ u64 tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
int err = 0;
- current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
-
__put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
- __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+ __put_user_error(tpidr2_el0, &ctx->tpidr2, err);
return err;
}
@@ -541,11 +552,6 @@ static int preserve_za_context(struct za_context __user *ctx)
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
- /*
- * This assumes that the ZA state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
current->thread.sme_state,
ZA_SIG_REGS_SIZE(vq));
@@ -580,15 +586,9 @@ static int restore_za_context(struct user_ctxs *user)
if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.sme_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
+ sve_alloc(current, false);
+ if (!current->thread.sve_state)
+ return -ENOMEM;
sme_alloc(current, true);
if (!current->thread.sme_state) {
@@ -627,11 +627,6 @@ static int preserve_zt_context(struct zt_context __user *ctx)
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
- /*
- * This assumes that the ZT state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
thread_zt_state(&current->thread),
ZT_SIG_REGS_SIZE(1));
@@ -657,16 +652,6 @@ static int restore_zt_context(struct user_ctxs *user)
if (nregs != 1)
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.zt_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
-
err = __copy_from_user(thread_zt_state(&current->thread),
(char __user const *)user->zt +
ZT_SIG_REGS_OFFSET,
@@ -1017,6 +1002,8 @@ static int restore_sigframe(struct pt_regs *regs,
*/
forget_syscall(regs);
+ fpsimd_save_and_flush_current_state();
+
err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0)
err = parse_user_sigframe(&user, sf);
@@ -1507,21 +1494,9 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
/* Signal handlers are invoked with ZA and streaming mode disabled */
if (system_supports_sme()) {
- /*
- * If we were in streaming mode the saved register
- * state was SVE but we will exit SM and use the
- * FPSIMD register state - flush the saved FPSIMD
- * register state in case it gets loaded.
- */
- if (current->thread.svcr & SVCR_SM_MASK) {
- memset(&current->thread.uw.fpsimd_state, 0,
- sizeof(current->thread.uw.fpsimd_state));
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- current->thread.svcr &= ~(SVCR_ZA_MASK |
- SVCR_SM_MASK);
- sme_smstop();
+ task_smstop_sm(current);
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ write_sysreg_s(0, SYS_TPIDR2_EL0);
}
return 0;
@@ -1535,7 +1510,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct user_access_state ua_state;
int err = 0;
- fpsimd_signal_preserve_current_state();
+ fpsimd_save_and_flush_current_state();
if (get_sigframe(&user, ksig, regs))
return 1;
@@ -1616,7 +1591,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
-void do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 81e798b6dada..bb3b526ff43f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -103,7 +103,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_signal_preserve_current_state();
+ fpsimd_save_and_flush_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -169,14 +169,17 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK;
fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ if (err)
+ return -EFAULT;
+
/*
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err)
- fpsimd_update_current_state(&fpsimd);
+ fpsimd_save_and_flush_current_state();
+ current->thread.uw.fpsimd_state = fpsimd;
- return err ? -EFAULT : 0;
+ return 0;
}
static int compat_restore_sigframe(struct pt_regs *regs,
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b3f6b56e733..1aa324104afb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -64,26 +64,18 @@ struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNC,
- IPI_CPU_STOP,
- IPI_CPU_STOP_NMI,
- IPI_TIMER,
- IPI_IRQ_WORK,
- NR_IPI,
- /*
- * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
- * with trace_ipi_*
- */
- IPI_CPU_BACKTRACE = NR_IPI,
- IPI_KGDB_ROUNDUP,
- MAX_IPI
-};
-
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
-static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+struct ipi_descs {
+ struct irq_desc *descs[MAX_IPI];
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc);
+
+#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi])
+
+static bool percpu_ipi_descs __ro_after_init;
static bool crash_stop;
@@ -358,7 +350,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
- * in-kernel synchronisation, try to get the firwmare to help us to
+ * in-kernel synchronisation, try to get the firmware to help us to
* verify that it has really left the kernel before we consider
* clobbering anything it might still be using.
*/
@@ -531,7 +523,7 @@ int arch_register_cpu(int cpu)
/*
* Availability of the acpi handle is sufficient to establish
- * that _STA has aleady been checked. No need to recheck here.
+ * that _STA has already been checked. No need to recheck here.
*/
c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
@@ -844,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
@@ -917,9 +909,20 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr)
+{
+ unsigned int cpu;
+
+ if (!percpu_ipi_descs)
+ __ipi_send_mask(get_ipi_desc(0, nr), mask);
+ else
+ for_each_cpu(cpu, mask)
+ __ipi_send_single(get_ipi_desc(cpu, nr), cpu);
+}
+
static void arm64_backtrace_ipi(cpumask_t *mask)
{
- __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+ arm64_send_ipi(mask, IPI_CPU_BACKTRACE);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
@@ -944,7 +947,7 @@ void kgdb_roundup_cpus(void)
if (cpu == this_cpu)
continue;
- __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu);
}
}
#endif
@@ -1013,14 +1016,16 @@ static void do_handle_IPI(int ipinr)
static irqreturn_t ipi_handler(int irq, void *data)
{
- do_handle_IPI(irq - ipi_irq_base);
+ unsigned int ipi = (irq - ipi_irq_base) % nr_ipi;
+
+ do_handle_IPI(ipi);
return IRQ_HANDLED;
}
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
trace_ipi_raise(target, ipi_types[ipinr]);
- __ipi_send_mask(ipi_desc[ipinr], target);
+ arm64_send_ipi(target, ipinr);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
@@ -1046,11 +1051,15 @@ static void ipi_setup(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- prepare_percpu_nmi(ipi_irq_base + i);
- enable_percpu_nmi(ipi_irq_base + i, 0);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
} else {
- enable_percpu_irq(ipi_irq_base + i, 0);
+ enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
@@ -1064,44 +1073,77 @@ static void ipi_teardown(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- disable_percpu_nmi(ipi_irq_base + i);
- teardown_percpu_nmi(ipi_irq_base + i);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
} else {
- disable_percpu_irq(ipi_irq_base + i);
+ disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
#endif
-void __init set_smp_ipi_range(int ipi_base, int n)
+static void ipi_setup_sgi(int ipi)
{
- int i;
+ int err, irq, cpu;
- WARN_ON(n < MAX_IPI);
- nr_ipi = min(n, MAX_IPI);
+ irq = ipi_irq_base + ipi;
- for (i = 0; i < nr_ipi; i++) {
- int err;
+ if (ipi_should_be_nmi(ipi)) {
+ err = request_percpu_nmi(irq, ipi_handler, "IPI", NULL, &irq_stat);
+ WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err);
+ } else {
+ err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err);
+ }
- if (ipi_should_be_nmi(i)) {
- err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as NMI, err=%d\n",
- i, err);
- } else {
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
- i, err);
- }
+ for_each_possible_cpu(cpu)
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
+
+ irq_set_status_flags(irq, IRQ_HIDDEN);
+}
+
+static void ipi_setup_lpi(int ipi, int ncpus)
+{
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ int err, irq;
+
+ irq = ipi_irq_base + (cpu * nr_ipi) + ipi;
+
+ err = irq_force_affinity(irq, cpumask_of(cpu));
+ WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err);
+
+ err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI",
+ NULL);
+ WARN(err, "Could not request IRQ %d, err=%d\n", irq, err);
+
+ irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK));
- ipi_desc[i] = irq_to_desc(ipi_base + i);
- irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
}
+}
+
+void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+ percpu_ipi_descs = !!ncpus;
ipi_irq_base = ipi_base;
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs)
+ ipi_setup_sgi(i);
+ else
+ ipi_setup_lpi(i, ncpus);
+ }
+
/* Setup the boot CPU immediately */
ipi_setup(smp_processor_id());
}
@@ -1143,7 +1185,7 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
static unsigned long stop_in_progress;
- cpumask_t mask;
+ static cpumask_t mask;
unsigned long timeout;
/*
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 1d9d51d7627f..3ebcf8c53fb0 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -152,6 +152,8 @@ kunwind_recover_return_address(struct kunwind_state *state)
orig_pc = kretprobe_find_ret_addr(state->task,
(void *)state->common.fp,
&state->kr_cur);
+ if (!orig_pc)
+ return -EINVAL;
state->common.pc = orig_pc;
state->flags.kretprobe = 1;
}
@@ -277,21 +279,24 @@ kunwind_next(struct kunwind_state *state)
typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
-static __always_inline void
+static __always_inline int
do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
void *cookie)
{
- if (kunwind_recover_return_address(state))
- return;
+ int ret;
- while (1) {
- int ret;
+ ret = kunwind_recover_return_address(state);
+ if (ret)
+ return ret;
+ while (1) {
if (!consume_state(state, cookie))
- break;
+ return -EINVAL;
ret = kunwind_next(state);
+ if (ret == -ENOENT)
+ return 0;
if (ret < 0)
- break;
+ return ret;
}
}
@@ -324,7 +329,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
: stackinfo_get_unknown(); \
})
-static __always_inline void
+static __always_inline int
kunwind_stack_walk(kunwind_consume_fn consume_state,
void *cookie, struct task_struct *task,
struct pt_regs *regs)
@@ -332,10 +337,8 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
struct stack_info stacks[] = {
stackinfo_get_task(task),
STACKINFO_CPU(irq),
-#if defined(CONFIG_VMAP_STACK)
STACKINFO_CPU(overflow),
-#endif
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+#if defined(CONFIG_ARM_SDE_INTERFACE)
STACKINFO_SDEI(normal),
STACKINFO_SDEI(critical),
#endif
@@ -352,7 +355,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
if (regs) {
if (task != current)
- return;
+ return -EINVAL;
kunwind_init_from_regs(&state, regs);
} else if (task == current) {
kunwind_init_from_caller(&state);
@@ -360,7 +363,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
kunwind_init_from_task(&state, task);
}
- do_kunwind(&state, consume_state, cookie);
+ return do_kunwind(&state, consume_state, cookie);
}
struct kunwind_consume_entry_data {
@@ -387,6 +390,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
+static __always_inline bool
+arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ /*
+ * At an exception boundary we can reliably consume the saved PC. We do
+ * not know whether the LR was live when the exception was taken, and
+ * so we cannot perform the next unwind step reliably.
+ *
+ * All that matters is whether the *entire* unwind is reliable, so give
+ * up as soon as we hit an exception boundary.
+ */
+ if (state->source == KUNWIND_SOURCE_REGS_PC)
+ return false;
+
+ return arch_kunwind_consume_entry(state, cookie);
+}
+
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie,
+ struct task_struct *task)
+{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
+ task, NULL);
+}
+
struct bpf_unwind_consume_entry_data {
bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
void *cookie;
diff --git a/arch/arm64/kernel/static_call.c b/arch/arm64/kernel/static_call.c
new file mode 100644
index 000000000000..8b3a19e10871
--- /dev/null
+++ b/arch/arm64/kernel/static_call.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/static_call.h>
+#include <linux/memory.h>
+#include <asm/text-patching.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+ u64 literal;
+ int ret;
+
+ if (!func)
+ func = __static_call_return0;
+
+ /* decode the instructions to discover the literal address */
+ literal = ALIGN_DOWN((u64)tramp + 4, SZ_4K) +
+ aarch64_insn_adrp_get_offset(le32_to_cpup(tramp + 4)) +
+ 8 * aarch64_insn_decode_immediate(AARCH64_INSN_IMM_12,
+ le32_to_cpup(tramp + 8));
+
+ ret = aarch64_insn_write_literal_u64((void *)literal, (u64)func);
+ WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 96bcfb907443..12a948f3a504 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -89,7 +89,7 @@ COMPAT_SYSCALL_DEFINE4(aarch32_truncate64, const char __user *, pathname,
COMPAT_SYSCALL_DEFINE4(aarch32_ftruncate64, unsigned int, fd, u32, __pad,
arg_u32p(length))
{
- return ksys_ftruncate(fd, arg_u64(length));
+ return ksys_ftruncate(fd, arg_u64(length), FTRUNCATE_LFS);
}
COMPAT_SYSCALL_DEFINE5(aarch32_readahead, int, fd, u32, __pad,
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 4a609e9b65de..7e9860143add 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -36,8 +36,8 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
* The workaround requires an inner-shareable tlbi.
* We pick the reserved-ASID to minimise the impact.
*/
- __tlbi(aside1is, __TLBI_VADDR(0, 0));
- dsb(ish);
+ __tlbi(aside1is, 0UL);
+ __tlbi_sync_s1ish();
}
ret = caches_clean_inval_user_pou(start, start + chunk);
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index c442fcec6b9e..358ddfbf1401 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -43,7 +43,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
add_random_kstack_offset();
- if (scno < sc_nr) {
+ if (likely(scno < sc_nr)) {
syscall_fn_t syscall_fn;
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
ret = __invoke_syscall(regs, syscall_fn);
@@ -52,17 +52,6 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
}
syscall_set_return_value(current, regs, 0, ret);
-
- /*
- * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
- * bits. The actual entropy will be further reduced by the compiler
- * when applying stack alignment constraints: the AAPCS mandates a
- * 16-byte aligned SP at function boundaries, which will remove the
- * 4 low bits from any entropy chosen here.
- *
- * The resulting 6 bits of entropy is seen in SP[9:4].
- */
- choose_random_kstack_offset(get_random_u16());
}
static inline bool has_syscall_work(unsigned long flags)
@@ -96,7 +85,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
- if (flags & _TIF_MTE_ASYNC_FAULT) {
+ if (unlikely(flags & _TIF_MTE_ASYNC_FAULT)) {
/*
* Process the asynchronous tag check fault before the actual
* syscall. do_notify_resume() will send a signal to userspace
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index cb180684d10d..b32f13358fbb 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -15,62 +15,16 @@
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
+#include <linux/xarray.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
-#ifdef CONFIG_ACPI
-static bool __init acpi_cpu_is_threaded(int cpu)
-{
- int is_threaded = acpi_pptt_cpu_is_thread(cpu);
-
- /*
- * if the PPTT doesn't have thread information, assume a homogeneous
- * machine and return the current CPU's thread state.
- */
- if (is_threaded < 0)
- is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
-
- return !!is_threaded;
-}
-
-/*
- * Propagate the topology information of the processor_topology_node tree to the
- * cpu_topology array.
- */
-int __init parse_acpi_topology(void)
-{
- int cpu, topology_id;
-
- if (acpi_disabled)
- return 0;
-
- for_each_possible_cpu(cpu) {
- topology_id = find_acpi_cpu_topology(cpu, 0);
- if (topology_id < 0)
- return topology_id;
-
- if (acpi_cpu_is_threaded(cpu)) {
- cpu_topology[cpu].thread_id = topology_id;
- topology_id = find_acpi_cpu_topology(cpu, 1);
- cpu_topology[cpu].core_id = topology_id;
- } else {
- cpu_topology[cpu].thread_id = -1;
- cpu_topology[cpu].core_id = topology_id;
- }
- topology_id = find_acpi_cpu_topology_cluster(cpu);
- cpu_topology[cpu].cluster_id = topology_id;
- topology_id = find_acpi_cpu_topology_package(cpu);
- cpu_topology[cpu].package_id = topology_id;
- }
-
- return 0;
-}
-#endif
-
#ifdef CONFIG_ARM64_AMU_EXTN
#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
#define read_constcnt() read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
@@ -88,18 +42,28 @@ int __init parse_acpi_topology(void)
* initialized.
*/
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
-static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
-static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
+struct amu_cntr_sample {
+ u64 arch_const_cycles_prev;
+ u64 arch_core_cycles_prev;
+ unsigned long last_scale_update;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples);
+
void update_freq_counters_refs(void)
{
- this_cpu_write(arch_core_cycles_prev, read_corecnt());
- this_cpu_write(arch_const_cycles_prev, read_constcnt());
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
+
+ amu_sample->arch_core_cycles_prev = read_corecnt();
+ amu_sample->arch_const_cycles_prev = read_constcnt();
}
static inline bool freq_counters_valid(int cpu)
{
+ struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
return false;
@@ -108,8 +72,8 @@ static inline bool freq_counters_valid(int cpu)
return false;
}
- if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
- !per_cpu(arch_core_cycles_prev, cpu))) {
+ if (unlikely(!amu_sample->arch_const_cycles_prev ||
+ !amu_sample->arch_core_cycles_prev)) {
pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
return false;
}
@@ -152,17 +116,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate)
static void amu_scale_freq_tick(void)
{
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
u64 prev_core_cnt, prev_const_cnt;
u64 core_cnt, const_cnt, scale;
- prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
- prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+ prev_const_cnt = amu_sample->arch_const_cycles_prev;
+ prev_core_cnt = amu_sample->arch_core_cycles_prev;
update_freq_counters_refs();
- const_cnt = this_cpu_read(arch_const_cycles_prev);
- core_cnt = this_cpu_read(arch_core_cycles_prev);
+ const_cnt = amu_sample->arch_const_cycles_prev;
+ core_cnt = amu_sample->arch_core_cycles_prev;
+ /*
+ * This should not happen unless the AMUs have been reset and the
+ * counter values have not been restored - unlikely
+ */
if (unlikely(core_cnt <= prev_core_cnt ||
const_cnt <= prev_const_cnt))
return;
@@ -182,6 +151,8 @@ static void amu_scale_freq_tick(void)
scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
this_cpu_write(arch_freq_scale, (unsigned long)scale);
+
+ amu_sample->last_scale_update = jiffies;
}
static struct scale_freq_data amu_sfd = {
@@ -189,6 +160,96 @@ static struct scale_freq_data amu_sfd = {
.set_freq_scale = amu_scale_freq_tick,
};
+static __always_inline bool amu_fie_cpu_supported(unsigned int cpu)
+{
+ return cpumask_available(amu_fie_cpus) &&
+ cpumask_test_cpu(cpu, amu_fie_cpus);
+}
+
+void arch_cpu_idle_enter(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ if (!amu_fie_cpu_supported(cpu))
+ return;
+
+ /* Kick in AMU update but only if one has not happened already */
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK) &&
+ time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu)))
+ amu_scale_freq_tick();
+}
+
+#define AMU_SAMPLE_EXP_MS 20
+
+int arch_freq_get_on_cpu(int cpu)
+{
+ struct amu_cntr_sample *amu_sample;
+ unsigned int start_cpu = cpu;
+ unsigned long last_update;
+ unsigned int freq = 0;
+ u64 scale;
+
+ if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
+ return -EOPNOTSUPP;
+
+ while (1) {
+
+ amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
+ last_update = amu_sample->last_scale_update;
+
+ /*
+ * For those CPUs that are in full dynticks mode, or those that have
+ * not seen tick for a while, try an alternative source for the counters
+ * (and thus freq scale), if available, for given policy: this boils
+ * down to identifying an active cpu within the same freq domain, if any.
+ */
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
+ time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ int ref_cpu;
+
+ if (!policy)
+ return -EINVAL;
+
+ if (!cpumask_intersects(policy->related_cpus,
+ housekeeping_cpumask(HK_TYPE_TICK))) {
+ cpufreq_cpu_put(policy);
+ return -EOPNOTSUPP;
+ }
+
+ for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) {
+ if (ref_cpu == start_cpu) {
+ /* Prevent verifying same CPU twice */
+ ref_cpu = nr_cpu_ids;
+ break;
+ }
+ if (!idle_cpu(ref_cpu))
+ break;
+ }
+
+ cpufreq_cpu_put(policy);
+
+ if (ref_cpu >= nr_cpu_ids)
+ /* No alternative to pull info from */
+ return -EAGAIN;
+
+ cpu = ref_cpu;
+ } else {
+ break;
+ }
+ }
+ /*
+ * Reversed computation to the one used to determine
+ * the arch_freq_scale value
+ * (see amu_scale_freq_tick for details)
+ */
+ scale = arch_scale_freq_capacity(cpu);
+ freq = scale * arch_scale_freq_ref(cpu);
+ freq >>= SCHED_CAPACITY_SHIFT;
+ return freq;
+}
+
static void amu_fie_setup(const struct cpumask *cpus)
{
int cpu;
@@ -211,7 +272,7 @@ static void amu_fie_setup(const struct cpumask *cpus)
cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
- topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);
+ topology_set_scale_freq_source(&amu_sfd, cpus);
pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
cpumask_pr_args(cpus));
@@ -223,7 +284,7 @@ static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
struct cpufreq_policy *policy = data;
if (val == CPUFREQ_CREATE_POLICY)
- amu_fie_setup(policy->related_cpus);
+ amu_fie_setup(policy->cpus);
/*
* We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU
@@ -242,10 +303,70 @@ static struct notifier_block init_amu_fie_notifier = {
.notifier_call = init_amu_fie_callback,
};
+static int cpuhp_topology_online(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_policy(cpu);
+
+ /* Those are cheap checks */
+
+ /*
+ * Skip this CPU if:
+ * - it has no cpufreq policy assigned yet,
+ * - no policy exists that spans CPUs with AMU counters, or
+ * - it was already handled.
+ */
+ if (unlikely(!policy) || !cpumask_available(amu_fie_cpus) ||
+ cpumask_test_cpu(cpu, amu_fie_cpus))
+ return 0;
+
+ /*
+ * Only proceed if all already-online CPUs in this policy
+ * support AMU counters.
+ */
+ if (unlikely(!cpumask_subset(policy->cpus, amu_fie_cpus)))
+ return 0;
+
+ /*
+ * If the new online CPU cannot pass this check, all the CPUs related to
+ * the same policy should be clear from amu_fie_cpus mask, otherwise they
+ * may use different source of the freq scale.
+ */
+ if (!freq_counters_valid(cpu)) {
+ topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH,
+ policy->related_cpus);
+ cpumask_andnot(amu_fie_cpus, amu_fie_cpus, policy->related_cpus);
+ return 0;
+ }
+
+ cpumask_set_cpu(cpu, amu_fie_cpus);
+
+ topology_set_scale_freq_source(&amu_sfd, cpumask_of(cpu));
+
+ pr_debug("CPU[%u]: counter will be used for FIE.", cpu);
+
+ return 0;
+}
+
static int __init init_amu_fie(void)
{
- return cpufreq_register_notifier(&init_amu_fie_notifier,
+ int ret;
+
+ ret = cpufreq_register_notifier(&init_amu_fie_notifier,
CPUFREQ_POLICY_NOTIFIER);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "arm64/topology:online",
+ cpuhp_topology_online,
+ NULL);
+ if (ret < 0) {
+ cpufreq_unregister_notifier(&init_amu_fie_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+ return ret;
+ }
+
+ return 0;
}
core_initcall(init_amu_fie);
@@ -279,16 +400,25 @@ static inline
int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
{
/*
- * Abort call on counterless CPU or when interrupts are
- * disabled - can lead to deadlock in smp sync call.
+ * Abort call on counterless CPU.
*/
if (!cpu_has_amu_feat(cpu))
return -EOPNOTSUPP;
- if (WARN_ON_ONCE(irqs_disabled()))
- return -EPERM;
-
- smp_call_function_single(cpu, func, val, 1);
+ if (irqs_disabled()) {
+ /*
+ * When IRQs are disabled (tick path: sched_tick ->
+ * topology_scale_freq_tick or cppc_scale_freq_tick), only local
+ * CPU counter reads are allowed. Remote CPU counter read would
+ * require smp_call_function_single() which is unsafe with IRQs
+ * disabled.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id()))
+ return -EPERM;
+ func(val);
+ } else {
+ smp_call_function_single(cpu, func, val, 1);
+ }
return 0;
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 4e26bd356a48..914282016069 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
int show_unhandled_signals = 0;
-static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
+void dump_kernel_instr(unsigned long kaddr)
{
- unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
- if (user_mode(regs))
+ if (!is_ttbr1_addr(kaddr))
return;
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
+ bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -169,25 +168,18 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
- printk("%sCode: %s\n", lvl, str);
+ printk(KERN_EMERG "Code: %s\n", str);
}
-#ifdef CONFIG_PREEMPT
-#define S_PREEMPT " PREEMPT"
-#elif defined(CONFIG_PREEMPT_RT)
-#define S_PREEMPT " PREEMPT_RT"
-#else
-#define S_PREEMPT ""
-#endif
-
#define S_SMP " SMP"
static int __die(const char *str, long err, struct pt_regs *regs)
{
static int die_counter;
int ret;
+ unsigned long addr = instruction_pointer(regs);
- pr_emerg("Internal error: %s: %016lx [#%d]" S_PREEMPT S_SMP "\n",
+ pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n",
str, err, ++die_counter);
/* trap and error numbers are mostly meaningless on ARM */
@@ -198,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs)
print_modules();
show_regs(regs);
- dump_kernel_instr(KERN_EMERG, regs);
+ if (user_mode(regs))
+ return ret;
+
+ dump_kernel_instr(addr);
return ret;
}
@@ -462,7 +457,7 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr)
u32 insn;
/* check for AArch32 breakpoint instructions */
- if (!aarch32_break_handler(regs))
+ if (try_handle_aarch32_break(regs))
return;
if (user_insn_read(regs, &insn))
@@ -902,8 +897,6 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
"Bad EL0 synchronous exception");
}
-#ifdef CONFIG_VMAP_STACK
-
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
@@ -929,16 +922,16 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne
__show_regs(regs);
/*
- * We use nmi_panic to limit the potential for recusive overflows, and
+ * We use nmi_panic to limit the potential for recursive overflows, and
* to get a better stack trace.
*/
nmi_panic(NULL, "kernel stack overflow");
cpu_park_loop();
}
-#endif
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
@@ -995,7 +988,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr)
int is_valid_bugaddr(unsigned long addr)
{
/*
- * bug_handler() only called for BRK #BUG_BRK_IMM.
+ * bug_brk_handler() only called for BRK #BUG_BRK_IMM.
* So the answer is trivial -- any spurious instances with no
* bug table entry will be rejected by report_bug() and passed
* back to the debug-monitors code and handled as a fatal
@@ -1005,7 +998,7 @@ int is_valid_bugaddr(unsigned long addr)
}
#endif
-static int bug_handler(struct pt_regs *regs, unsigned long esr)
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr)
{
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
@@ -1025,13 +1018,8 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook bug_break_hook = {
- .fn = bug_handler,
- .imm = BUG_BRK_IMM,
-};
-
-#ifdef CONFIG_CFI_CLANG
-static int cfi_handler(struct pt_regs *regs, unsigned long esr)
+#ifdef CONFIG_CFI
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr)
{
unsigned long target;
u32 type;
@@ -1054,15 +1042,9 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
+#endif /* CONFIG_CFI */
-static struct break_hook cfi_break_hook = {
- .fn = cfi_handler,
- .imm = CFI_BRK_IMM_BASE,
- .mask = CFI_BRK_IMM_MASK,
-};
-#endif /* CONFIG_CFI_CLANG */
-
-static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr)
{
pr_err("%s generated an invalid instruction at %pS!\n",
"Kernel text patching",
@@ -1072,11 +1054,6 @@ static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook fault_break_hook = {
- .fn = reserved_fault_handler,
- .imm = FAULT_BRK_IMM,
-};
-
#ifdef CONFIG_KASAN_SW_TAGS
#define KASAN_ESR_RECOVER 0x20
@@ -1084,7 +1061,7 @@ static struct break_hook fault_break_hook = {
#define KASAN_ESR_SIZE_MASK 0x0f
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
-static int kasan_handler(struct pt_regs *regs, unsigned long esr)
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
@@ -1115,62 +1092,12 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook kasan_break_hook = {
- .fn = kasan_handler,
- .imm = KASAN_BRK_IMM,
- .mask = KASAN_BRK_MASK,
-};
#endif
#ifdef CONFIG_UBSAN_TRAP
-static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
- die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
+ die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook ubsan_break_hook = {
- .fn = ubsan_handler,
- .imm = UBSAN_BRK_IMM,
- .mask = UBSAN_BRK_MASK,
-};
-#endif
-
-/*
- * Initial handler for AArch64 BRK exceptions
- * This handler only used until debug_traps_init().
- */
-int __init early_brk64(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
-{
-#ifdef CONFIG_CFI_CLANG
- if (esr_is_cfi_brk(esr))
- return cfi_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_KASAN_SW_TAGS
- if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
- return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- if ((esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
- return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
- return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
-}
-
-void __init trap_init(void)
-{
- register_kernel_break_hook(&bug_break_hook);
-#ifdef CONFIG_CFI_CLANG
- register_kernel_break_hook(&cfi_break_hook);
-#endif
- register_kernel_break_hook(&fault_break_hook);
-#ifdef CONFIG_KASAN_SW_TAGS
- register_kernel_break_hook(&kasan_break_hook);
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- register_kernel_break_hook(&ubsan_break_hook);
-#endif
- debug_traps_init();
-}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index e8ed8e5b713b..592dd8668de4 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,7 +18,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
-#include <linux/time_namespace.h>
+#include <linux/vdso_datastore.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -57,12 +57,6 @@ static struct vdso_abi_info vdso_info[] __ro_after_init = {
#endif /* CONFIG_COMPAT_VDSO */
};
-/*
- * The vDSO data page.
- */
-static union vdso_data_store vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = vdso_data_store.data;
-
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
@@ -87,9 +81,7 @@ static int __init __vdso_init(enum vdso_abi abi)
vdso_info[abi].vdso_code_start) >>
PAGE_SHIFT;
- vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
- sizeof(struct page *),
- GFP_KERNEL);
+ vdso_pagelist = kzalloc_objs(struct page *, vdso_info[abi].vdso_pages);
if (vdso_pagelist == NULL)
return -ENOMEM;
@@ -104,78 +96,6 @@ static int __init __vdso_init(enum vdso_abi abi)
return 0;
}
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-static const struct vm_special_mapping vvar_map;
-
-/*
- * The vvar mapping contains data for a specific time namespace, so when a task
- * changes namespace we must unmap its vvar data for the old namespace.
- * Subsequent faults will map in data for the new namespace.
- *
- * For more details see timens_setup_vdso_data().
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
-
- for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, &vvar_map))
- zap_vma_pages(vma);
- }
-
- mmap_read_unlock(mm);
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long pfn;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- if (timens_page)
- pfn = page_to_pfn(timens_page);
- else
- pfn = sym_to_pfn(vdso_data);
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = sym_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
-
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
-static const struct vm_special_mapping vvar_map = {
- .name = "[vvar]",
- .fault = vvar_fault,
-};
-
static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
@@ -185,11 +105,11 @@ static int __setup_additional_pages(enum vdso_abi abi,
unsigned long gp_flags = 0;
void *ret;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_mapping_len = vdso_text_len + VDSO_NR_PAGES * PAGE_SIZE;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -197,20 +117,19 @@ static int __setup_additional_pages(enum vdso_abi abi,
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
- VM_READ|VM_MAYREAD|VM_PFNMAP,
- &vvar_map);
+ ret = vdso_install_vvar_mapping(mm, vdso_base);
if (IS_ERR(ret))
goto up_fail;
if (system_supports_bti_kernel())
gp_flags = VM_ARM64_BTI;
- vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_base += VDSO_NR_PAGES * PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|gp_flags|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_SEALED_SYSMAP,
vdso_info[abi].cm);
if (IS_ERR(ret))
goto up_fail;
@@ -336,7 +255,8 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
*/
ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
VM_READ | VM_EXEC |
- VM_MAYREAD | VM_MAYEXEC,
+ VM_MAYREAD | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&aarch32_vdso_maps[AA32_MAP_VECTORS]);
return PTR_ERR_OR_ZERO(ret);
@@ -359,7 +279,8 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
*/
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
- VM_MAYWRITE | VM_MAYEXEC,
+ VM_MAYWRITE | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
if (IS_ERR(ret))
goto out;
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 35685c036044..7dec05dd33b7 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -7,7 +7,7 @@
#
# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
@@ -36,7 +36,8 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
# -Wmissing-prototypes and -Wmissing-declarations are removed from
# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled.
CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
- $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
+ $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \
+ $(GCC_PLUGINS_CFLAGS) \
$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
-Wmissing-prototypes -Wmissing-declarations
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 47ad6944f9f0..52314be29191 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -20,11 +20,8 @@ OUTPUT_ARCH(aarch64)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
- PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
+ VDSO_VVAR_SYMS
+
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 25a2cb6317f3..bea3675fa668 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -3,7 +3,7 @@
# Makefile for vdso32
#
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
@@ -21,8 +21,6 @@ endif
cc32-option = $(call try-run,\
$(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
-cc32-disable-warning = $(call try-run,\
- $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -59,13 +57,13 @@ VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
VDSO_CAFLAGS += -march=armv8-a
VDSO_CFLAGS := $(VDSO_CAFLAGS)
-VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
# KBUILD_CFLAGS from top-level Makefile
VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
+ $(filter -Werror,$(KBUILD_CPPFLAGS)) \
-Werror-implicit-function-declaration \
-Wno-format-security \
- -std=gnu11
+ $(CC_FLAGS_DIALECT)
VDSO_CFLAGS += -O2
# Some useful compiler-dependent flags from top-level Makefile
VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
@@ -74,16 +72,6 @@ VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
VDSO_CFLAGS += -Werror=date-time
VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
-# The 32-bit compiler does not provide 128-bit integers, which are used in
-# some headers that are indirectly included from the vDSO code.
-# This hack makes the compiler happy and should trigger a warning/error if
-# variables of such type are referenced.
-VDSO_CFLAGS += -D__uint128_t='void*'
-# Silence some warnings coming from headers that operate on long's
-# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
-VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
-VDSO_CFLAGS += -Wno-int-to-pointer-cast
-
# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is
# unreliable.
ifeq ($(CONFIG_THUMB2_COMPAT_VDSO), y)
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 732702a187e9..c374fb0146f3 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -12,16 +12,15 @@
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
OUTPUT_ARCH(arm)
SECTIONS
{
- PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
+ VDSO_VVAR_SYMS
+
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -87,6 +86,7 @@ VERSION
__vdso_gettimeofday;
__vdso_clock_getres;
__vdso_clock_gettime64;
+ __vdso_clock_getres_time64;
local: *;
};
}
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 29b4d8f61e39..0c6998ebe491 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -32,6 +32,11 @@ int __vdso_clock_getres(clockid_t clock_id,
return __cvdso_clock_getres_time32(clock_id, res);
}
+int __vdso_clock_getres_time64(clockid_t clock_id, struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
+
/* Avoid unresolved references emitted by GCC */
void __aeabi_unwind_cpp_pr0(void)
diff --git a/arch/arm64/kernel/vmcore_info.c b/arch/arm64/kernel/vmcore_info.c
index b19d5d6cb8b3..9619ece66b79 100644
--- a/arch/arm64/kernel/vmcore_info.c
+++ b/arch/arm64/kernel/vmcore_info.c
@@ -14,7 +14,7 @@ static inline u64 get_tcr_el1_t1sz(void);
static inline u64 get_tcr_el1_t1sz(void)
{
- return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+ return (read_sysreg(tcr_el1) & TCR_EL1_T1SZ_MASK) >> TCR_EL1_T1SZ_SHIFT;
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e73326bd3ff7..e1ac876200a3 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -13,16 +13,36 @@
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
-#define HYPERVISOR_DATA_SECTIONS \
+#ifdef CONFIG_NVHE_EL2_TRACING
+#define HYPERVISOR_EVENT_IDS \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_event_ids_start = .; \
+ *(HYP_SECTION_NAME(.event_ids)) \
+ __hyp_event_ids_end = .;
+#else
+#define HYPERVISOR_EVENT_IDS
+#endif
+
+#define HYPERVISOR_RODATA_SECTIONS \
HYP_SECTION_NAME(.rodata) : { \
. = ALIGN(PAGE_SIZE); \
__hyp_rodata_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init)) \
*(HYP_SECTION_NAME(.rodata)) \
+ HYPERVISOR_EVENT_IDS \
. = ALIGN(PAGE_SIZE); \
__hyp_rodata_end = .; \
}
+#define HYPERVISOR_DATA_SECTION \
+ HYP_SECTION_NAME(.data) : { \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_data_start = .; \
+ *(HYP_SECTION_NAME(.data)) \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_data_end = .; \
+ }
+
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@@ -51,7 +71,8 @@
#define SBSS_ALIGN PAGE_SIZE
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
-#define HYPERVISOR_DATA_SECTIONS
+#define HYPERVISOR_RODATA_SECTIONS
+#define HYPERVISOR_DATA_SECTION
#define HYPERVISOR_PERCPU_SECTION
#define HYPERVISOR_RELOC_SECTION
#define SBSS_ALIGN 0
@@ -181,6 +202,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
HYPERVISOR_TEXT
+ STATIC_CALL_TEXT
*(.gnu.warning)
}
@@ -190,7 +212,7 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
- HYPERVISOR_DATA_SECTIONS
+ HYPERVISOR_RODATA_SECTIONS
.got : { *(.got) }
/*
@@ -249,9 +271,9 @@ SECTIONS
__inittext_end = .;
__initdata_begin = .;
- init_idmap_pg_dir = .;
+ __pi_init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
- init_idmap_pg_end = .;
+ __pi_init_idmap_pg_end = .;
.init.data : {
INIT_DATA
@@ -295,6 +317,15 @@ SECTIONS
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ HYPERVISOR_DATA_SECTION
+
+#ifdef CONFIG_NVHE_EL2_TRACING
+ .data.hyp_events : {
+ __hyp_events_start = .;
+ *(SORT(_hyp_events.*))
+ __hyp_events_end = .;
+ }
+#endif
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback
@@ -319,11 +350,12 @@ SECTIONS
/* start of zero-init region */
BSS_SECTION(SBSS_ALIGN, 0, 0)
+ __pi___bss_start = __bss_start;
. = ALIGN(PAGE_SIZE);
- init_pg_dir = .;
+ __pi_init_pg_dir = .;
. += INIT_DIR_SIZE;
- init_pg_end = .;
+ __pi_init_pg_end = .;
/* end of zero-init region */
. += SZ_4K; /* stack for the early C runtime */
@@ -332,9 +364,11 @@ SECTIONS
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
+ __pi__end = .;
STABS_DEBUG
DWARF_DEBUG
+ MODINFO
ELF_DETAILS
HEAD_SYMBOLS
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
index dcd25322127c..3093037dcb7b 100644
--- a/arch/arm64/kernel/watchdog_hld.c
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -34,3 +34,61 @@ bool __init arch_perf_nmi_is_available(void)
*/
return arm_pmu_irq_is_nmi();
}
+
+static int watchdog_perf_update_period(void *data)
+{
+ int cpu = smp_processor_id();
+ u64 max_cpu_freq, new_period;
+
+ max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+ if (!max_cpu_freq)
+ return 0;
+
+ new_period = watchdog_thresh * max_cpu_freq;
+ hardlockup_detector_perf_adjust_period(new_period);
+
+ return 0;
+}
+
+static int watchdog_freq_notifier_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (val != CPUFREQ_CREATE_POLICY)
+ return NOTIFY_DONE;
+
+ /*
+ * Let each online CPU related to the policy update the period by their
+ * own. This will serialize with the framework on start/stop the lockup
+ * detector (softlockup_{start,stop}_all) and avoid potential race
+ * condition. Otherwise we may have below theoretical race condition:
+ * (core 0/1 share the same policy)
+ * [core 0] [core 1]
+ * hardlockup_detector_event_create()
+ * hw_nmi_get_sample_period()
+ * (cpufreq registered, notifier callback invoked)
+ * watchdog_freq_notifier_callback()
+ * watchdog_perf_update_period()
+ * (since core 1's event's not yet created,
+ * the period is not set)
+ * perf_event_create_kernel_counter()
+ * (event's period is SAFE_MAX_CPU_FREQ)
+ */
+ for_each_cpu(cpu, policy->cpus)
+ smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block watchdog_freq_notifier = {
+ .notifier_call = watchdog_freq_notifier_callback,
+};
+
+static int __init init_watchdog_freq_notifier(void)
+{
+ return cpufreq_register_notifier(&watchdog_freq_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(init_watchdog_freq_notifier);