From e8df9d9f4209c04161321d8c12640ae560f65939 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 21 Nov 2023 09:46:28 +0800 Subject: perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities When running perf-stat command on Intel hybrid platform, perf-stat reports the following errors: sudo taskset -c 7 ./perf stat -vvvv -e cpu_atom/instructions/ sleep 1 Opening: cpu/cycles/:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -16 Performance counter stats for 'sleep 1': cpu_atom/instructions/ It looks the cpu_atom/instructions/ event can't be enabled on atom PMU even when the process is pinned on atom core. Investigation shows that exclusive_event_init() helper always returns -EBUSY error in the perf event creation. That's strange since the atom PMU should not be an exclusive PMU. Further investigation shows the issue was introduced by commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") The commit originally intents to clear the bit PERF_PMU_CAP_AUX_OUTPUT from PMU capabilities if intel_cap.pebs_output_pt_available is not set, but it incorrectly uses 'or' operation and leads to all PMU capabilities bits are set to 1 except bit PERF_PMU_CAP_AUX_OUTPUT. Testing this fix on Intel hybrid platforms, the observed issues disappear. Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Dapeng Mi Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231121014628.729989-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a08f794a0e79..ce1c777227b4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, -- cgit v1.2.3 From 2e569ada424c40ce27c99bfab4f9780619061c83 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Nov 2023 22:02:11 +0100 Subject: x86/microcode: Remove the driver announcement and version First of all, the print is useless. The driver will either load and say which microcode revision the machine has or issue an error. Then, the version number is meaningless and actively confusing, as Yazen mentioned recently: when a subset of patches are backported to a distro kernel, one can't assume the driver version is the same as the upstream one. And besides, the version number of the loader hasn't been used and incremented for a long time. So drop it. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231115210212.9981-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 666d25bbc5ad..b4be3a2c79df 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,6 @@ #include "internal.h" -#define DRIVER_VERSION "2.2" - static struct microcode_ops *microcode_ops; bool dis_ucode_ldr = true; @@ -846,8 +844,6 @@ static int __init microcode_init(void) cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); - pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); - return 0; out_pdev: -- cgit v1.2.3 From 080990aa3344123673f686cda2df0d1b0deee046 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Nov 2023 22:02:12 +0100 Subject: x86/microcode: Rework early revisions reporting The AMD side of the loader issues the microcode revision for each logical thread on the system, which can become really noisy on huge machines. And doing that doesn't make a whole lot of sense - the microcode revision is already in /proc/cpuinfo. So in case one is interested in the theoretical support of mixed silicon steppings on AMD, one can check there. What is also missing on the AMD side - something which people have requested before - is showing the microcode revision the CPU had *before* the early update. So abstract that up in the main code and have the BSP on each vendor provide those revision numbers. Then, dump them only once on driver init. On Intel, do not dump the patch date - it is not needed. Reported-by: Linus Torvalds Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/CAHk-=wg=%2B8rceshMkB4VnKxmRccVLtBLPBawnewZuuqyx5U=3A@mail.gmail.com --- arch/x86/kernel/cpu/microcode/amd.c | 39 +++++++++----------------------- arch/x86/kernel/cpu/microcode/core.c | 11 +++++++-- arch/x86/kernel/cpu/microcode/intel.c | 17 ++++++-------- arch/x86/kernel/cpu/microcode/internal.h | 14 ++++++++---- 4 files changed, 37 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9373ec01c5ae..13b45b9c806d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -104,8 +104,6 @@ struct cont_desc { size_t size; }; -static u32 ucode_new_rev; - /* * Microcode patch container file is prepended to the initrd in cpio * format. See Documentation/arch/x86/microcode.rst @@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * * Returns true if container found (sets @desc), false otherwise. */ -static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) +static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size) { struct cont_desc desc = { 0 }; struct microcode_amd *mc; bool ret = false; - u32 rev, dummy; desc.cpuid_1_eax = cpuid_1_eax; @@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) if (!mc) return ret; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* * Allow application of the same revision to pick up SMT-specific * changes even if the revision of the other SMT thread is already * up-to-date. */ - if (rev > mc->hdr.patch_id) + if (old_rev > mc->hdr.patch_id) return ret; - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - ret = true; - } - - return ret; + return !__apply_microcode_amd(mc); } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi *ret = cp; } -void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) +void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { struct cpio_data cp = { }; + u32 dummy; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) if (!(cp.data && cp.size)) return; - early_apply_microcode(cpuid_1_eax, cp.data, cp.size); + if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size)) + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); @@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - pr_info("reload patch_level=0x%08x\n", ucode_new_rev); - } + if (!__apply_microcode_amd(mc)) + pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); } } @@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) if (p && (p->patch_id == csig->rev)) uci->mc = p->data; - pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); - return 0; } @@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu) rev = mc_amd->hdr.patch_id; ret = UCODE_UPDATED; - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - out: uci->cpu_sig.rev = rev; c->microcode = rev; @@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void) pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } - - if (ucode_new_rev) - pr_info_once("microcode updated early to new patch_level=0x%08x\n", - ucode_new_rev); - return µcode_amd_ops; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b4be3a2c79df..232026a239a6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -75,6 +75,8 @@ static u32 final_levels[] = { 0, /* T-101 terminator */ }; +struct early_load_data early_data; + /* * Check the current patch level on this CPU. * @@ -153,9 +155,9 @@ void __init load_ucode_bsp(void) return; if (intel) - load_ucode_intel_bsp(); + load_ucode_intel_bsp(&early_data); else - load_ucode_amd_bsp(cpuid_1_eax); + load_ucode_amd_bsp(&early_data, cpuid_1_eax); } void load_ucode_ap(void) @@ -826,6 +828,11 @@ static int __init microcode_init(void) if (!microcode_ops) return -ENODEV; + pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev)); + + if (early_data.new_rev) + pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev); + microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6024feb98d29..070426b9895f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci) { struct microcode_intel *mc = uci->mc; - enum ucode_state ret; - u32 cur_rev, date; + u32 cur_rev; - ret = __apply_microcode(uci, mc, &cur_rev); - if (ret == UCODE_UPDATED) { - date = mc->hdr.date; - pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n", - cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff); - } - return ret; + return __apply_microcode(uci, mc, &cur_rev); } static __init bool load_builtin_intel_microcode(struct cpio_data *cp) @@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void) early_initcall(save_builtin_microcode); /* Load microcode on BSP from initrd or builtin blobs */ -void __init load_ucode_intel_bsp(void) +void __init load_ucode_intel_bsp(struct early_load_data *ed) { struct ucode_cpu_info uci; + ed->old_rev = intel_get_microcode_revision(); + uci.mc = get_microcode_blob(&uci, false); if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) ucode_patch_va = UCODE_BSP_LOADED; + + ed->new_rev = uci.cpu_sig.rev; } void load_ucode_intel_ap(void) diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index f8047b12329a..21776c529fa9 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -37,6 +37,12 @@ struct microcode_ops { use_nmi : 1; }; +struct early_load_data { + u32 old_rev; + u32 new_rev; +}; + +extern struct early_load_data early_data; extern struct ucode_cpu_info ucode_cpu_info[]; struct cpio_data find_microcode_in_initrd(const char *path); @@ -92,14 +98,14 @@ extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD -void load_ucode_amd_bsp(unsigned int family); +void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ -static inline void load_ucode_amd_bsp(unsigned int family) { } +static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } @@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { } #endif /* !CONFIG_CPU_SUP_AMD */ #ifdef CONFIG_CPU_SUP_INTEL -void load_ucode_intel_bsp(void); +void load_ucode_intel_bsp(struct early_load_data *ed); void load_ucode_intel_ap(void); void reload_ucode_intel(void); struct microcode_ops *init_intel_microcode(void); #else /* CONFIG_CPU_SUP_INTEL */ -static inline void load_ucode_intel_bsp(void) { } +static inline void load_ucode_intel_bsp(struct early_load_data *ed) { } static inline void load_ucode_intel_ap(void) { } static inline void reload_ucode_intel(void) { } static inline struct microcode_ops *init_intel_microcode(void) { return NULL; } -- cgit v1.2.3 From 4711b7b8f99583f6105a33e91f106125134beacb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 30 Oct 2023 11:41:33 +0100 Subject: s390/pai: cleanup event initialization Setting event::hw.last_tag to zero is not necessary. The memory for each event is dynamically allocated by the kernel common code and initialized to zero already. Remove this unnecessary assignment. Move the comment to function paicrypt_start() for clarification. Suggested-by: Sumanth Korikkar Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 11 +++++------ arch/s390/kernel/perf_pai_ext.c | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 77fd24e6cbb6..39a91b00438a 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event) if (IS_ERR(cpump)) return PTR_ERR(cpump); - /* Event initialization sets last_tag to 0. When later on the events - * are deleted and re-added, do not reset the event count value to zero. - * Events are added, deleted and re-added when 2 or more events - * are active at the same time. - */ - event->hw.last_tag = 0; event->destroy = paicrypt_event_destroy; if (a->sample_period) { @@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ if (!event->hw.last_tag) { event->hw.last_tag = 1; sum = paicrypt_getall(event); /* Get current value */ diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 8ba0f1a3a39d..e7013a2e8960 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event) rc = paiext_alloc(a, event); if (rc) return rc; - event->hw.last_tag = 0; event->destroy = paiext_event_destroy; if (a->sample_period) { -- cgit v1.2.3 From 673752a839694133a328610fcbc54f3d59ae87f3 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Wed, 8 Nov 2023 18:18:52 +0100 Subject: s390/ipl: add missing IPL_TYPE_ECKD_DUMP case to ipl_init() Add missing IPL_TYPE_ECKD_DUMP case to ipl_init() creating ECKD ipl device attribute group similar to IPL_TYPE_ECKD case. Commit e2d2a2968f2a ("s390/ipl: add eckd dump support") should have had it from the beginning. Fixes: e2d2a2968f2a ("s390/ipl: add eckd dump support") Signed-off-by: Mikhail Zaslonko Reviewed-by: Sven Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/ipl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index cc364fce6aa9..ba75f6bee774 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -666,6 +666,7 @@ static int __init ipl_init(void) &ipl_ccw_attr_group_lpar); break; case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); break; case IPL_TYPE_FCP: -- cgit v1.2.3 From 0a9ace1117bbaa25687468af703b472235f5c210 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 15 Nov 2023 11:39:02 +0100 Subject: s390: remove odd comment In the meantime hopefully most people got used to forward declarations, therefore remove the explanation. Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dc17896a001a..c15eadbb9983 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct; execve_tail(); \ } while (0) -/* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct seq_file; -- cgit v1.2.3 From acfa60dbe03802d6afd28401aa47801270e82021 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 17 Nov 2023 13:14:22 +0000 Subject: arm64: mm: Fix "rodata=on" when CONFIG_RODATA_FULL_DEFAULT_ENABLED=y When CONFIG_RODATA_FULL_DEFAULT_ENABLED=y, passing "rodata=on" on the kernel command-line (rather than "rodata=full") should turn off the "full" behaviour, leaving writable linear aliases of read-only kernel memory. Unfortunately, the option has no effect in this situation and the only way to disable the "rodata=full" behaviour is to disable rodata protection entirely by passing "rodata=off". Fix this by parsing the "on" and "off" options in the arch code, additionally enforcing that 'rodata_full' cannot be set without also setting 'rodata_enabled', allowing us to simplify a couple of checks in the process. Fixes: 2e8cff0a0eee ("arm64: fix rodata=full") Cc: Ard Biesheuvel Cc: Mark Rutland Signed-off-by: Will Deacon Reviewed-by: "Russell King (Oracle)" Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20231117131422.29663-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/setup.h | 17 +++++++++++++++-- arch/arm64/mm/pageattr.c | 7 +++---- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index f4af547ef54c..2e4d7da74fb8 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg) extern bool rodata_enabled; extern bool rodata_full; - if (arg && !strcmp(arg, "full")) { + if (!arg) + return false; + + if (!strcmp(arg, "full")) { + rodata_enabled = rodata_full = true; + return true; + } + + if (!strcmp(arg, "off")) { + rodata_enabled = rodata_full = false; + return true; + } + + if (!strcmp(arg, "on")) { rodata_enabled = true; - rodata_full = true; + rodata_full = false; return true; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8e2017ba5f1b..924843f1f661 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,8 +29,8 @@ bool can_set_direct_map(void) * * KFENCE pool requires page-granular mapping if initialized late. */ - return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + return rodata_full || debug_pagealloc_enabled() || + arm64_kfence_can_set_direct_map(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_enabled && - rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), -- cgit v1.2.3 From 7bf9a6b46549852a37e6d07e52c601c3c706b562 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 22 Nov 2023 15:07:41 -0800 Subject: arm/xen: fix xen_vcpu_info allocation alignment xen_vcpu_info is a percpu area than needs to be mapped by Xen. Currently, it could cross a page boundary resulting in Xen being unable to map it: [ 0.567318] kernel BUG at arch/arm64/xen/../../arm/xen/enlighten.c:164! [ 0.574002] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Fix the issue by using __alloc_percpu and requesting alignment for the memory allocation. Signed-off-by: Stefano Stabellini Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2311221501340.2053963@ubuntu-linux-20-04-desktop Fixes: 24d5373dda7c ("arm/xen: Use alloc_percpu rather than __alloc_percpu") Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 9afdc4c4a5dc..a395b6c0aae2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -484,7 +484,8 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = alloc_percpu(struct vcpu_info); + xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), + 1 << fls(sizeof(struct vcpu_info) - 1)); if (xen_vcpu_info == NULL) return -ENOMEM; -- cgit v1.2.3 From c0a8574204054effad6ac83cc75c02576e2985fe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 19 Nov 2023 14:32:34 +0900 Subject: arm64: add dependency between vmlinuz.efi and Image A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image OBJCOPY arch/arm64/boot/Image AS arch/arm64/boot/zboot-header.o PAD arch/arm64/boot/vmlinux.bin GZIP arch/arm64/boot/vmlinuz OBJCOPY arch/arm64/boot/vmlinuz.o LD arch/arm64/boot/vmlinuz.efi.elf OBJCOPY arch/arm64/boot/vmlinuz.efi The log "OBJCOPY arch/arm64/boot/Image" is displayed twice. It indicates that two threads simultaneously enter arch/arm64/boot/ and write to arch/arm64/boot/Image. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image PAD arch/arm64/boot/vmlinux.bin truncate: Invalid number: 'arch/arm64/boot/vmlinux.bin' make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/arm64/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/arm64/boot/vmlinux.bin' make[1]: *** [arch/arm64/Makefile:163: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on Image, but such a dependency is not specified in arch/arm64/Makefile. Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel Reviewed-by: SImon Glass Link: https://lore.kernel.org/r/20231119053234.2367621-1-masahiroy@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4bd85cc0d32b..9a2d3723cd0f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -158,7 +158,7 @@ endif all: $(notdir $(KBUILD_IMAGE)) - +vmlinuz.efi: Image Image vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -- cgit v1.2.3 From e11d4cccd094a7cd4696c8c42e672c76c092dad5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:37:50 +0100 Subject: parisc: Mark ex_table entries 32-bit aligned in assembly.h Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/assembly.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 75677b526b2b..74d17d7e759d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -574,6 +574,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ + .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ .previous -- cgit v1.2.3 From a80aeb86542a50aa8521729ea4cc731ee7174f03 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:39:03 +0100 Subject: parisc: Mark ex_table entries 32-bit aligned in uaccess.h Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 2bf660eabe42..4165079898d9 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -41,6 +41,7 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" -- cgit v1.2.3 From 33f806da2df68606f77d7b892cd1298ba3d463e8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:10:20 +0100 Subject: parisc: Mark altinstructions read-only and 32-bit aligned Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/alternative.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h index 1ed45fd085d3..1eb488f25b83 100644 --- a/arch/parisc/include/asm/alternative.h +++ b/arch/parisc/include/asm/alternative.h @@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* Alternative SMP implementation. */ #define ALTERNATIVE(cond, replacement) "!0:" \ - ".section .altinstructions, \"aw\" !" \ + ".section .altinstructions, \"a\" !" \ + ".align 4 !" \ ".word (0b-4-.) !" \ ".hword 1, " __stringify(cond) " !" \ ".word " __stringify(replacement) " !" \ @@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace one single instructions by a new instruction */ #define ALTERNATIVE(from, to, cond, replacement)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword (to - from)/4, cond ! \ .word replacement ! \ @@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace multiple instructions by new code */ #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword -num_instructions, cond ! \ .word (new_instr_ptr - .) ! \ -- cgit v1.2.3 From 07eecff8ae78df7f28800484d31337e1f9bfca3a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:14:39 +0100 Subject: parisc: Mark jump_table naturally aligned The jump_table stores two 32-bit words and one 32- (on 32-bit kernel) or one 64-bit word (on 64-bit kernel). Ensure that the last word is always 64-bit aligned on a 64-bit kernel by aligning the whole structure on sizeof(long). Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/jump_label.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index af2a598bc0f8..94428798b6aa 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: @@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: -- cgit v1.2.3 From b28fc0d8739c03e7b6c44914a9d00d4c6dddc0ea Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:11:56 +0100 Subject: parisc: Mark lock_aligned variables 16-byte aligned on SMP On parisc we need 16-byte alignment for variables which are used for locking. Mark the __lock_aligned attribute acordingly so that the .data..lock_aligned section will get that alignment in the generated object files. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/ldcw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index ee9e071859b2..47ebc4c91eaf 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -55,7 +55,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __section(".data..lock_aligned") +# define __lock_aligned __section(".data..lock_aligned") __aligned(16) #endif #endif /* __PARISC_LDCW_H */ -- cgit v1.2.3 From c9fcb2b65c2849e8ff3be23fd8828312fb68dc19 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:16:02 +0100 Subject: parisc: Ensure 32-bit alignment on parisc unwind section Make sure the .PARISC.unwind section will be 32-bit aligned. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 58694d1989c2..548051b0b4af 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -130,6 +130,7 @@ SECTIONS RO_DATA(8) /* unwind info */ + . = ALIGN(4); .PARISC.unwind : { __start___unwind = .; *(.PARISC.unwind) -- cgit v1.2.3 From fe76a1349f235969381832c83d703bc911021eb6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:30:49 +0100 Subject: parisc: Use natural CPU alignment for bug_table Make sure that the __bug_table section gets 32- or 64-bit aligned, depending if a 32- or 64-bit kernel is being built. Mark it non-writeable and use .blockz instead of the .org assembler directive to pad the struct. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/bug.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 4b6d60b94124..b9cad0bb4461 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -28,13 +28,15 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ unreachable(); \ } while(0) @@ -51,27 +53,31 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %2\n" \ "2:\t" ASM_WORD_INSN "1b\n" \ - "\t.short %c0\n" \ - "\t.org 2b+%c1\n" \ + "\t.short %0\n" \ + "\t.blockz %1-%2-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #endif -- cgit v1.2.3 From e5f3e299a2b1e9c3ece24a38adfc089aef307e8a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 20:28:27 +0100 Subject: parisc: Drop the HP-UX ENOSYM and EREMOTERELEASE error codes Those return codes are only defined for the parisc architecture and are leftovers from when we wanted to be HP-UX compatible. They are not returned by any Linux kernel syscall but do trigger problems with the glibc strerrorname_np() and strerror() functions as reported in glibc issue #31080. There is no need to keep them, so simply remove them. Signed-off-by: Helge Deller Reported-by: Bruno Haible Closes: https://sourceware.org/bugzilla/show_bug.cgi?id=31080 Cc: stable@vger.kernel.org --- arch/parisc/include/uapi/asm/errno.h | 2 -- lib/errname.c | 6 ------ tools/arch/parisc/include/uapi/asm/errno.h | 2 -- 3 files changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ diff --git a/lib/errname.c b/lib/errname.c index dd1b998552cd..4f9112b38f3a 100644 --- a/lib/errname.c +++ b/lib/errname.c @@ -111,9 +111,6 @@ static const char *names_0[] = { E(ENOSPC), E(ENOSR), E(ENOSTR), -#ifdef ENOSYM - E(ENOSYM), -#endif E(ENOSYS), E(ENOTBLK), E(ENOTCONN), @@ -144,9 +141,6 @@ static const char *names_0[] = { #endif E(EREMOTE), E(EREMOTEIO), -#ifdef EREMOTERELEASE - E(EREMOTERELEASE), -#endif E(ERESTART), E(ERFKILL), E(EROFS), diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ -- cgit v1.2.3 From 43266838515d30dc0c45d5c7e6e7edacee6cce92 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 21:57:19 +0100 Subject: parisc: Reduce size of the bug_table on 64-bit kernel by half Enable GENERIC_BUG_RELATIVE_POINTERS which will store 32-bit relative offsets to the bug address and the source file name instead of 64-bit absolute addresses. This effectively reduces the size of the bug_table[] array by half on 64-bit kernels. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 7 +++++-- arch/parisc/include/asm/bug.h | 34 +++++++++++++++++----------------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a7c9c0e69e5a..d14ccc948a29 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64 default n config GENERIC_BUG - bool - default y + def_bool y depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if 64BIT + +config GENERIC_BUG_RELATIVE_POINTERS + bool config GENERIC_HWEIGHT bool diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index b9cad0bb4461..1641ff9a8b83 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -17,26 +17,27 @@ #define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff" #define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */ -#if defined(CONFIG_64BIT) -#define ASM_WORD_INSN ".dword\t" +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS +# define __BUG_REL(val) ".word " __stringify(val) " - ." #else -#define ASM_WORD_INSN ".word\t" +# define __BUG_REL(val) ".word " __stringify(val) #endif + #ifdef CONFIG_DEBUG_BUGVERBOSE #define BUG() \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)) ); \ unreachable(); \ } while(0) @@ -54,15 +55,15 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ @@ -71,13 +72,12 @@ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ "\t.align %2\n" \ - "2:\t" ASM_WORD_INSN "1b\n" \ + "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ - "\t.blockz %1-%2-2\n" \ + "\t.blockz %1-4-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #endif -- cgit v1.2.3 From db2832309a82b9acc4b8cc33a1831d36507ec13e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 24 Nov 2023 08:48:52 +0100 Subject: x86/xen: fix percpu vcpu_info allocation Today the percpu struct vcpu_info is allocated via DEFINE_PER_CPU(), meaning that it could cross a page boundary. In this case registering it with the hypervisor will fail, resulting in a panic(). This can easily be fixed by using DEFINE_PER_CPU_ALIGNED() instead, as struct vcpu_info is guaranteed to have a size of 64 bytes, matching the cache line size of x86 64-bit processors (Xen doesn't support 32-bit processors). Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20231124074852.25161-1-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 6 +++++- arch/x86/xen/xen-ops.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0337392a3121..3c61bb98c10e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page); * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. + * Make sure that xen_vcpu_info doesn't cross a page boundary by making it + * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, + * which matches the cache line size of 64-bit x86 processors). */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ DEFINE_PER_CPU(uint32_t, xen_vcpu_id); @@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; + BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 408a2aa66c69..a87ab36889e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -21,7 +21,7 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); -- cgit v1.2.3 From 5e1d824f9a283cbf90f25241b66d1f69adb3835b Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sun, 19 Nov 2023 09:18:02 -0600 Subject: powerpc: Don't clobber f0/vs0 during fp|altivec register save During floating point and vector save to thread data f0/vs0 are clobbered by the FPSCR/VSCR store routine. This has been obvserved to lead to userspace register corruption and application data corruption with io-uring. Fix it by restoring f0/vs0 after FPSCR/VSCR store has completed for all the FP, altivec, VMX register save paths. Tested under QEMU in kvm mode, running on a Talos II workstation with dual POWER9 DD2.2 CPUs. Additional detail (mpe): Typically save_fpu() is called from __giveup_fpu() which saves the FP regs and also *turns off FP* in the tasks MSR, meaning the kernel will reload the FP regs from the thread struct before letting the task use FP again. So in that case save_fpu() is free to clobber f0 because the FP regs no longer hold live values for the task. There is another case though, which is the path via: sys_clone() ... copy_process() dup_task_struct() arch_dup_task_struct() flush_all_to_thread() save_all() That path saves the FP regs but leaves them live. That's meant as an optimisation for a process that's using FP/VSX and then calls fork(), leaving the regs live means the parent process doesn't have to take a fault after the fork to get its FP regs back. The optimisation was added in commit 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up"). That path does clobber f0, but f0 is volatile across function calls, and typically programs reach copy_process() from userspace via a syscall wrapper function. So in normal usage f0 being clobbered across a syscall doesn't cause visible data corruption. But there is now a new path, because io-uring can call copy_process() via create_io_thread() from the signal handling path. That's OK if the signal is handled as part of syscall return, but it's not OK if the signal is handled due to some other interrupt. That path is: interrupt_return_srr_user() interrupt_exit_user_prepare() interrupt_exit_user_prepare_main() do_notify_resume() get_signal() task_work_run() create_worker_cb() create_io_worker() copy_process() dup_task_struct() arch_dup_task_struct() flush_all_to_thread() save_all() if (tsk->thread.regs->msr & MSR_FP) save_fpu() # f0 is clobbered and potentially live in userspace Note the above discussion applies equally to save_altivec(). Fixes: 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up") Cc: stable@vger.kernel.org # v4.6+ Closes: https://lore.kernel.org/all/480932026.45576726.1699374859845.JavaMail.zimbra@raptorengineeringinc.com/ Closes: https://lore.kernel.org/linuxppc-dev/480221078.47953493.1700206777956.JavaMail.zimbra@raptorengineeringinc.com/ Tested-by: Timothy Pearson Tested-by: Jens Axboe Signed-off-by: Timothy Pearson [mpe: Reword change log to describe exact path of corruption & other minor tweaks] Signed-off-by: Michael Ellerman Link: https://msgid.link/1921539696.48534988.1700407082933.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/fpu.S | 13 +++++++++++++ arch/powerpc/kernel/vector.S | 2 ++ 2 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX -- cgit v1.2.3 From dc158d23b33df9033bcc8e7117e8591dd2f9d125 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 22 Nov 2023 12:58:11 +1000 Subject: KVM: PPC: Book3S HV: Fix KVM_RUN clobbering FP/VEC user registers Before running a guest, the host process (e.g., QEMU) FP/VEC registers are saved if they were being used, similarly to when the kernel uses FP registers. The guest values are then loaded into regs, and the host process registers will be restored lazily when it uses FP/VEC. KVM HV has a bug here: the host process registers do get saved, but the user MSR bits remain enabled, which indicates the registers are valid for the process. After they are clobbered by running the guest, this valid indication causes the host process to take on the FP/VEC register values of the guest. Fixes: 34e119c96b2b ("KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20231122025811.2973-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 392404688cec..9452a54d356c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { -- cgit v1.2.3 From f5259997f3e8d6edfcc2daf5b2c0b34f074d7bc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Nov 2023 13:00:51 +0100 Subject: arm64: Avoid enabling KPTI unnecessarily Commit 42c5a3b04bf6 refactored the KPTI init code in a way that results in the use of non-global kernel mappings even on systems that have no need for it, and even when KPTI has been disabled explicitly via the command line. Ensure that this only happens when we have decided (based on the detected system-wide CPU features) that KPTI should be enabled. Fixes: 42c5a3b04bf6 ("arm64: Split kpti_install_ng_mappings()") Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20231127120049.2258650-6-ardb@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 646591c67e7a..91d2d6714969 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1839,6 +1839,10 @@ static int __init __kpti_install_ng_mappings(void *__unused) static void __init kpti_install_ng_mappings(void) { + /* Check whether KPTI is going to be used */ + if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0)) + return; + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not -- cgit v1.2.3