diff options
Diffstat (limited to 'arch/s390/kernel')
37 files changed, 411 insertions, 516 deletions
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 88f0b91d7a73..6252b7d115dd 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -5,7 +5,6 @@ #include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); -int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8d5d0de35de0..90c0e6408992 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,41 +1,90 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + #include <linux/uaccess.h> +#include <linux/printk.h> #include <asm/nospec-branch.h> #include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef __kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - u8 *instr, *replacement; + struct alt_debug *d; struct alt_instr *a; - bool replace; + bool debug, replace; + u8 *old, *new; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ + d = &alt_debug; for (a = start; a < end; a++) { if (!(a->ctx & ctx)) continue; switch (a->type) { case ALT_TYPE_FACILITY: replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); break; case ALT_TYPE_SPEC: replace = nobp_enabled(); - break; - case ALT_TYPE_LOWCORE: - replace = have_relocated_lowcore(); + debug = d->spec; break; default: replace = false; + debug = false; } if (!replace) continue; - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - s390_kernel_write(instr, replacement, a->instrlen); + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); } } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 36709112ae7a..49bb197c8c81 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -49,6 +49,7 @@ int main(void) OFFSET(__PT_R14, pt_regs, gprs[14]); OFFSET(__PT_R15, pt_regs, gprs[15]); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); OFFSET(__PT_FLAGS, pt_regs, flags); OFFSET(__PT_CR1, pt_regs, cr1); OFFSET(__PT_LAST_BREAK, pt_regs, last_break); @@ -76,7 +77,8 @@ int main(void) OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); OFFSET(__LC_PER_CODE, lowcore, per_code); @@ -122,7 +124,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index bf983513dd33..03f3a1e52430 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr) { union register_pair rp = { .even = (unsigned long)addr, }; - asm volatile( + asm_inline volatile( " diag %[rp],%[subcode],0x320\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 276cb4c1e11b..4a981266b483 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -246,15 +246,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -279,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -298,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -322,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -346,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -371,7 +355,7 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -610,7 +594,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/diag/diag.c b/arch/s390/kernel/diag/diag.c index e15b8dee3228..56b862ba9be8 100644 --- a/arch/s390/kernel/diag/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -195,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad { union register_pair rp = { .even = *subcode, .odd = size }; - asm volatile( + asm_inline volatile( " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -286,7 +286,7 @@ int diag224(void *ptr) int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); - asm volatile("\n" + asm_inline volatile("\n" " diag %[type],%[addr],0x224\n" "0: lhi %[rc],0\n" "1:\n" diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..911b95cd57e5 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -198,13 +198,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2fa25164df7d..b6d3c7a6209d 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -21,6 +22,7 @@ #include <asm/asm-extable.h> #include <linux/memblock.h> #include <asm/access-regs.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/fpu.h> @@ -36,12 +38,14 @@ #include <asm/boot_data.h> #include "entry.h" -#define decompressor_handled_param(param) \ -static int __init ignore_decompressor_param_##param(char *s) \ +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ { \ return 0; \ } \ -early_param(#param, ignore_decompressor_param_##param) +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); @@ -51,6 +55,7 @@ decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -63,21 +68,6 @@ static void __init kasan_early_init(void) #endif } -static void __init reset_tod_clock(void) -{ - union tod_clock clk; - - if (store_tod_clock_ext_cc(&clk) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) - disabled_wait(); - - memset(&tod_clock_base, 0, sizeof(tod_clock_base)); - tod_clock_base.tod = TOD_UNIX_EPOCH; - get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; -} - /* * Initialize storage key for kernel pages */ @@ -96,26 +86,6 @@ static noinline __init void init_kernel_storage_key(void) static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) -{ - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; - return; - } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Detect known hypervisors */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; - else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - get_lowcore()->machine_flags |= MACHINE_FLAG_VM; -} - /* Remove leading, trailing and double whitespace. */ static inline void strim_all(char *str) { @@ -156,9 +126,9 @@ static noinline __init void setup_arch_string(void) strim_all(hvstr); } else { sprintf(hvstr, "%s", - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? "KVM" : "unknown"); } dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } @@ -167,9 +137,8 @@ static __init void setup_topology(void) { int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -218,65 +187,10 @@ static noinline __init void setup_lowcore_early(void) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_diag9c(void) -{ - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - diag_stat_inc(DIAG_STAT_X09C); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; -} - -static __init void detect_machine_facilities(void) -{ - if (test_facility(8)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; - system_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(50) && test_facility(73)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_TE; - system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); - } - if (test_facility(51)) - get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) - system_ctl_set_bit(0, CR0_VECTOR_BIT); - if (test_facility(130)) - get_lowcore()->machine_flags |= MACHINE_FLAG_NX; - if (test_facility(133)) - get_lowcore()->machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base.tod >> 63)) { - /* Enabled signed clock comparator comparisons */ - get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; - clock_comparator_max = -1ULL >> 1; - system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); - } - if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; - /* the control bit is set during PCI initialization */ - } - if (test_facility(194)) - get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; - if (test_facility(85)) - get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN; -} - static inline void save_vector_registers(void) { #ifdef CONFIG_CRASH_DUMP - if (test_facility(129)) + if (cpu_has_vx()) save_vx_regs(boot_cpu_vector_save_area); #endif } @@ -308,17 +222,13 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { kasan_early_init(); - reset_tod_clock(); time_early_init(); init_kernel_storage_key(); lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - detect_machine_type(); setup_arch_string(); setup_boot_command_line(); - detect_diag9c(); - detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4cc3408c4dac..b0c2356697fd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,6 +29,7 @@ #include <asm/nmi.h> #include <asm/nospec-insn.h> #include <asm/lowcore.h> +#include <asm/machine.h> _LPP_OFFSET = __LC_LPP @@ -44,7 +45,7 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE_2 "b \lpswe;nopr", \ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro MBEAR reg, lowcore @@ -67,7 +68,7 @@ _LPP_OFFSET = __LC_LPP clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel la %r14,\savearea(\lowcore) - j stack_overflow + j stack_invalid .endm /* @@ -315,7 +316,7 @@ SYM_CODE_START(pgm_check_handler) tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc 2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f + # CHECK_VMAP_STACK branches to stack_invalid or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -590,11 +591,11 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. */ -SYM_CODE_START(stack_overflow) +SYM_CODE_START(stack_invalid) GET_LC %r15 lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -604,8 +605,8 @@ SYM_CODE_START(stack_overflow) stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -SYM_CODE_END(stack_overflow) + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) .section .data, "aw" .balign 4 @@ -621,7 +622,7 @@ SYM_DATA_END(daton_psw) .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table) #undef SYSCALL @@ -629,7 +630,7 @@ SYM_DATA_END(sys_call_table) #define SYSCALL(esame,emu) .quad __s390_ ## emu SYM_DATA_START(sys_call_table_emu) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table_emu) #undef SYSCALL #endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a1f28879c87e..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void kernel_stack_overflow(struct pt_regs * regs); +void kernel_stack_invalid(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index e540b022ceb2..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/kmsan-checks.h> +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/execmem.h> #include <trace/syscall.h> @@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return !MACHINE_HAS_SEQ_INSN; + return !cpu_has_seq_insn(); } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, old_addr, addr); else return ftrace_modify_trampoline_call(rec, old_addr, addr); @@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */ - if (MACHINE_HAS_SEQ_INSN) + /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, addr, 0); else return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); @@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, 0, addr); else return ftrace_make_trampoline_call(rec, addr); diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index 0b68168d9566..cf26d7a37425 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -109,7 +110,7 @@ static int gs_broadcast(void) SYSCALL_DEFINE2(s390_guarded_storage, int, command, struct gs_cb __user *, gs_cb) { - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -EOPNOTSUPP; switch (command) { case GS_ENABLE: diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 396034b2fe67..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,10 @@ __HEAD SYM_CODE_START(startup_continue) - larl %r1,tod_clock_base - GET_LC %r2 - mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # + GET_LC %r2 larl %r14,init_task stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 7857a7e8e56c..e7b66d046e8d 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -45,6 +45,7 @@ * therefore delaying the throughput loss caused by using SMP threads. */ +#include <linux/cpufeature.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/device.h> @@ -87,7 +88,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); static int hd_set_hiperdispatch_mode(int enable) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) enable = 0; if (hd_enabled == enable) return 0; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 69be2309cde0..3b9d9ccfad63 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -22,6 +22,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> @@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr) r1.even = addr; r1.odd = 0; - asm volatile( + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -685,7 +686,7 @@ static int __init ipl_init(void) goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else @@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && ipl_block_valid && + if (machine_is_vm() && ipl_block_valid && (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; @@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); @@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void) return -ENOMEM; rc = sysfs_create_group(&reipl_kset->kobj, - MACHINE_IS_VM ? &reipl_ccw_attr_group_vm - : &reipl_ccw_attr_group_lpar); + machine_is_vm() ? &reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; @@ -1987,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -2264,7 +2265,7 @@ static void __init strncpy_skip_quote(char *dst, char *src, int n) static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE); vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0; @@ -2275,7 +2276,7 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE); vmcmd_on_panic[VMCMD_MAX_SIZE] = 0; @@ -2286,7 +2287,7 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE); vmcmd_on_halt[VMCMD_MAX_SIZE] = 0; @@ -2297,7 +2298,7 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE); vmcmd_on_poff[VMCMD_MAX_SIZE] = 0; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -25,6 +26,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> @@ -84,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, @@ -149,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -164,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); - } while (MACHINE_IS_LPAR && irq_pending(regs)); + } while (machine_is_lpar() && irq_pending(regs)); irq_exit_rcu(); @@ -185,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 8b80ea57125f..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -13,6 +13,7 @@ #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/extable.h> @@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8f681ccfb83a..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,7 +13,9 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/cpufeature.h> #include <asm/guarded_storage.h> +#include <asm/machine.h> #include <asm/pfault.h> #include <asm/cio.h> #include <asm/fpu.h> @@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image) mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; @@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void) static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if (MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fbd218b6fc8e..3da371c144eb 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/entry-common.h> @@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); static inline int nmi_needs_mcesa(void) { - return cpu_has_vx() || MACHINE_HAS_GS; + return cpu_has_vx() || cpu_has_gs(); } /* @@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) if (!nmi_needs_mcesa()) return; *mcesad = __pa(&boot_mcesa); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); } @@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad) *mcesad = 0; if (!nmi_needs_mcesa()) return 0; - size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ kmemleak_not_leak(origin); *mcesad = __pa(origin); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 10725f5a6f0f..63875270941b 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -518,7 +518,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index a8f0bad99cf0..fd14d5ebccbc 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -542,7 +542,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..54e281436a28 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> @@ -19,6 +20,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <asm/text-patching.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) @@ -209,14 +211,14 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_DFP; /* huge page support */ - if (MACHINE_HAS_EDAT1) + if (cpu_has_edat1()) elf_hwcap |= HWCAP_HPAGE; /* 64-bit register support for 31-bit processes */ elf_hwcap |= HWCAP_HIGH_GPRS; /* transactional execution */ - if (MACHINE_HAS_TE) + if (machine_has_tx()) elf_hwcap |= HWCAP_TE; /* vector */ @@ -244,10 +246,10 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_NNPA; /* guarded storage */ - if (MACHINE_HAS_GS) + if (cpu_has_gs()) elf_hwcap |= HWCAP_GS; - if (MACHINE_HAS_PCI_MIO) + if (test_machine_feature(MFEATURE_PCI_MIO)) elf_hwcap |= HWCAP_PCI_MIO; /* virtualization support */ diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1cfed8b710b8..34b8d9e745df 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,10 +7,10 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -31,6 +31,9 @@ #include <asm/unistd.h> #include <asm/runtime_instr.h> #include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> #include <asm/fpu.h> #include "entry.h" @@ -60,7 +63,7 @@ void update_cr_regs(struct task_struct *task) cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (machine_has_tx()) { /* Set or clear transaction execution TXC bit 8. */ cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) @@ -75,7 +78,7 @@ void update_cr_regs(struct task_struct *task) } } /* Take care of enable/disable of guarded storage. */ - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { cr2_new.gse = 0; if (task->thread.gs_cb) cr2_new.gse = 1; @@ -470,18 +473,18 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GET_LAST_BREAK: return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; case PTRACE_TE_ABORT_RAND: - if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) return -EIO; switch (data) { case 0UL: @@ -1033,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1050,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target, struct gs_cb gs_cb = { }, *data = NULL; int rc; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1087,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1101,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1571,5 +1574,5 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) addr = kernel_stack_pointer(regs) + n * sizeof(long); if (!regs_within_kernel_stack(regs, addr)) return 0; - return *(unsigned long *)addr; + return READ_ONCE_NOCHECK(addr); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d78bcfe707b5..b6686d63b754 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/archrandom.h> #include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> #include <asm/facility.h> #include <asm/smp.h> @@ -180,8 +181,6 @@ unsigned long __bootdata_preserved(MODULES_END); struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -DEFINE_STATIC_KEY_FALSE(cpu_has_bear); - /* * The Write Back bit position in the physaddr is given by the SLPC PCI. * Leaving the mask zero always uses write through which is safe @@ -251,7 +250,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -289,7 +288,7 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_KVM) { + } else if (machine_is_kvm()) { if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) @@ -652,7 +651,7 @@ static void __init reserve_crashkernel(void) return; } - if (!oldmem_data.start && MACHINE_IS_VM) + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -898,12 +897,12 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); @@ -911,7 +910,7 @@ void __init setup_arch(char **cmdline_p) if (!boot_earlyprintk) boot_rb_foreach(print_rb_entry); - if (have_relocated_lowcore()) + if (machine_has_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); log_component_list(); @@ -961,7 +960,7 @@ void __init setup_arch(char **cmdline_p) setup_uv(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); - if (MACHINE_HAS_EDAT2) + if (cpu_has_edat2()) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); reserve_crashkernel(); @@ -981,10 +980,6 @@ void __init setup_arch(char **cmdline_p) numa_setup(); smp_detect_cpus(); topology_init_early(); - - if (test_facility(193)) - static_branch_enable(&cpu_has_bear); - setup_protection_map(); /* * Create kernel page tables. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b08399b0846..f2f05c5277f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -18,6 +18,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/export.h> @@ -38,6 +39,7 @@ #include <linux/kprobes.h> #include <asm/access-regs.h> #include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -416,7 +418,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void notrace smp_yield_cpu(int cpu) { - if (!MACHINE_HAS_DIAG9C) + if (!machine_has_diag9c()) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" @@ -561,10 +563,10 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!cpu_has_vx() && !MACHINE_HAS_GS) + if (!cpu_has_vx() && !cpu_has_gs()) return 0; pa = lc->mcesad & MCESA_ORIGIN_MASK; - if (MACHINE_HAS_GS) + if (cpu_has_gs()) pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 5ec28028315b..4fee74553ca2 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -12,6 +12,7 @@ * platform. */ +#include <linux/cpufeature.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -81,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -static void do_syscall(struct pt_regs *regs) +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { unsigned long nr; + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); nr = regs->int_code & 0xffff; - if (!nr) { + if (likely(!nr)) { nr = regs->gprs[1] & 0xffff; regs->int_code &= ~0xffffUL; regs->int_code |= nr; } - regs->gprs[2] = nr; - if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { regs->psw.addr = current->restart_block.arch_data; current->restart_block.arch_data = 1; } nr = syscall_enter_from_user_mode_work(regs, nr); - /* * In the s390 ptrace ABI, both the syscall number and the return value * use gpr2. However, userspace puts the syscall number either in the @@ -107,37 +118,11 @@ static void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. */ - if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr >= NR_syscalls)) - goto out; - do { + if (likely(nr < NR_syscalls)) regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); out: - syscall_exit_to_user_mode_work(regs); -} - -void noinstr __do_syscall(struct pt_regs *regs, int per_trap) -{ - add_random_kstack_offset(); - enter_from_user_mode(regs); - regs->psw = get_lowcore()->svc_old_psw; - regs->int_code = get_lowcore()->svc_int_code; - update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) - current->thread.last_break = regs->last_break; - - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - - if (per_trap) - set_thread_flag(TIF_PER_TRAP); - - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - exit_to_user_mode(); + syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 88055f58fbda..1ea84e942bd4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -5,6 +5,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -15,54 +16,17 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> #include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) -{ - int r0 = (fc << 28) | sel1; - int rc = 0; - - asm volatile( - " lr 0,%[r0]\n" - " lr 1,%[r1]\n" - " stsi 0(%[sysinfo])\n" - "0: jz 2f\n" - "1: lhi %[rc],%[retval]\n" - "2: lr %[r0],0\n" - EX_TABLE(0b, 1b) - : [r0] "+d" (r0), [rc] "+d" (rc) - : [r1] "d" (sel2), - [sysinfo] "a" (sysinfo), - [retval] "K" (-EOPNOTSUPP) - : "cc", "0", "1", "memory"); - *lvl = ((unsigned int) r0) >> 28; - return rc; -} - -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - int lvl, rc; - - rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); - if (rc) - return rc; - return fc ? 0 : lvl; -} -EXPORT_SYMBOL(stsi); - #ifdef CONFIG_PROC_FS static bool convert_ext_name(unsigned char encoding, char *name, size_t len) @@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -415,7 +379,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); - if (MACHINE_IS_VM) + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -559,7 +523,7 @@ static __init int stsi_init_debugfs(void) sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); } - if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; sprintf(link_to, "15_1_%d", topology_mnest_limit()); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e9f47c3a6197..c900deddd36d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -54,10 +54,10 @@ #include <asm/cio.h> #include "entry.h" -union tod_clock tod_clock_base __section(".data"); +union tod_clock __bootdata_preserved(tod_clock_base); EXPORT_SYMBOL_GPL(tod_clock_base); -u64 clock_comparator_max = -1ULL; +u64 __bootdata_preserved(clock_comparator_max); EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -79,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 211cc8382e4a..3df048e190b1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -6,6 +6,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/uaccess.h> @@ -240,7 +241,7 @@ int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -315,13 +316,13 @@ static int __arch_update_cpu_topology(void) hd_status = 0; rc = 0; mutex_lock(&smp_cpu_state_mutex); - if (MACHINE_HAS_TOPOLOGY) { + if (cpu_has_topology()) { rc = 1; store_topology(info); tl_to_masks(info); } update_cpu_masks(); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) topology_update_polarization_simple(); if (cpu_management == 1) hd_status = hd_enable_hiperdispatch(); @@ -376,7 +377,7 @@ static void set_topology_timer(void) void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. @@ -500,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu) int rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); - if (rc || !MACHINE_HAS_TOPOLOGY) + if (rc || !cpu_has_topology()) return rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); if (rc) @@ -569,12 +570,12 @@ void __init topology_init_early(void) set_sched_topology(s390_topology); if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) topology_mode = TOPOLOGY_MODE_HW; else topology_mode = TOPOLOGY_MODE_SINGLE; } - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) goto out; tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); info = tl_info; @@ -596,7 +597,7 @@ static inline int topology_get_mode(int enabled) { if (!enabled) return TOPOLOGY_MODE_SINGLE; - return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; + return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } static inline int topology_is_enabled(void) @@ -686,7 +687,7 @@ static int __init topology_init(void) int rc = 0; timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) set_topology_timer(); else topology_update_polarization_simple(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index b746213d3110..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -3,18 +3,13 @@ * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include "asm/irqflags.h" -#include "asm/ptrace.h" +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/randomize_kstack.h> @@ -29,6 +24,8 @@ #include <linux/entry-common.h> #include <linux/kmsan.h> #include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> #include <asm/vtime.h> #include <asm/fpu.h> #include <asm/fault.h> @@ -42,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) (address - (regs->int_code >> 16)); + return (void __user *)(address - (regs->int_code >> 16)); } #ifdef CONFIG_GENERIC_BUG @@ -57,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (user_mode(regs)) { force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); - } else { + } else { if (!fixup_exception(regs)) die(regs, str); - } + } } static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) return; do_report_trap(regs, si_signo, si_code, str); } @@ -78,8 +74,7 @@ void do_per_trap(struct pt_regs *regs) return; if (!current->ptrace) return; - force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -98,36 +93,25 @@ static void name(struct pt_regs *regs) \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") -DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -153,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs) static void illegal_op(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location; int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (opcode == S390_BREAKPOINT_U16) { if (current->ptrace) force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES - } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + } else if (opcode == UPROBE_SWBP_INSN) { is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; + } } /* - * We got either an illegal op in kernel mode, or user space trapped + * This is either an illegal op in kernel mode, or user space trapped * on a uprobes illegal instruction. See if kprobes or uprobes picks * it up. If not, SIGILL. */ if (is_uprobe_insn || !user_mode(regs)) { - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } if (signal) @@ -190,18 +173,10 @@ static void illegal_op(struct pt_regs *regs) } NOKPROBE_SYMBOL(illegal_op); -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); - static void vector_exception(struct pt_regs *regs) { int si_code, vic; - if (!cpu_has_vx()) { - do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); - return; - } - /* get vector interrupt code from fpc */ save_user_fpu_regs(); vic = (current->thread.ufpu.fpc & 0xf00) >> 8; @@ -249,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs) { if (user_mode(regs)) return; - switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: fixup_exception(regs); @@ -262,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs) } } -void kernel_stack_overflow(struct pt_regs *regs) +void kernel_stack_invalid(struct pt_regs *regs) { /* * Normally regs are unpoisoned by the generic entry code, but @@ -270,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs) */ kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); } -NOKPROBE_SYMBOL(kernel_stack_overflow); +NOKPROBE_SYMBOL(kernel_stack_invalid); static void __init test_monitor_call(void) { @@ -283,7 +257,7 @@ static void __init test_monitor_call(void) if (!IS_ENABLED(CONFIG_BUG)) return; - asm volatile( + asm_inline volatile( " mc 0,0\n" "0: lhi %[val],0\n" "1:\n" @@ -323,7 +297,6 @@ void noinstr __do_pgm_check(struct pt_regs *regs) teid.val = lc->trans_exc_code; regs->int_code = lc->pgm_int_code; regs->int_parm_long = teid.val; - /* * In case of a guest fault, short-circuit the fault handler and return. * This way the sie64a() function will return 0; fault address and @@ -336,23 +309,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.gmap_int_code = regs->int_code & 0xffff; return; } - state = irqentry_enter(regs); - if (user_mode(regs)) { update_timer_sys(); - if (!static_branch_likely(&cpu_has_bear)) { + if (!cpu_has_bear()) { if (regs->last_break < 4096) regs->last_break = 1; } current->thread.last_break = regs->last_break; } - if (lc->pgm_code & 0x0200) { /* transaction abort */ current->thread.trap_tdb = lc->pgm_tdb; } - if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = ¤t->thread.per_event; @@ -368,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) goto out; } } - if (!irqs_disabled_flags(regs->psw.mask)) trace_hardirqs_on(); __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); - trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9f05df2da2f7..9a5d5be8acf4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -206,6 +206,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -228,7 +261,7 @@ static int expected_folio_refs(struct folio *folio) } /** - * make_folio_secure() - make a folio secure + * __make_folio_secure() - make a folio secure * @folio: the folio to make secure * @uvcb: the uvcb that describes the UVC to be used * @@ -237,20 +270,18 @@ static int expected_folio_refs(struct folio *folio) * * Return: 0 on success; * -EBUSY if the folio is in writeback or has too many references; - * -E2BIG if the folio is large; * -EAGAIN if the UVC needs to be attempted again; * -ENXIO if the address is not mapped; * -EINVAL if the UVC failed for other reasons. * * Context: The caller must hold exactly one extra reference on the folio - * (it's the same logic as split_folio()) + * (it's the same logic as split_folio()), and the folio must be + * locked. */ -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; - if (folio_test_large(folio)) - return -E2BIG; if (folio_test_writeback(folio)) return -EBUSY; expected = expected_folio_refs(folio) + 1; @@ -277,7 +308,98 @@ int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -EXPORT_SYMBOL_GPL(make_folio_secure); + +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) +{ + int rc; + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; +} + +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split. + * @mm: the mm containing the folio to work on + * @folio: the folio + * @split: whether to split a large folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the folio was split successfully; + * -EAGAIN if the folio was not split successfully but another attempt + * can be made, or if @split was set to false; + * -EINVAL in case of other errors. See split_folio(). + */ +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +{ + int rc; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + if (split) { + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + + if (rc != -EBUSY) + return rc; + } + return -EAGAIN; +} + +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) +{ + struct vm_area_struct *vma; + struct folio_walk fw; + struct folio *folio; + int rc; + + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } + + folio_get(folio); + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); + folio_walk_end(&fw, vma); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) + rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG); + folio_put(folio); + + return rc; +} +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..70c8f9ad13cd 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,14 +73,11 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f76..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index ad206f2068d8..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb53..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text |