From a670468f5e0b5fad4db6e4d195f15915dc2a35c1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 21 Aug 2018 21:53:06 -0700 Subject: mm: zero out the vma in vma_init() Rather than in vm_area_alloc(). To ensure that the various oddball stack-based vmas are in a good state. Some of the callers were zeroing them out, others were not. Acked-by: Kirill A. Shutemov Cc: Russell King Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 5ee74c113381..8c760effa42e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -310,8 +310,9 @@ static struct kmem_cache *mm_cachep; struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { - struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + struct vm_area_struct *vma; + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) vma_init(vma, mm); return vma; -- cgit v1.2.3 From eff4345e7fba0aac8665f00e8599b36946d9aaec Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Aug 2018 21:55:17 -0700 Subject: crash_core: use VMCOREINFO_SYMBOL_ARRAY() for swapper_pg_dir This is preparation for allowing CRASH_CORE to be enabled for any architecture. swapper_pg_dir is always either an array or a macro expanding to NULL. In the latter case, VMCOREINFO_SYMBOL() won't work, as it tries to take the address of the given symbol: #define VMCOREINFO_SYMBOL(name) \ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) Instead, use VMCOREINFO_SYMBOL_ARRAY(), which uses the value: #define VMCOREINFO_SYMBOL_ARRAY(name) \ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) This is the same thing for the array case but isn't an error for the macro case. Link: http://lkml.kernel.org/r/c05f9781ec204f40fc96f95086e7b6de6a3eb2c3.1532563124.git.osandov@fb.com Signed-off-by: Omar Sandoval Cc: Alexey Dobriyan Cc: Bhupesh Sharma Cc: Eric Biederman Cc: James Morse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/crash_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/crash_core.c b/kernel/crash_core.c index b66aced5e8c2..e7b4025c7b24 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -401,7 +401,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(init_uts_ns); VMCOREINFO_SYMBOL(node_online_map); #ifdef CONFIG_MMU - VMCOREINFO_SYMBOL(swapper_pg_dir); + VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); #endif VMCOREINFO_SYMBOL(_stext); VMCOREINFO_SYMBOL(vmap_area_list); -- cgit v1.2.3 From 23c85094fe1895caefdd19ef624ee687ec5f4507 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Aug 2018 21:55:20 -0700 Subject: proc/kcore: add vmcoreinfo note to /proc/kcore The vmcoreinfo information is useful for runtime debugging tools, not just for crash dumps. A lot of this information can be determined by other means, but this is much more convenient, and it only adds a page at most to the file. Link: http://lkml.kernel.org/r/fddbcd08eed76344863303878b12de1c1e2a04b6.1531953780.git.osandov@fb.com Signed-off-by: Omar Sandoval Cc: Alexey Dobriyan Cc: Bhupesh Sharma Cc: Eric Biederman Cc: James Morse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/Kconfig | 1 + fs/proc/kcore.c | 18 ++++++++++++++++-- include/linux/crash_core.h | 2 ++ kernel/crash_core.c | 4 ++-- 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 0eaeb41453f5..817c02b13b1d 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -31,6 +31,7 @@ config PROC_FS config PROC_KCORE bool "/proc/kcore support" if !ARM depends on PROC_FS && MMU + select CRASH_CORE help Provides a virtual ELF core file of the live kernel. This can be read with gdb and other ELF tools. No modifications can be diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 758c14e46a44..80464432dfe6 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -10,6 +10,7 @@ * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar */ +#include #include #include #include @@ -81,10 +82,13 @@ static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len, } *phdrs_len = *nphdr * sizeof(struct elf_phdr); - *notes_len = (3 * (sizeof(struct elf_note) + ALIGN(sizeof(CORE_STR), 4)) + + *notes_len = (4 * sizeof(struct elf_note) + + 3 * ALIGN(sizeof(CORE_STR), 4) + + VMCOREINFO_NOTE_NAME_BYTES + ALIGN(sizeof(struct elf_prstatus), 4) + ALIGN(sizeof(struct elf_prpsinfo), 4) + - ALIGN(arch_task_struct_size, 4)); + ALIGN(arch_task_struct_size, 4) + + ALIGN(vmcoreinfo_size, 4)); *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len + *notes_len); return *data_offset + size; @@ -406,6 +410,16 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) sizeof(prpsinfo)); append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current, arch_task_struct_size); + /* + * vmcoreinfo_size is mostly constant after init time, but it + * can be changed by crash_save_vmcoreinfo(). Racing here with a + * panic on another CPU before the machine goes down is insanely + * unlikely, but it's better to not leave potential buffer + * overflows lying around, regardless. + */ + append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0, + vmcoreinfo_data, + min(vmcoreinfo_size, notes_len - i)); tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos); if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) { diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index b511f6d24b42..525510a9f965 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -60,6 +60,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +extern unsigned char *vmcoreinfo_data; +extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, diff --git a/kernel/crash_core.c b/kernel/crash_core.c index e7b4025c7b24..a683bfb0530f 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -14,8 +14,8 @@ #include /* vmcoreinfo stuff */ -static unsigned char *vmcoreinfo_data; -static size_t vmcoreinfo_size; +unsigned char *vmcoreinfo_data; +size_t vmcoreinfo_size; u32 *vmcoreinfo_note; /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ -- cgit v1.2.3 From fc37191272a972d449bab3d8247cf52cadf5d4e6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 21 Aug 2018 21:55:38 -0700 Subject: userns: use refcount_t for reference counting instead atomic_t refcount_t type and corresponding API should be used instead of atomic_t wh en the variable is used as a reference counter. This avoids accidental refcounter overflows that might lead to use-after-free situations. Link: http://lkml.kernel.org/r/20180703200141.28415-6-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andrew Morton Cc: "Eric W. Biederman" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/user.h | 5 +++-- kernel/user.c | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 96fe289c4c6e..39ad98c09c58 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -4,6 +4,7 @@ #include #include +#include #include struct key; @@ -12,7 +13,7 @@ struct key; * Some day this will be a full-fledged user tracking system.. */ struct user_struct { - atomic_t __count; /* reference count */ + refcount_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY @@ -59,7 +60,7 @@ extern struct user_struct root_user; extern struct user_struct * alloc_uid(kuid_t); static inline struct user_struct *get_uid(struct user_struct *u) { - atomic_inc(&u->__count); + refcount_inc(&u->__count); return u; } extern void free_uid(struct user_struct *); diff --git a/kernel/user.c b/kernel/user.c index 36288d840675..5f65ef195259 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { - .__count = ATOMIC_INIT(1), + .__count = REFCOUNT_INIT(1), .processes = ATOMIC_INIT(1), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, @@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) hlist_for_each_entry(user, hashent, uidhash_node) { if (uid_eq(user->uid, uid)) { - atomic_inc(&user->__count); + refcount_inc(&user->__count); return user; } } @@ -170,7 +170,7 @@ void free_uid(struct user_struct *up) return; local_irq_save(flags); - if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) + if (refcount_dec_and_lock(&up->__count, &uidhash_lock)) free_user(up, flags); else local_irq_restore(flags); @@ -191,7 +191,7 @@ struct user_struct *alloc_uid(kuid_t uid) goto out_unlock; new->uid = uid; - atomic_set(&new->__count, 1); + refcount_set(&new->__count, 1); ratelimit_state_init(&new->ratelimit, HZ, 100); ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE); -- cgit v1.2.3 From ce0a568d327968367f294ea2425fe2c6f4f33ed2 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 21 Aug 2018 21:55:42 -0700 Subject: userns: use irqsave variant of refcount_dec_and_lock() The irqsave variant of refcount_dec_and_lock handles irqsave/restore when taking/releasing the spin lock. With this variant the call of local_irq_save/restore is no longer required. [bigeasy@linutronix.de: s@atomic_dec_and_lock@refcount_dec_and_lock@g] Link: http://lkml.kernel.org/r/20180703200141.28415-7-bigeasy@linutronix.de Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Cc: "Eric W. Biederman" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/user.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/user.c b/kernel/user.c index 5f65ef195259..0df9b1640b2a 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -169,11 +169,8 @@ void free_uid(struct user_struct *up) if (!up) return; - local_irq_save(flags); - if (refcount_dec_and_lock(&up->__count, &uidhash_lock)) + if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags)) free_user(up, flags); - else - local_irq_restore(flags); } struct user_struct *alloc_uid(kuid_t uid) -- cgit v1.2.3 From 91bc9aaf746ae41016bd6b61a48133e162542574 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 21 Aug 2018 21:55:49 -0700 Subject: kernel/crash_core.c: print timestamp using time64_t The get_seconds() call returns a 32-bit timestamp on some architectures, and will overflow in the future. The newer ktime_get_real_seconds() always returns a 64-bit timestamp that does not suffer from this problem. Link: http://lkml.kernel.org/r/20180618150329.941903-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Andrew Morton Cc: Dave Young Cc: Baoquan He Cc: "Kirill A. Shutemov" Cc: Petr Tesarik Cc: Marc-Andr Lureau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/crash_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/crash_core.c b/kernel/crash_core.c index a683bfb0530f..933cb3e45b98 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -344,7 +344,7 @@ void crash_save_vmcoreinfo(void) if (vmcoreinfo_data_safecopy) vmcoreinfo_data = vmcoreinfo_data_safecopy; - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); + vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); update_vmcoreinfo_note(); } -- cgit v1.2.3 From a2e514453861dd39b53b7a50b6771bd3f9852078 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 21 Aug 2018 21:55:52 -0700 Subject: kernel/hung_task.c: allow to set checking interval separately from timeout Currently task hung checking interval is equal to timeout, as the result hung is detected anywhere between timeout and 2*timeout. This is fine for most interactive environments, but this hurts automated testing setups (syzbot). In an automated setup we need to strictly order CPU lockup < RCU stall < workqueue lockup < task hung < silent loss, so that RCU stall is not detected as task hung and task hung is not detected as silent machine loss. The large variance in task hung detection timeout requires setting silent machine loss timeout to a very large value (e.g. if task hung is 3 mins, then silent loss need to be set to ~7 mins). The additional 3 minutes significantly reduce testing efficiency because usually we crash kernel within a minute, and this can add hours to bug localization process as it needs to do dozens of tests. Allow setting checking interval separately from timeout. This allows to set timeout to, say, 3 minutes, but checking interval to 10 secs. The interval is controlled via a new hung_task_check_interval_secs sysctl, similar to the existing hung_task_timeout_secs sysctl. The default value of 0 results in the current behavior: checking interval is equal to timeout. [akpm@linux-foundation.org: update hung_task_timeout_max's comment] Link: http://lkml.kernel.org/r/20180611111004.203513-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Paul E. McKenney Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 15 ++++++++++++++- include/linux/sched.h | 1 + include/linux/sched/sysctl.h | 1 + kernel/fork.c | 1 + kernel/hung_task.c | 15 ++++++++++++++- kernel/sysctl.c | 13 ++++++++++++- 6 files changed, 43 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 59585030cbaf..9d38e75b19a0 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -38,6 +38,7 @@ show up in /proc/sys/kernel: - hung_task_panic - hung_task_check_count - hung_task_timeout_secs +- hung_task_check_interval_secs - hung_task_warnings - hyperv_record_panic_msg - kexec_load_disabled @@ -355,7 +356,7 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. hung_task_timeout_secs: -Check interval. When a task in D state did not get scheduled +When a task in D state did not get scheduled for more than this value report a warning. This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. @@ -364,6 +365,18 @@ Possible values to set are in range {0..LONG_MAX/HZ}. ============================================================== +hung_task_check_interval_secs: + +Hung task check interval. If hung task checking is enabled +(see hung_task_timeout_secs), the check is done every +hung_task_check_interval_secs seconds. +This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. + +0 (default): means use hung_task_timeout_secs as checking interval. +Possible values to set are in range {0..LONG_MAX/HZ}. + +============================================================== + hung_task_warnings: The maximum number of warnings to report. During a check interval diff --git a/include/linux/sched.h b/include/linux/sched.h index 789923fbee3a..58eb3a2bc695 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -853,6 +853,7 @@ struct task_struct { #endif #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; + unsigned long last_switch_time; #endif /* Filesystem information: */ struct fs_struct *fs; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 913488d828cb..a9c32daeb9d8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -10,6 +10,7 @@ struct ctl_table; extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_check_interval_secs; extern int sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void __user *buffer, diff --git a/kernel/fork.c b/kernel/fork.c index 8c760effa42e..8bcef2413f1b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1302,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) tsk->nvcsw = tsk->nivcsw = 0; #ifdef CONFIG_DETECT_HUNG_TASK tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; + tsk->last_switch_time = 0; #endif tsk->mm = NULL; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 32b479468e4d..b9132d1269ef 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; */ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; +/* + * Zero (default value) means use sysctl_hung_task_timeout_secs: + */ +unsigned long __read_mostly sysctl_hung_task_check_interval_secs; + int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; @@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (switch_count != t->last_switch_count) { t->last_switch_count = switch_count; + t->last_switch_time = jiffies; return; } + if (time_is_after_jiffies(t->last_switch_time + timeout * HZ)) + return; trace_sched_process_hang(t); @@ -245,8 +253,13 @@ static int watchdog(void *dummy) for ( ; ; ) { unsigned long timeout = sysctl_hung_task_timeout_secs; - long t = hung_timeout_jiffies(hung_last_checked, timeout); + unsigned long interval = sysctl_hung_task_check_interval_secs; + long t; + if (interval == 0) + interval = timeout; + interval = min_t(unsigned long, interval, timeout); + t = hung_timeout_jiffies(hung_last_checked, interval); if (t <= 0) { if (!atomic_xchg(&reset_hung_task, 0)) check_hung_uninterruptible_tasks(timeout); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f22f76b7a138..dbdcb6673b27 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -145,7 +145,10 @@ static int minolduid; static int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; -/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */ +/* + * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs + * and hung_task_check_interval_secs + */ #ifdef CONFIG_DETECT_HUNG_TASK static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #endif @@ -1090,6 +1093,14 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dohung_task_timeout_secs, .extra2 = &hung_task_timeout_max, }, + { + .procname = "hung_task_check_interval_secs", + .data = &sysctl_hung_task_check_interval_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, { .procname = "hung_task_warnings", .data = &sysctl_hung_task_warnings, -- cgit v1.2.3 From 7290d58095712a89f845e1bca05334796dd49ed2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Aug 2018 21:56:09 -0700 Subject: module: use relative references for __ksymtab entries An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries, each consisting of two 64-bit fields containing absolute references, to the symbol itself and to a char array containing its name, respectively. When we build the same configuration with KASLR enabled, we end up with an additional ~192 KB of relocations in the .init section, i.e., one 24 byte entry for each absolute reference, which all need to be processed at boot time. Given how the struct kernel_symbol that describes each entry is completely local to module.c (except for the references emitted by EXPORT_SYMBOL() itself), we can easily modify it to contain two 32-bit relative references instead. This reduces the size of the __ksymtab section by 50% for all 64-bit architectures, and gets rid of the runtime relocations entirely for architectures implementing KASLR, either via standard PIE linking (arm64) or using custom host tools (x86). Note that the binary search involving __ksymtab contents relies on each section being sorted by symbol name. This is implemented based on the input section names, not the names in the ksymtab entries, so this patch does not interfere with that. Given that the use of place-relative relocations requires support both in the toolchain and in the module loader, we cannot enable this feature for all architectures. So make it dependent on whether CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined. Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org Signed-off-by: Ard Biesheuvel Acked-by: Jessica Yu Acked-by: Michael Ellerman Reviewed-by: Will Deacon Acked-by: Ingo Molnar Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: James Morris Cc: James Morris Cc: Josh Poimboeuf Cc: Kees Cook Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Petr Mladek Cc: Russell King Cc: "Serge E. Hallyn" Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Thomas Garnier Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/export.h | 5 ----- include/asm-generic/export.h | 12 +++++++++-- include/linux/compiler.h | 19 ++++++++++++++++++ include/linux/export.h | 46 ++++++++++++++++++++++++++++++++----------- kernel/module.c | 32 ++++++++++++++++++++++++------ 6 files changed, 91 insertions(+), 24 deletions(-) delete mode 100644 arch/x86/include/asm/export.h (limited to 'kernel') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index de690c2d2e33..a0ab9ab61c75 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h generic-y += dma-contiguous.h generic-y += early_ioremap.h +generic-y += export.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h deleted file mode 100644 index 2a51d66689c5..000000000000 --- a/arch/x86/include/asm/export.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_64BIT -#define KSYM_ALIGN 16 -#endif -#include diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 68efb950a918..4d73e6e3c66c 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -5,12 +5,10 @@ #define KSYM_FUNC(x) x #endif #ifdef CONFIG_64BIT -#define __put .quad #ifndef KSYM_ALIGN #define KSYM_ALIGN 8 #endif #else -#define __put .long #ifndef KSYM_ALIGN #define KSYM_ALIGN 4 #endif @@ -19,6 +17,16 @@ #define KCRC_ALIGN 4 #endif +.macro __put, val, name +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + .long \val - ., \name - . +#elif defined(CONFIG_64BIT) + .quad \val, \name +#else + .long \val, \name +#endif +.endm + /* * note on .section use: @progbits vs %progbits nastiness doesn't matter, * since we immediately emit into those sections anyway. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c8eab637a2a7..681d866efb1e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -280,6 +280,25 @@ unsigned long read_word_at_a_time(const void *addr) #endif /* __KERNEL__ */ +/* + * Force the compiler to emit 'sym' as a symbol, so that we can reference + * it from inline assembler. Necessary in case 'sym' could be inlined + * otherwise, or eliminated entirely due to lack of references that are + * visible to the compiler. + */ +#define __ADDRESSABLE(sym) \ + static void * __attribute__((section(".discard.addressable"), used)) \ + __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; + +/** + * offset_to_ptr - convert a relative memory offset to an absolute pointer + * @off: the address of the 32-bit offset value + */ +static inline void *offset_to_ptr(const int *off) +{ + return (void *)((unsigned long)off + *off); +} + #endif /* __ASSEMBLY__ */ #ifndef __optimize diff --git a/include/linux/export.h b/include/linux/export.h index ea7df303d68d..ae072bc5aacf 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -18,12 +18,6 @@ #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x) #ifndef __ASSEMBLY__ -struct kernel_symbol -{ - unsigned long value; - const char *name; -}; - #ifdef MODULE extern struct module __this_module; #define THIS_MODULE (&__this_module) @@ -54,17 +48,47 @@ extern struct module __this_module; #define __CRC_SYMBOL(sym, sec) #endif +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#include +/* + * Emit the ksymtab entry as a pair of relative references: this reduces + * the size by half on 64-bit architectures, and eliminates the need for + * absolute relocations that require runtime processing on relocatable + * kernels. + */ +#define __KSYMTAB_ENTRY(sym, sec) \ + __ADDRESSABLE(sym) \ + asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \ + " .balign 8 \n" \ + "__ksymtab_" #sym ": \n" \ + " .long " #sym "- . \n" \ + " .long __kstrtab_" #sym "- . \n" \ + " .previous \n") + +struct kernel_symbol { + int value_offset; + int name_offset; +}; +#else +#define __KSYMTAB_ENTRY(sym, sec) \ + static const struct kernel_symbol __ksymtab_##sym \ + __attribute__((section("___ksymtab" sec "+" #sym), used)) \ + = { (unsigned long)&sym, __kstrtab_##sym } + +struct kernel_symbol { + unsigned long value; + const char *name; +}; +#endif + /* For every exported symbol, place a struct in the __ksymtab section */ #define ___EXPORT_SYMBOL(sym, sec) \ extern typeof(sym) sym; \ __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ - __attribute__((section("__ksymtab_strings"), aligned(1))) \ + __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ = #sym; \ - static const struct kernel_symbol __ksymtab_##sym \ - __used \ - __attribute__((section("___ksymtab" sec "+" #sym), used)) \ - = { (unsigned long)&sym, __kstrtab_##sym } + __KSYMTAB_ENTRY(sym, sec) #if defined(__DISABLE_EXPORTS) diff --git a/kernel/module.c b/kernel/module.c index b046a32520d8..6746c85511fe 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -529,12 +529,30 @@ static bool check_symbol(const struct symsearch *syms, return true; } +static unsigned long kernel_symbol_value(const struct kernel_symbol *sym) +{ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + return (unsigned long)offset_to_ptr(&sym->value_offset); +#else + return sym->value; +#endif +} + +static const char *kernel_symbol_name(const struct kernel_symbol *sym) +{ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + return offset_to_ptr(&sym->name_offset); +#else + return sym->name; +#endif +} + static int cmp_name(const void *va, const void *vb) { const char *a; const struct kernel_symbol *b; a = va; b = vb; - return strcmp(a, b->name); + return strcmp(a, kernel_symbol_name(b)); } static bool find_symbol_in_section(const struct symsearch *syms, @@ -2170,7 +2188,7 @@ void *__symbol_get(const char *symbol) sym = NULL; preempt_enable(); - return sym ? (void *)sym->value : NULL; + return sym ? (void *)kernel_symbol_value(sym) : NULL; } EXPORT_SYMBOL_GPL(__symbol_get); @@ -2200,10 +2218,12 @@ static int verify_export_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { - if (find_symbol(s->name, &owner, NULL, true, false)) { + if (find_symbol(kernel_symbol_name(s), &owner, NULL, + true, false)) { pr_err("%s: exports duplicate symbol %s" " (owned by %s)\n", - mod->name, s->name, module_name(owner)); + mod->name, kernel_symbol_name(s), + module_name(owner)); return -ENOEXEC; } } @@ -2252,7 +2272,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { - sym[i].st_value = ksym->value; + sym[i].st_value = kernel_symbol_value(ksym); break; } @@ -2516,7 +2536,7 @@ static int is_exported(const char *name, unsigned long value, ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); else ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); - return ks != NULL && ks->value == value; + return ks != NULL && kernel_symbol_value(ks) == value; } /* As per nm */ -- cgit v1.2.3 From 1b1eeca7e4c19fa76d409d4c7b338dba21f2df45 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Aug 2018 21:56:13 -0700 Subject: init: allow initcall tables to be emitted using relative references Allow the initcall tables to be emitted using relative references that are only half the size on 64-bit architectures and don't require fixups at runtime on relocatable kernels. Link: http://lkml.kernel.org/r/20180704083651.24360-5-ard.biesheuvel@linaro.org Acked-by: James Morris Acked-by: Sergey Senozhatsky Acked-by: Petr Mladek Acked-by: Michael Ellerman Acked-by: Ingo Molnar Signed-off-by: Ard Biesheuvel Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: James Morris Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Russell King Cc: "Serge E. Hallyn" Cc: Steven Rostedt Cc: Thomas Garnier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 44 +++++++++++++++++++++++++++++++++----------- init/main.c | 32 ++++++++++++++++---------------- kernel/printk/printk.c | 16 +++++++++------- security/security.c | 17 ++++++++++------- 4 files changed, 68 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/include/linux/init.h b/include/linux/init.h index bc27cf03c41e..2538d176dd1f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -116,8 +116,24 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); -extern initcall_t __con_initcall_start[], __con_initcall_end[]; -extern initcall_t __security_initcall_start[], __security_initcall_end[]; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef int initcall_entry_t; + +static inline initcall_t initcall_from_entry(initcall_entry_t *entry) +{ + return offset_to_ptr(entry); +} +#else +typedef initcall_t initcall_entry_t; + +static inline initcall_t initcall_from_entry(initcall_entry_t *entry) +{ + return *entry; +} +#endif + +extern initcall_entry_t __con_initcall_start[], __con_initcall_end[]; +extern initcall_entry_t __security_initcall_start[], __security_initcall_end[]; /* Used for contructor calls. */ typedef void (*ctor_fn_t)(void); @@ -167,9 +183,20 @@ extern bool initcall_debug; * as KEEP() in the linker script. */ -#define __define_initcall(fn, id) \ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define ___define_initcall(fn, id, __sec) \ + __ADDRESSABLE(fn) \ + asm(".section \"" #__sec ".init\", \"a\" \n" \ + "__initcall_" #fn #id ": \n" \ + ".long " #fn " - . \n" \ + ".previous \n"); +#else +#define ___define_initcall(fn, id, __sec) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; + __attribute__((__section__(#__sec ".init"))) = fn; +#endif + +#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) /* * Early initcalls run before initializing SMP. @@ -208,13 +235,8 @@ extern bool initcall_debug; #define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.con_initcall.init) = fn - -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.security_initcall.init) = fn +#define console_initcall(fn) ___define_initcall(fn,, .con_initcall) +#define security_initcall(fn) ___define_initcall(fn,, .security_initcall) struct obs_kernel_param { const char *str; diff --git a/init/main.c b/init/main.c index b729e1f22838..3a6ce89e128f 100644 --- a/init/main.c +++ b/init/main.c @@ -902,18 +902,18 @@ int __init_or_module do_one_initcall(initcall_t fn) } -extern initcall_t __initcall_start[]; -extern initcall_t __initcall0_start[]; -extern initcall_t __initcall1_start[]; -extern initcall_t __initcall2_start[]; -extern initcall_t __initcall3_start[]; -extern initcall_t __initcall4_start[]; -extern initcall_t __initcall5_start[]; -extern initcall_t __initcall6_start[]; -extern initcall_t __initcall7_start[]; -extern initcall_t __initcall_end[]; - -static initcall_t *initcall_levels[] __initdata = { +extern initcall_entry_t __initcall_start[]; +extern initcall_entry_t __initcall0_start[]; +extern initcall_entry_t __initcall1_start[]; +extern initcall_entry_t __initcall2_start[]; +extern initcall_entry_t __initcall3_start[]; +extern initcall_entry_t __initcall4_start[]; +extern initcall_entry_t __initcall5_start[]; +extern initcall_entry_t __initcall6_start[]; +extern initcall_entry_t __initcall7_start[]; +extern initcall_entry_t __initcall_end[]; + +static initcall_entry_t *initcall_levels[] __initdata = { __initcall0_start, __initcall1_start, __initcall2_start, @@ -939,7 +939,7 @@ static char *initcall_level_names[] __initdata = { static void __init do_initcall_level(int level) { - initcall_t *fn; + initcall_entry_t *fn; strcpy(initcall_command_line, saved_command_line); parse_args(initcall_level_names[level], @@ -950,7 +950,7 @@ static void __init do_initcall_level(int level) trace_initcall_level(initcall_level_names[level]); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) - do_one_initcall(*fn); + do_one_initcall(initcall_from_entry(fn)); } static void __init do_initcalls(void) @@ -981,11 +981,11 @@ static void __init do_basic_setup(void) static void __init do_pre_smp_initcalls(void) { - initcall_t *fn; + initcall_entry_t *fn; trace_initcall_level("early"); for (fn = __initcall_start; fn < __initcall0_start; fn++) - do_one_initcall(*fn); + do_one_initcall(initcall_from_entry(fn)); } /* diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 90b6ab01db59..918f386b2f6e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2788,7 +2788,8 @@ EXPORT_SYMBOL(unregister_console); void __init console_init(void) { int ret; - initcall_t *call; + initcall_t call; + initcall_entry_t *ce; /* Setup the default TTY line discipline. */ n_tty_init(); @@ -2797,13 +2798,14 @@ void __init console_init(void) * set up the console device so that later boot sequences can * inform about problems etc.. */ - call = __con_initcall_start; + ce = __con_initcall_start; trace_initcall_level("console"); - while (call < __con_initcall_end) { - trace_initcall_start((*call)); - ret = (*call)(); - trace_initcall_finish((*call), ret); - call++; + while (ce < __con_initcall_end) { + call = initcall_from_entry(ce); + trace_initcall_start(call); + ret = call(); + trace_initcall_finish(call, ret); + ce++; } } diff --git a/security/security.c b/security/security.c index 47cfff01d7ec..736e78da1ab9 100644 --- a/security/security.c +++ b/security/security.c @@ -48,14 +48,17 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = static void __init do_security_initcalls(void) { int ret; - initcall_t *call; - call = __security_initcall_start; + initcall_t call; + initcall_entry_t *ce; + + ce = __security_initcall_start; trace_initcall_level("security"); - while (call < __security_initcall_end) { - trace_initcall_start((*call)); - ret = (*call) (); - trace_initcall_finish((*call), ret); - call++; + while (ce < __security_initcall_end) { + call = initcall_from_entry(ce); + trace_initcall_start(call); + ret = call(); + trace_initcall_finish(call, ret); + ce++; } } -- cgit v1.2.3 From 46e0c9be206fa7b11aca75da2d6b8535d0139752 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Aug 2018 21:56:22 -0700 Subject: kernel: tracepoints: add support for relative references To avoid the need for relocating absolute references to tracepoint structures at boot time when running relocatable kernels (which may take a disproportionate amount of space), add the option to emit these tables as relative references instead. Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org Acked-by: Michael Ellerman Acked-by: Ingo Molnar Acked-by: Steven Rostedt (VMware) Signed-off-by: Ard Biesheuvel Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: James Morris Cc: James Morris Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Petr Mladek Cc: Russell King Cc: "Serge E. Hallyn" Cc: Sergey Senozhatsky Cc: Thomas Garnier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracepoint.h | 19 ++++++++++++++---- kernel/tracepoint.c | 49 ++++++++++++++++++++++++---------------------- 2 files changed, 41 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d9a084c72541..7f2e16e76ac4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -249,6 +249,19 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define __TRACEPOINT_ENTRY(name) \ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4 \n" \ + " .long __tracepoint_" #name " - . \n" \ + " .previous \n") +#else +#define __TRACEPOINT_ENTRY(name) \ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration @@ -258,11 +271,9 @@ extern void syscall_unregfunc(void); static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"))) = \ + __attribute__((section("__tracepoints"), used)) = \ { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name; + __TRACEPOINT_ENTRY(name); #define DEFINE_TRACE(name) \ DEFINE_TRACE_FN(name, NULL, NULL); diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 96db841bf0fc..bf2c06ef9afc 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -371,6 +371,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); +static void for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + if (!begin) + return; + + if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { + const int *iter; + + for (iter = (const int *)begin; iter < (const int *)end; iter++) + fct(offset_to_ptr(iter), priv); + } else { + struct tracepoint * const *iter; + + for (iter = begin; iter < end; iter++) + fct(*iter, priv); + } +} + #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { @@ -435,15 +456,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier); * Ensure the tracer unregistered the module's probes before the module * teardown is performed. Prevents leaks of probe and data pointers. */ -static void tp_module_going_check_quiescent(struct tracepoint * const *begin, - struct tracepoint * const *end) +static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv) { - struct tracepoint * const *iter; - - if (!begin) - return; - for (iter = begin; iter < end; iter++) - WARN_ON_ONCE((*iter)->funcs); + WARN_ON_ONCE(tp->funcs); } static int tracepoint_module_coming(struct module *mod) @@ -494,8 +509,9 @@ static void tracepoint_module_going(struct module *mod) * Called the going notifier before checking for * quiescence. */ - tp_module_going_check_quiescent(mod->tracepoints_ptrs, - mod->tracepoints_ptrs + mod->num_tracepoints); + for_each_tracepoint_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints, + tp_module_going_check_quiescent, NULL); break; } } @@ -547,19 +563,6 @@ static __init int init_tracepoints(void) __initcall(init_tracepoints); #endif /* CONFIG_MODULES */ -static void for_each_tracepoint_range(struct tracepoint * const *begin, - struct tracepoint * const *end, - void (*fct)(struct tracepoint *tp, void *priv), - void *priv) -{ - struct tracepoint * const *iter; - - if (!begin) - return; - for (iter = begin; iter < end; iter++) - fct(*iter, priv); -} - /** * for_each_kernel_tracepoint - iteration on all kernel tracepoints * @fct: callback -- cgit v1.2.3 From e05a8e4d88d16e088d83ce679ac3343ac66c936b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Aug 2018 21:56:34 -0700 Subject: sched/wait: assert the wait_queue_head lock is held in __wake_up_common Better ensure we actually hold the lock using lockdep than just commenting on it. Due to the various exported _locked interfaces it is far too easy to get the locking wrong. Link: http://lkml.kernel.org/r/20171214152344.6880-4-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Ingo Molnar Cc: Al Viro Cc: Andrea Arcangeli Cc: Ingo Molnar Cc: Jason Baron Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Peter Zijlstra Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/wait.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 870f97b313e3..5dd47f1103d1 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -69,6 +69,8 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, wait_queue_entry_t *curr, *next; int cnt = 0; + lockdep_assert_held(&wq_head->lock); + if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) { curr = list_next_entry(bookmark, entry); -- cgit v1.2.3 From 52cba1a274818c3ee6299c1a1b476e7bc5037a2f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 21:59:51 -0700 Subject: signal: make force_sigsegv() void Patch series "signal: refactor some functions", v3. This series refactors a bunch of functions in signal.c to simplify parts of the code. The greatest single change is declaring the static do_sigpending() helper as void which makes it possible to remove a bunch of unnecessary checks in the syscalls later on. This patch (of 17): force_sigsegv() returned 0 unconditionally so it doesn't make sense to have it return at all. In addition, there are no callers that check force_sigsegv()'s return value. Link: http://lkml.kernel.org/r/20180602103653.18181-2-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Al Viro Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/signal.h | 2 +- kernel/signal.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 113d1ad1ced7..e138ac16c650 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -314,7 +314,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey); int force_sig_ptrace_errno_trap(int errno, void __user *addr); extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); +extern void force_sigsegv(int sig, struct task_struct *p); extern int force_sig_info(int, struct siginfo *, struct task_struct *); extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); diff --git a/kernel/signal.c b/kernel/signal.c index 8d8a940422a8..8a828baa0f93 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1458,8 +1458,7 @@ send_sig(int sig, struct task_struct *p, int priv) return send_sig_info(sig, __si_special(priv), p); } -void -force_sig(int sig, struct task_struct *p) +void force_sig(int sig, struct task_struct *p) { force_sig_info(sig, SEND_SIG_PRIV, p); } @@ -1470,8 +1469,7 @@ force_sig(int sig, struct task_struct *p) * the problem was already a SIGSEGV, we'll want to * make sure we don't even try to deliver the signal.. */ -int -force_sigsegv(int sig, struct task_struct *p) +void force_sigsegv(int sig, struct task_struct *p) { if (sig == SIGSEGV) { unsigned long flags; @@ -1480,7 +1478,6 @@ force_sigsegv(int sig, struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } force_sig(SIGSEGV, p); - return 0; } int force_sig_fault(int sig, int code, void __user *addr -- cgit v1.2.3 From bb17fcca078fc56ea7c7611cbe92687021cf6a31 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 21:59:55 -0700 Subject: signal: make kill_as_cred_perm() return bool kill_as_cred_perm() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-3-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 8a828baa0f93..5b0fb57b23f0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1339,14 +1339,15 @@ static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) return error; } -static int kill_as_cred_perm(const struct cred *cred, - struct task_struct *target) +static inline bool kill_as_cred_perm(const struct cred *cred, + struct task_struct *target) { const struct cred *pcred = __task_cred(target); - if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) && - !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid)) - return 0; - return 1; + + return uid_eq(cred->euid, pcred->suid) || + uid_eq(cred->euid, pcred->uid) || + uid_eq(cred->uid, pcred->suid) || + uid_eq(cred->uid, pcred->uid); } /* like kill_pid_info(), but doesn't use uid/euid of "current" */ -- cgit v1.2.3 From 6527de953354e550bf6d941fdcd1aaf832317e8d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 21:59:59 -0700 Subject: signal: make may_ptrace_stop() return bool may_ptrace_stop() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-4-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 5b0fb57b23f0..f03886cd064e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1875,10 +1875,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, spin_unlock_irqrestore(&sighand->siglock, flags); } -static inline int may_ptrace_stop(void) +static inline bool may_ptrace_stop(void) { if (!likely(current->ptrace)) - return 0; + return false; /* * Are we in the middle of do_coredump? * If so and our tracer is also part of the coredump stopping @@ -1894,9 +1894,9 @@ static inline int may_ptrace_stop(void) */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) - return 0; + return false; - return 1; + return true; } /* -- cgit v1.2.3 From b1d294c80393e7ba18b3c646fe9d8e6a206c7988 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:02 -0700 Subject: signal: make do_sigpending() void do_sigpending() returned 0 unconditionally so it doesn't make sense to have it return at all. This allows us to simplify a bunch of syscall callers. Link: http://lkml.kernel.org/r/20180602103653.18181-5-christian@brauner.io Signed-off-by: Christian Brauner Acked-by: Al Viro Acked-by: Oleg Nesterov Reviewed-by: Andrew Morton Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index f03886cd064e..10918c7b2ee3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2753,7 +2753,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, } #endif -static int do_sigpending(sigset_t *set) +static void do_sigpending(sigset_t *set) { spin_lock_irq(¤t->sighand->siglock); sigorsets(set, ¤t->pending.signal, @@ -2762,7 +2762,6 @@ static int do_sigpending(sigset_t *set) /* Outside the lock because only this thread touches it. */ sigandsets(set, ¤t->blocked, set); - return 0; } /** @@ -2774,15 +2773,16 @@ static int do_sigpending(sigset_t *set) SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) { sigset_t set; - int err; if (sigsetsize > sizeof(*uset)) return -EINVAL; - err = do_sigpending(&set); - if (!err && copy_to_user(uset, &set, sigsetsize)) - err = -EFAULT; - return err; + do_sigpending(&set); + + if (copy_to_user(uset, &set, sigsetsize)) + return -EFAULT; + + return 0; } #ifdef CONFIG_COMPAT @@ -2790,15 +2790,13 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, compat_size_t, sigsetsize) { sigset_t set; - int err; if (sigsetsize > sizeof(*uset)) return -EINVAL; - err = do_sigpending(&set); - if (!err) - err = put_compat_sigset(uset, &set, sigsetsize); - return err; + do_sigpending(&set); + + return put_compat_sigset(uset, &set, sigsetsize); } #endif @@ -3560,25 +3558,26 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) { sigset_t set; - int err; if (sizeof(old_sigset_t) > sizeof(*uset)) return -EINVAL; - err = do_sigpending(&set); - if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t))) - err = -EFAULT; - return err; + do_sigpending(&set); + + if (copy_to_user(uset, &set, sizeof(old_sigset_t))) + return -EFAULT; + + return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32) { sigset_t set; - int err = do_sigpending(&set); - if (!err) - err = put_user(set.sig[0], set32); - return err; + + do_sigpending(&set); + + return put_user(set.sig[0], set32); } #endif -- cgit v1.2.3 From d8f993b3dba05ff354f9b9592afc44b1b3dbfd46 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:07 -0700 Subject: signal: simplify rt_sigaction() The goto is not needed and does not add any clarity. Simply return -EINVAL on unexpected sigset_t struct size directly. Link: http://lkml.kernel.org/r/20180602103653.18181-6-christian@brauner.io Signed-off-by: Christian Brauner Acked-by: Oleg Nesterov Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 10918c7b2ee3..adeee5095039 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3648,25 +3648,23 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig, size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; - int ret = -EINVAL; + int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) - goto out; + return -EINVAL; - if (act) { - if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) - return -EFAULT; - } + if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) + return -EFAULT; ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL); + if (ret) + return ret; - if (!ret && oact) { - if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) - return -EFAULT; - } -out: - return ret; + if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) + return -EFAULT; + + return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, -- cgit v1.2.3 From 2a9b90940957d3c0f744011b02b4575323be5947 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:11 -0700 Subject: signal: make kill_ok_by_cred() return bool kill_ok_by_cred() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-7-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index adeee5095039..0e1bb87415c4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -717,21 +717,16 @@ static inline bool si_fromuser(const struct siginfo *info) /* * called with RCU read lock from check_kill_permission() */ -static int kill_ok_by_cred(struct task_struct *t) +static bool kill_ok_by_cred(struct task_struct *t) { const struct cred *cred = current_cred(); const struct cred *tcred = __task_cred(t); - if (uid_eq(cred->euid, tcred->suid) || - uid_eq(cred->euid, tcred->uid) || - uid_eq(cred->uid, tcred->suid) || - uid_eq(cred->uid, tcred->uid)) - return 1; - - if (ns_capable(tcred->user_ns, CAP_KILL)) - return 1; - - return 0; + return uid_eq(cred->euid, tcred->suid) || + uid_eq(cred->euid, tcred->uid) || + uid_eq(cred->uid, tcred->suid) || + uid_eq(cred->uid, tcred->uid) || + ns_capable(tcred->user_ns, CAP_KILL); } /* -- cgit v1.2.3 From e4a8b4efbfdf8ce86b1d410cc96d2790a05f1fd5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:15 -0700 Subject: signal: make sig_handler_ignored() return bool sig_handler_ignored() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-8-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 0e1bb87415c4..a032a9bae5fb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -65,11 +65,11 @@ static void __user *sig_handler(struct task_struct *t, int sig) return t->sighand->action[sig - 1].sa.sa_handler; } -static int sig_handler_ignored(void __user *handler, int sig) +static inline bool sig_handler_ignored(void __user *handler, int sig) { /* Is it explicitly or implicitly ignored? */ return handler == SIG_IGN || - (handler == SIG_DFL && sig_kernel_ignore(sig)); + (handler == SIG_DFL && sig_kernel_ignore(sig)); } static int sig_task_ignored(struct task_struct *t, int sig, bool force) -- cgit v1.2.3 From 41aaa481197dc566f691d403e0247bb53a017770 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:19 -0700 Subject: signal: make sig_task_ignored() return bool sig_task_ignored() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-9-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index a032a9bae5fb..6e92adddd667 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -72,7 +72,7 @@ static inline bool sig_handler_ignored(void __user *handler, int sig) (handler == SIG_DFL && sig_kernel_ignore(sig)); } -static int sig_task_ignored(struct task_struct *t, int sig, bool force) +static bool sig_task_ignored(struct task_struct *t, int sig, bool force) { void __user *handler; @@ -80,7 +80,7 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && handler == SIG_DFL && !(force && sig_kernel_only(sig))) - return 1; + return true; return sig_handler_ignored(handler, sig); } -- cgit v1.2.3 From 6a0cdcd78892d4508e77cbec913eaf099ab4a8e9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:23 -0700 Subject: signal: make sig_ignored() return bool sig_ignored() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-10-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 6e92adddd667..bcd4272639ca 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -85,7 +85,7 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force) return sig_handler_ignored(handler, sig); } -static int sig_ignored(struct task_struct *t, int sig, bool force) +static bool sig_ignored(struct task_struct *t, int sig, bool force) { /* * Blocked signals are never ignored, since the @@ -93,7 +93,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) * unblocked. */ if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) - return 0; + return false; /* * Tracers may want to know about even ignored signal unless it @@ -101,7 +101,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) * by SIGNAL_UNKILLABLE task. */ if (t->ptrace && sig != SIGKILL) - return 0; + return false; return sig_task_ignored(t, sig, force); } -- cgit v1.2.3 From 938696a82974c33b93be9657b72521bc84f5c587 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:27 -0700 Subject: signal: make has_pending_signals() return bool has_pending_signals() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-11-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index bcd4272639ca..e00e96a7b24f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -110,7 +110,7 @@ static bool sig_ignored(struct task_struct *t, int sig, bool force) * Re-calculate pending state from the set of locally pending * signals, globally pending signals, and blocked signals. */ -static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) +static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) { unsigned long ready; long i; -- cgit v1.2.3 From 09ae854edb2d275d98a874c21c24c58cee4df14e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:30 -0700 Subject: signal: make recalc_sigpending_tsk() return bool recalc_sigpending_tsk() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-12-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index e00e96a7b24f..7eec2dba597e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -138,20 +138,21 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) #define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) -static int recalc_sigpending_tsk(struct task_struct *t) +static bool recalc_sigpending_tsk(struct task_struct *t) { if ((t->jobctl & JOBCTL_PENDING_MASK) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); - return 1; + return true; } + /* * We must never clear the flag in another thread, or in current * when it's possible the current syscall is returning -ERESTART*. * So we don't clear it here, and only callers who know they should do. */ - return 0; + return false; } /* -- cgit v1.2.3 From 67a48a24478841525ba73ec6d64caaf079cf3b7c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:34 -0700 Subject: signal: make unhandled_signal() return bool unhandled_signal() already behaves like a boolean function. Let's actually declare it as such too. All callers treat it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-13-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 +- kernel/signal.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/signal.h b/include/linux/signal.h index 3c5200137b24..fa23cae66758 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -287,7 +287,7 @@ static inline void disallow_signal(int sig) extern struct kmem_cache *sighand_cachep; -int unhandled_signal(struct task_struct *tsk, int sig); +extern bool unhandled_signal(struct task_struct *tsk, int sig); /* * In POSIX a signal is sent either to a specific thread (Linux task) diff --git a/kernel/signal.c b/kernel/signal.c index 7eec2dba597e..c10c09fc4ec3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -505,13 +505,15 @@ flush_signal_handlers(struct task_struct *t, int force_default) } } -int unhandled_signal(struct task_struct *tsk, int sig) +bool unhandled_signal(struct task_struct *tsk, int sig) { void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; if (is_global_init(tsk)) - return 1; + return true; + if (handler != SIG_IGN && handler != SIG_DFL) - return 0; + return false; + /* if ptraced, let the tracer determine */ return !tsk->ptrace; } -- cgit v1.2.3 From 8f11351eee9524c179361fcbf1538c993f2390e4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:38 -0700 Subject: signal: make flush_sigqueue_mask() void The return value of flush_sigqueue_mask() is never checked anywhere. Link: http://lkml.kernel.org/r/20180602103653.18181-14-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index c10c09fc4ec3..fe0e9e82ce44 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -687,14 +687,14 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state) * * All callers must be holding the siglock. */ -static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) +static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) { struct sigqueue *q, *n; sigset_t m; sigandsets(&m, mask, &s->signal); if (sigisemptyset(&m)) - return 0; + return; sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { @@ -703,7 +703,6 @@ static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s) __sigqueue_free(q); } } - return 1; } static inline int is_si_special(const struct siginfo *info) -- cgit v1.2.3 From acd14e62f075f44cd04e1ac2920a515f9b80146e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:42 -0700 Subject: signal: make wants_signal() return bool wants_signal() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-15-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index fe0e9e82ce44..0e48dbc6649e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -879,16 +879,20 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force) * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ -static inline int wants_signal(int sig, struct task_struct *p) +static inline bool wants_signal(int sig, struct task_struct *p) { if (sigismember(&p->blocked, sig)) - return 0; + return false; + if (p->flags & PF_EXITING) - return 0; + return false; + if (sig == SIGKILL) - return 1; + return true; + if (task_is_stopped_or_traced(p)) - return 0; + return false; + return task_curr(p) || !signal_pending(p); } -- cgit v1.2.3 From a19e2c01f71bb4b8442db450200a01a28db36a04 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:46 -0700 Subject: signal: make legacy_queue() return bool legacy_queue() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-16-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 0e48dbc6649e..3de1ba2af032 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -972,7 +972,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) return; } -static inline int legacy_queue(struct sigpending *signals, int sig) +static inline bool legacy_queue(struct sigpending *signals, int sig) { return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } -- cgit v1.2.3 From f99e9d8c5c7f5539e6d97fd04a4ad213e52daac0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:50 -0700 Subject: signal: make sigkill_pending() return bool sigkill_pending() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-17-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 3de1ba2af032..9f9b4183178e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1904,10 +1904,10 @@ static inline bool may_ptrace_stop(void) * Return non-zero if there is a SIGKILL that should be waking us up. * Called with the siglock held. */ -static int sigkill_pending(struct task_struct *tsk) +static bool sigkill_pending(struct task_struct *tsk) { - return sigismember(&tsk->pending.signal, SIGKILL) || - sigismember(&tsk->signal->shared_pending.signal, SIGKILL); + return sigismember(&tsk->pending.signal, SIGKILL) || + sigismember(&tsk->signal->shared_pending.signal, SIGKILL); } /* -- cgit v1.2.3 From 20ab7218d2507b2dbec9c053c2f1b96054e266b6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 21 Aug 2018 22:00:54 -0700 Subject: signal: make get_signal() return bool make get_signal() already behaves like a boolean function. Let's actually declare it as such too. Link: http://lkml.kernel.org/r/20180602103653.18181-18-christian@brauner.io Signed-off-by: Christian Brauner Reviewed-by: Andrew Morton Cc: Al Viro Cc: Eric W. Biederman Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Morris Cc: Kees Cook Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 +- kernel/signal.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/signal.h b/include/linux/signal.h index fa23cae66758..e3d9e0640e6e 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -265,7 +265,7 @@ extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -extern int get_signal(struct ksignal *ksig); +extern bool get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); diff --git a/kernel/signal.c b/kernel/signal.c index 9f9b4183178e..786bacc60649 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2287,7 +2287,7 @@ static int ptrace_signal(int signr, siginfo_t *info) return signr; } -int get_signal(struct ksignal *ksig) +bool get_signal(struct ksignal *ksig) { struct sighand_struct *sighand = current->sighand; struct signal_struct *signal = current->signal; @@ -2297,7 +2297,7 @@ int get_signal(struct ksignal *ksig) task_work_run(); if (unlikely(uprobe_deny_signal())) - return 0; + return false; /* * Do this once, we can't return to user-mode if freezing() == T. -- cgit v1.2.3 From 06e62a46bbba20aa5286102016a04214bb446141 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 21 Aug 2018 22:00:58 -0700 Subject: fork: don't copy inconsistent signal handler state to child Before this change, if a multithreaded process forks while one of its threads is changing a signal handler using sigaction(), the memcpy() in copy_sighand() can race with the struct assignment in do_sigaction(). It isn't clear whether this can cause corruption of the userspace signal handler pointer, but it definitely can cause inconsistency between different fields of struct sigaction. Take the appropriate spinlock to avoid this. I have tested that this patch prevents inconsistency between sa_sigaction and sa_flags, which is possible before this patch. Link: http://lkml.kernel.org/r/20180702145108.73189-1-jannh@google.com Signed-off-by: Jann Horn Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Rik van Riel Cc: "Peter Zijlstra (Intel)" Cc: Kees Cook Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 8bcef2413f1b..23eb960c701d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1427,7 +1427,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) return -ENOMEM; atomic_set(&sig->count, 1); + spin_lock_irq(¤t->sighand->siglock); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); + spin_unlock_irq(¤t->sighand->siglock); return 0; } -- cgit v1.2.3 From 5f733e8a2dd130eaeed24cbaef49d349206cdb8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 21 Aug 2018 22:01:06 -0700 Subject: kernel/sysctl.c: fix typos in comments Fix a few typos/spellos in kernel/sysctl.c. Link: http://lkml.kernel.org/r/bb09a8b9-f984-6dd4-b07b-3ecaf200862e@infradead.org Signed-off-by: Randy Dunlap Acked-by: Kees Cook Cc: "Luis R. Rodriguez" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index dbdcb6673b27..71ceb6c13c1a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -225,7 +225,7 @@ static int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_MAGIC_SYSRQ -/* Note: sysrq code uses it's own private copy */ +/* Note: sysrq code uses its own private copy */ static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; static int sysrq_sysctl_handler(struct ctl_table *table, int write, @@ -1976,13 +1976,13 @@ static void warn_sysctl_write(struct ctl_table *table) } /** - * proc_first_pos_non_zero_ignore - check if firs position is allowed + * proc_first_pos_non_zero_ignore - check if first position is allowed * @ppos: file position * @table: the sysctl table * * Returns true if the first position is non-zero and the sysctl_writes_strict * mode indicates this is not allowed for numeric input types. String proc - * hadlers can ignore the return value. + * handlers can ignore the return value. */ static bool proc_first_pos_non_zero_ignore(loff_t *ppos, struct ctl_table *table) -- cgit v1.2.3