From a52f60fa2905b4abb26235d0a11cff13ced92709 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:49 -0500 Subject: reboot: Remove the unreachable panic after do_exit in reboot(2) Link: https://lkml.kernel.org/r/20211020174406.17889-3-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/reboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/reboot.c b/kernel/reboot.c index f7440c0c7e43..d6e0f9fb7f04 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -359,7 +359,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, case LINUX_REBOOT_CMD_HALT: kernel_halt(); do_exit(0); - panic("cannot halt"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); -- cgit v1.2.3 From 111e70490d2a673730b89c010b61cea2d982d121 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:58 -0500 Subject: exit/kthread: Have kernel threads return instead of calling do_exit In 2009 Oleg reworked[1] the kernel threads so that it is not necessary to call do_exit if you are not using kthread_stop(). Remove the explicit calls of do_exit and complete_and_exit (with a NULL completion) that were previously necessary. [1] 63706172f332 ("kthreads: rework kthread_stop()") Link: https://lkml.kernel.org/r/20211020174406.17889-12-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- drivers/firmware/stratix10-svc.c | 4 ++-- drivers/soc/ti/wkup_m3_ipc.c | 2 +- fs/ocfs2/journal.c | 5 +---- kernel/kthread.c | 2 +- net/batman-adv/tp_meter.c | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 2a7687911c09..29c0a616b317 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -520,7 +520,7 @@ static int svc_normal_to_secure_thread(void *data) * physical address of memory block reserved by secure monitor software at * secure world. * - * svc_normal_to_secure_shm_thread() calls do_exit() directly since it is a + * svc_normal_to_secure_shm_thread() terminates directly since it is a * standlone thread for which no one will call kthread_stop() or return when * 'kthread_should_stop()' is true. */ @@ -544,7 +544,7 @@ static int svc_normal_to_secure_shm_thread(void *data) } complete(&sh_mem->sync_complete); - do_exit(0); + return 0; } /** diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 09abd17065ba..0733443a2631 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -426,7 +426,7 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) else m3_ipc_state = m3_ipc; - do_exit(0); + return 0; } static int wkup_m3_ipc_probe(struct platform_device *pdev) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f15750aac5d..329986f12db3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1497,10 +1497,7 @@ bail: if (quota_enabled) kfree(rm_quota); - /* no one is callint kthread_stop() for us so the kthread() api - * requires that we call do_exit(). And it isn't exported, but - * complete_and_exit() seems to be a minimal wrapper around it. */ - complete_and_exit(NULL, status); + return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) diff --git a/kernel/kthread.c b/kernel/kthread.c index 5b37a8567168..33e17beaa682 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -433,7 +433,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE. * When woken, the thread will run @threadfn() with @data as its - * argument. @threadfn() can either call do_exit() directly if it is a + * argument. @threadfn() can either return directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 56b9fe97b3b4..1252540cde17 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -890,7 +890,7 @@ out: batadv_tp_vars_put(tp_vars); - do_exit(0); + return 0; } /** -- cgit v1.2.3 From 26d5badbccddcc063dc5174a2baffd13a23322aa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:43:59 -0500 Subject: signal: Implement force_fatal_sig Add a simple helper force_fatal_sig that causes a signal to be delivered to a process as if the signal handler was set to SIG_DFL. Reimplement force_sigsegv based upon this new helper. This fixes force_sigsegv so that when it forces the default signal handler to be used the code now forces the signal to be unblocked as well. Reusing the tested logic in force_sig_info_to_task that was built for force_sig_seccomp this makes the implementation trivial. This is interesting both because it makes force_sigsegv simpler and because there are a couple of buggy places in the kernel that call do_exit(SIGILL) or do_exit(SIGSYS) because there is no straight forward way today for those places to simply force the exit of a process with the chosen signal. Creating force_fatal_sig allows those places to be implemented with normal signal exits. Link: https://lkml.kernel.org/r/20211020174406.17889-13-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- include/linux/sched/signal.h | 1 + kernel/signal.c | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index e5f4ce622ee6..e2dc9f119ada 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -338,6 +338,7 @@ extern int kill_pid(struct pid *pid, int sig, int priv); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int); +extern void force_fatal_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); extern struct sigqueue *sigqueue_alloc(void); diff --git a/kernel/signal.c b/kernel/signal.c index 952741f6d0f9..6a5e1802b9a2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1662,6 +1662,19 @@ void force_sig(int sig) } EXPORT_SYMBOL(force_sig); +void force_fatal_sig(int sig) +{ + struct kernel_siginfo info; + + clear_siginfo(&info); + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_KERNEL; + info.si_pid = 0; + info.si_uid = 0; + force_sig_info_to_task(&info, current, true); +} + /* * When things go south during signal handling, we * will force a SIGSEGV. And if the signal that caused @@ -1670,15 +1683,10 @@ EXPORT_SYMBOL(force_sig); */ void force_sigsegv(int sig) { - struct task_struct *p = current; - - if (sig == SIGSEGV) { - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; - spin_unlock_irqrestore(&p->sighand->siglock, flags); - } - force_sig(SIGSEGV); + if (sig == SIGSEGV) + force_fatal_sig(SIGSEGV); + else + force_sig(SIGSEGV); } int force_sig_fault_to_task(int sig, int code, void __user *addr -- cgit v1.2.3 From 941edc5bf174b67f94db19817cbeab0a93e0c32a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 20 Oct 2021 12:44:00 -0500 Subject: exit/syscall_user_dispatch: Send ordinary signals on failure Use force_fatal_sig instead of calling do_exit directly. This ensures the ordinary signal handling path gets invoked, core dumps as appropriate get created, and for multi-threaded processes all of the threads are terminated not just a single thread. When asked Gabriel Krisman Bertazi said [1]: > ebiederm@xmission.com (Eric W. Biederman) asked: > > > Why does do_syscal_user_dispatch call do_exit(SIGSEGV) and > > do_exit(SIGSYS) instead of force_sig(SIGSEGV) and force_sig(SIGSYS)? > > > > Looking at the code these cases are not expected to happen, so I would > > be surprised if userspace depends on any particular behaviour on the > > failure path so I think we can change this. > > Hi Eric, > > There is not really a good reason, and the use case that originated the > feature doesn't rely on it. > > Unless I'm missing yet another problem and others correct me, I think > it makes sense to change it as you described. > > > Is using do_exit in this way something you copied from seccomp? > > I'm not sure, its been a while, but I think it might be just that. The > first prototype of SUD was implemented as a seccomp mode. If at some point it becomes interesting we could relax "force_fatal_sig(SIGSEGV)" to instead say "force_sig_fault(SIGSEGV, SEGV_MAPERR, sd->selector)". I avoid doing that in this patch to avoid making it possible to catch currently uncatchable signals. Cc: Gabriel Krisman Bertazi Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski [1] https://lkml.kernel.org/r/87mtr6gdvi.fsf@collabora.com Link: https://lkml.kernel.org/r/20211020174406.17889-14-ebiederm@xmission.com Signed-off-by: Eric W. Biederman --- kernel/entry/syscall_user_dispatch.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index c240302f56e2..4508201847d2 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs) * access_ok() is performed once, at prctl time, when * the selector is loaded by userspace. */ - if (unlikely(__get_user(state, sd->selector))) - do_exit(SIGSEGV); + if (unlikely(__get_user(state, sd->selector))) { + force_fatal_sig(SIGSEGV); + return true; + } if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) return false; - if (state != SYSCALL_DISPATCH_FILTER_BLOCK) - do_exit(SIGSYS); + if (state != SYSCALL_DISPATCH_FILTER_BLOCK) { + force_fatal_sig(SIGSYS); + return true; + } } sd->on_dispatch = true; -- cgit v1.2.3 From 00b06da29cf9dc633cdba87acd3f57f4df3fd5c7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 29 Oct 2021 09:14:19 -0500 Subject: signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed As Andy pointed out that there are races between force_sig_info_to_task and sigaction[1] when force_sig_info_task. As Kees discovered[2] ptrace is also able to change these signals. In the case of seeccomp killing a process with a signal it is a security violation to allow the signal to be caught or manipulated. Solve this problem by introducing a new flag SA_IMMUTABLE that prevents sigaction and ptrace from modifying these forced signals. This flag is carefully made kernel internal so that no new ABI is introduced. Longer term I think this can be solved by guaranteeing short circuit delivery of signals in this case. Unfortunately reliable and guaranteed short circuit delivery of these signals is still a ways off from being implemented, tested, and merged. So I have implemented a much simpler alternative for now. [1] https://lkml.kernel.org/r/b5d52d25-7bde-4030-a7b1-7c6f8ab90660@www.fastmail.com [2] https://lkml.kernel.org/r/202110281136.5CE65399A7@keescook Cc: stable@vger.kernel.org Fixes: 307d522f5eb8 ("signal/seccomp: Refactor seccomp signal and coredump generation") Tested-by: Andrea Righi Tested-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/linux/signal_types.h | 3 +++ include/uapi/asm-generic/signal-defs.h | 1 + kernel/signal.c | 8 +++++++- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 34cb28b8f16c..a70b2bdbf4d9 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -70,6 +70,9 @@ struct ksignal { int sig; }; +/* Used to kill the race between sigaction and forced signals */ +#define SA_IMMUTABLE 0x00800000 + #ifndef __ARCH_UAPI_SA_FLAGS #ifdef SA_RESTORER #define __ARCH_UAPI_SA_FLAGS SA_RESTORER diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h index fe929e7b77ca..7572f2f46ee8 100644 --- a/include/uapi/asm-generic/signal-defs.h +++ b/include/uapi/asm-generic/signal-defs.h @@ -45,6 +45,7 @@ #define SA_UNSUPPORTED 0x00000400 #define SA_EXPOSE_TAGBITS 0x00000800 /* 0x00010000 used on mips */ +/* 0x00800000 used for internal SA_IMMUTABLE */ /* 0x01000000 used on x86 */ /* 0x02000000 used on x86 */ /* diff --git a/kernel/signal.c b/kernel/signal.c index 6a5e1802b9a2..056a107e3cbc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1336,6 +1336,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool blocked = sigismember(&t->blocked, sig); if (blocked || ignored || sigdfl) { action->sa.sa_handler = SIG_DFL; + action->sa.sa_flags |= SA_IMMUTABLE; if (blocked) { sigdelset(&t->blocked, sig); recalc_sigpending_and_wake(t); @@ -2760,7 +2761,8 @@ relock: if (!signr) break; /* will return 0 */ - if (unlikely(current->ptrace) && signr != SIGKILL) { + if (unlikely(current->ptrace) && (signr != SIGKILL) && + !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { signr = ptrace_signal(signr, &ksig->info); if (!signr) continue; @@ -4110,6 +4112,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) k = &p->sighand->action[sig-1]; spin_lock_irq(&p->sighand->siglock); + if (k->sa.sa_flags & SA_IMMUTABLE) { + spin_unlock_irq(&p->sighand->siglock); + return -EINVAL; + } if (oact) *oact = *k; -- cgit v1.2.3