From a52f60fa2905b4abb26235d0a11cff13ced92709 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Oct 2021 12:43:49 -0500
Subject: reboot: Remove the unreachable panic after do_exit in reboot(2)

Link: https://lkml.kernel.org/r/20211020174406.17889-3-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/reboot.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/reboot.c b/kernel/reboot.c
index f7440c0c7e43..d6e0f9fb7f04 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -359,7 +359,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 	case LINUX_REBOOT_CMD_HALT:
 		kernel_halt();
 		do_exit(0);
-		panic("cannot halt");
 
 	case LINUX_REBOOT_CMD_POWER_OFF:
 		kernel_power_off();
-- 
cgit v1.2.3


From 111e70490d2a673730b89c010b61cea2d982d121 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Oct 2021 12:43:58 -0500
Subject: exit/kthread: Have kernel threads return instead of calling do_exit

In 2009 Oleg reworked[1] the kernel threads so that it is not
necessary to call do_exit if you are not using kthread_stop().  Remove
the explicit calls of do_exit and complete_and_exit (with a NULL
completion) that were previously necessary.

[1] 63706172f332 ("kthreads: rework kthread_stop()")
Link: https://lkml.kernel.org/r/20211020174406.17889-12-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 drivers/firmware/stratix10-svc.c | 4 ++--
 drivers/soc/ti/wkup_m3_ipc.c     | 2 +-
 fs/ocfs2/journal.c               | 5 +----
 kernel/kthread.c                 | 2 +-
 net/batman-adv/tp_meter.c        | 2 +-
 5 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c
index 2a7687911c09..29c0a616b317 100644
--- a/drivers/firmware/stratix10-svc.c
+++ b/drivers/firmware/stratix10-svc.c
@@ -520,7 +520,7 @@ static int svc_normal_to_secure_thread(void *data)
  * physical address of memory block reserved by secure monitor software at
  * secure world.
  *
- * svc_normal_to_secure_shm_thread() calls do_exit() directly since it is a
+ * svc_normal_to_secure_shm_thread() terminates directly since it is a
  * standlone thread for which no one will call kthread_stop() or return when
  * 'kthread_should_stop()' is true.
  */
@@ -544,7 +544,7 @@ static int svc_normal_to_secure_shm_thread(void *data)
 	}
 
 	complete(&sh_mem->sync_complete);
-	do_exit(0);
+	return 0;
 }
 
 /**
diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
index 09abd17065ba..0733443a2631 100644
--- a/drivers/soc/ti/wkup_m3_ipc.c
+++ b/drivers/soc/ti/wkup_m3_ipc.c
@@ -426,7 +426,7 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc)
 	else
 		m3_ipc_state = m3_ipc;
 
-	do_exit(0);
+	return 0;
 }
 
 static int wkup_m3_ipc_probe(struct platform_device *pdev)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4f15750aac5d..329986f12db3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1497,10 +1497,7 @@ bail:
 	if (quota_enabled)
 		kfree(rm_quota);
 
-	/* no one is callint kthread_stop() for us so the kthread() api
-	 * requires that we call do_exit().  And it isn't exported, but
-	 * complete_and_exit() seems to be a minimal wrapper around it. */
-	complete_and_exit(NULL, status);
+	return status;
 }
 
 void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5b37a8567168..33e17beaa682 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -433,7 +433,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
  * If thread is going to be bound on a particular cpu, give its node
  * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
  * When woken, the thread will run @threadfn() with @data as its
- * argument. @threadfn() can either call do_exit() directly if it is a
+ * argument. @threadfn() can either return directly if it is a
  * standalone thread for which no one will call kthread_stop(), or
  * return when 'kthread_should_stop()' is true (which means
  * kthread_stop() has been called).  The return value should be zero
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 56b9fe97b3b4..1252540cde17 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -890,7 +890,7 @@ out:
 
 	batadv_tp_vars_put(tp_vars);
 
-	do_exit(0);
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 26d5badbccddcc063dc5174a2baffd13a23322aa Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Oct 2021 12:43:59 -0500
Subject: signal: Implement force_fatal_sig

Add a simple helper force_fatal_sig that causes a signal to be
delivered to a process as if the signal handler was set to SIG_DFL.

Reimplement force_sigsegv based upon this new helper.  This fixes
force_sigsegv so that when it forces the default signal handler
to be used the code now forces the signal to be unblocked as well.

Reusing the tested logic in force_sig_info_to_task that was built for
force_sig_seccomp this makes the implementation trivial.

This is interesting both because it makes force_sigsegv simpler and
because there are a couple of buggy places in the kernel that call
do_exit(SIGILL) or do_exit(SIGSYS) because there is no straight
forward way today for those places to simply force the exit of a
process with the chosen signal.  Creating force_fatal_sig allows
those places to be implemented with normal signal exits.

Link: https://lkml.kernel.org/r/20211020174406.17889-13-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  1 +
 kernel/signal.c              | 26 +++++++++++++++++---------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e5f4ce622ee6..e2dc9f119ada 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -338,6 +338,7 @@ extern int kill_pid(struct pid *pid, int sig, int priv);
 extern __must_check bool do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int);
+extern void force_fatal_sig(int);
 extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
diff --git a/kernel/signal.c b/kernel/signal.c
index 952741f6d0f9..6a5e1802b9a2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1662,6 +1662,19 @@ void force_sig(int sig)
 }
 EXPORT_SYMBOL(force_sig);
 
+void force_fatal_sig(int sig)
+{
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = 0;
+	info.si_uid = 0;
+	force_sig_info_to_task(&info, current, true);
+}
+
 /*
  * When things go south during signal handling, we
  * will force a SIGSEGV. And if the signal that caused
@@ -1670,15 +1683,10 @@ EXPORT_SYMBOL(force_sig);
  */
 void force_sigsegv(int sig)
 {
-	struct task_struct *p = current;
-
-	if (sig == SIGSEGV) {
-		unsigned long flags;
-		spin_lock_irqsave(&p->sighand->siglock, flags);
-		p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	}
-	force_sig(SIGSEGV);
+	if (sig == SIGSEGV)
+		force_fatal_sig(SIGSEGV);
+	else
+		force_sig(SIGSEGV);
 }
 
 int force_sig_fault_to_task(int sig, int code, void __user *addr
-- 
cgit v1.2.3


From 941edc5bf174b67f94db19817cbeab0a93e0c32a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Oct 2021 12:44:00 -0500
Subject: exit/syscall_user_dispatch: Send ordinary signals on failure

Use force_fatal_sig instead of calling do_exit directly.  This ensures
the ordinary signal handling path gets invoked, core dumps as
appropriate get created, and for multi-threaded processes all of the
threads are terminated not just a single thread.

When asked Gabriel Krisman Bertazi <krisman@collabora.com> said [1]:
> ebiederm@xmission.com (Eric W. Biederman) asked:
>
> > Why does do_syscal_user_dispatch call do_exit(SIGSEGV) and
> > do_exit(SIGSYS) instead of force_sig(SIGSEGV) and force_sig(SIGSYS)?
> >
> > Looking at the code these cases are not expected to happen, so I would
> > be surprised if userspace depends on any particular behaviour on the
> > failure path so I think we can change this.
>
> Hi Eric,
>
> There is not really a good reason, and the use case that originated the
> feature doesn't rely on it.
>
> Unless I'm missing yet another problem and others correct me, I think
> it makes sense to change it as you described.
>
> > Is using do_exit in this way something you copied from seccomp?
>
> I'm not sure, its been a while, but I think it might be just that.  The
> first prototype of SUD was implemented as a seccomp mode.

If at some point it becomes interesting we could relax
"force_fatal_sig(SIGSEGV)" to instead say
"force_sig_fault(SIGSEGV, SEGV_MAPERR, sd->selector)".

I avoid doing that in this patch to avoid making it possible
to catch currently uncatchable signals.

Cc: Gabriel Krisman Bertazi <krisman@collabora.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
[1] https://lkml.kernel.org/r/87mtr6gdvi.fsf@collabora.com
Link: https://lkml.kernel.org/r/20211020174406.17889-14-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/entry/syscall_user_dispatch.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index c240302f56e2..4508201847d2 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs)
 		 * access_ok() is performed once, at prctl time, when
 		 * the selector is loaded by userspace.
 		 */
-		if (unlikely(__get_user(state, sd->selector)))
-			do_exit(SIGSEGV);
+		if (unlikely(__get_user(state, sd->selector))) {
+			force_fatal_sig(SIGSEGV);
+			return true;
+		}
 
 		if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW))
 			return false;
 
-		if (state != SYSCALL_DISPATCH_FILTER_BLOCK)
-			do_exit(SIGSYS);
+		if (state != SYSCALL_DISPATCH_FILTER_BLOCK) {
+			force_fatal_sig(SIGSYS);
+			return true;
+		}
 	}
 
 	sd->on_dispatch = true;
-- 
cgit v1.2.3


From 00b06da29cf9dc633cdba87acd3f57f4df3fd5c7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 29 Oct 2021 09:14:19 -0500
Subject: signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed

As Andy pointed out that there are races between
force_sig_info_to_task and sigaction[1] when force_sig_info_task.  As
Kees discovered[2] ptrace is also able to change these signals.

In the case of seeccomp killing a process with a signal it is a
security violation to allow the signal to be caught or manipulated.

Solve this problem by introducing a new flag SA_IMMUTABLE that
prevents sigaction and ptrace from modifying these forced signals.
This flag is carefully made kernel internal so that no new ABI is
introduced.

Longer term I think this can be solved by guaranteeing short circuit
delivery of signals in this case.  Unfortunately reliable and
guaranteed short circuit delivery of these signals is still a ways off
from being implemented, tested, and merged.  So I have implemented a much
simpler alternative for now.

[1] https://lkml.kernel.org/r/b5d52d25-7bde-4030-a7b1-7c6f8ab90660@www.fastmail.com
[2] https://lkml.kernel.org/r/202110281136.5CE65399A7@keescook
Cc: stable@vger.kernel.org
Fixes: 307d522f5eb8 ("signal/seccomp: Refactor seccomp signal and coredump generation")
Tested-by: Andrea Righi <andrea.righi@canonical.com>
Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal_types.h           | 3 +++
 include/uapi/asm-generic/signal-defs.h | 1 +
 kernel/signal.c                        | 8 +++++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 34cb28b8f16c..a70b2bdbf4d9 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -70,6 +70,9 @@ struct ksignal {
 	int sig;
 };
 
+/* Used to kill the race between sigaction and forced signals */
+#define SA_IMMUTABLE		0x00800000
+
 #ifndef __ARCH_UAPI_SA_FLAGS
 #ifdef SA_RESTORER
 #define __ARCH_UAPI_SA_FLAGS	SA_RESTORER
diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
index fe929e7b77ca..7572f2f46ee8 100644
--- a/include/uapi/asm-generic/signal-defs.h
+++ b/include/uapi/asm-generic/signal-defs.h
@@ -45,6 +45,7 @@
 #define SA_UNSUPPORTED	0x00000400
 #define SA_EXPOSE_TAGBITS	0x00000800
 /* 0x00010000 used on mips */
+/* 0x00800000 used for internal SA_IMMUTABLE */
 /* 0x01000000 used on x86 */
 /* 0x02000000 used on x86 */
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 6a5e1802b9a2..056a107e3cbc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1336,6 +1336,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
 	blocked = sigismember(&t->blocked, sig);
 	if (blocked || ignored || sigdfl) {
 		action->sa.sa_handler = SIG_DFL;
+		action->sa.sa_flags |= SA_IMMUTABLE;
 		if (blocked) {
 			sigdelset(&t->blocked, sig);
 			recalc_sigpending_and_wake(t);
@@ -2760,7 +2761,8 @@ relock:
 		if (!signr)
 			break; /* will return 0 */
 
-		if (unlikely(current->ptrace) && signr != SIGKILL) {
+		if (unlikely(current->ptrace) && (signr != SIGKILL) &&
+		    !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) {
 			signr = ptrace_signal(signr, &ksig->info);
 			if (!signr)
 				continue;
@@ -4110,6 +4112,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 	k = &p->sighand->action[sig-1];
 
 	spin_lock_irq(&p->sighand->siglock);
+	if (k->sa.sa_flags & SA_IMMUTABLE) {
+		spin_unlock_irq(&p->sighand->siglock);
+		return -EINVAL;
+	}
 	if (oact)
 		*oact = *k;
 
-- 
cgit v1.2.3