From 9f76d59173d9d146e96c66886b671c1915a5c5e5 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 5 Jan 2023 14:44:03 +0100 Subject: timers: Prevent union confusion from unexpected restart_syscall() The nanosleep syscalls use the restart_block mechanism, with a quirk: The `type` and `rmtp`/`compat_rmtp` fields are set up unconditionally on syscall entry, while the rest of the restart_block is only set up in the unlikely case that the syscall is actually interrupted by a signal (or pseudo-signal) that doesn't have a signal handler. If the restart_block was set up by a previous syscall (futex(..., FUTEX_WAIT, ...) or poll()) and hasn't been invalidated somehow since then, this will clobber some of the union fields used by futex_wait_restart() and do_restart_poll(). If userspace afterwards wrongly calls the restart_syscall syscall, futex_wait_restart()/do_restart_poll() will read struct fields that have been clobbered. This doesn't actually lead to anything particularly interesting because none of the union fields contain trusted kernel data, and futex(..., FUTEX_WAIT, ...) and poll() aren't syscalls where it makes much sense to apply seccomp filters to their arguments. So the current consequences are just of the "if userspace does bad stuff, it can damage itself, and that's not a problem" flavor. But still, it seems like a hazard for future developers, so invalidate the restart_block when partly setting it up in the nanosleep syscalls. Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230105134403.754986-1-jannh@google.com --- kernel/time/hrtimer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time/hrtimer.c') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 3ae661ab6260..e4f0e3b0c4f4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2126,6 +2126,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, if (!timespec64_valid(&tu)) return -EINVAL; + current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, @@ -2147,6 +2148,7 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, if (!timespec64_valid(&tu)) return -EINVAL; + current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, -- cgit v1.2.3 From c14fd3dcacaa480394d3ac0b4a91a7d17a4b5516 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Jan 2023 09:32:05 -0800 Subject: hrtimer: Rely on rt_task() for DL tasks too Checking dl_task() is redundant as rt_task() returns true for deadline tasks too. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230123173206.6764-2-dave@stgolabs.net --- kernel/time/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time/hrtimer.c') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e4f0e3b0c4f4..667b713bab42 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2089,7 +2089,7 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, u64 slack; slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) + if (rt_task(current)) slack = 0; hrtimer_init_sleeper_on_stack(&t, clockid, mode); -- cgit v1.2.3 From 0c52310f260014d95c1310364379772cb74cf82d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Jan 2023 09:32:06 -0800 Subject: hrtimer: Ignore slack time for RT tasks in schedule_hrtimeout_range() While in theory the timer can be triggered before expires + delta, for the cases of RT tasks they really have no business giving any lenience for extra slack time, so override any passed value by the user and always use zero for schedule_hrtimeout_range() calls. Furthermore, this is similar to what the nanosleep(2) family already does with current->timer_slack_ns. Signed-off-by: Davidlohr Bueso Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230123173206.6764-3-dave@stgolabs.net --- kernel/time/hrtimer.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/time/hrtimer.c') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 667b713bab42..e8c08292defc 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2272,7 +2272,7 @@ void __init hrtimers_init(void) /** * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * @clock_id: timer clock to be used */ @@ -2299,6 +2299,13 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } + /* + * Override any slack passed by the user if under + * rt contraints. + */ + if (rt_task(current)) + delta = 0; + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_sleeper_start_expires(&t, mode); @@ -2318,7 +2325,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) - * @delta: slack in expires timeout (ktime_t) + * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks * @mode: timer mode * * Make the current task sleep until the given expiry time has @@ -2326,7 +2333,8 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); * the current task state has been set (see set_current_state()). * * The @delta argument gives the kernel the freedom to schedule the - * actual wakeup to a time that is both power and performance friendly. + * actual wakeup to a time that is both power and performance friendly + * for regular (non RT/DL) tasks. * The kernel give the normal best effort behavior for "@expires+@delta", * but may decide to fire the timer earlier, but no earlier than @expires. * -- cgit v1.2.3