-rw-r--r--   include/linux/cgroup-defs.h  |  28
-rw-r--r--   include/linux/cgroup.h       |  43
-rw-r--r--   include/linux/sched.h        |   2
-rw-r--r--   include/linux/sched/jobctl.h |   2
-rw-r--r--   kernel/cgroup/Makefile       |   2
-rw-r--r--   kernel/cgroup/cgroup.c       | 110
-rw-r--r--   kernel/cgroup/freezer.c      | 317
-rw-r--r--   kernel/fork.c                |   2
-rw-r--r--   kernel/signal.c              |  70
9 files changed, 566 insertions(+), 10 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 7d57890cec67..77258d276f93 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -65,6 +65,12 @@ enum {
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,
+
+	/* Control group has to be frozen. */
+	CGRP_FREEZE,
+
+	/* Cgroup is frozen. */
+	CGRP_FROZEN,
 };
 
 /* cgroup_root->flags */
@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
	struct cgroup *updated_next;	/* NULL iff not on the list */
 };
 
+struct cgroup_freezer_state {
+	/* Should the cgroup and its descendants be frozen. */
+	bool freeze;
+
+	/* Should the cgroup actually be frozen? */
+	int e_freeze;
+
+	/* Fields below are protected by css_set_lock */
+
+	/* Number of frozen descendant cgroups */
+	int nr_frozen_descendants;
+
+	/*
+	 * Number of tasks, which are counted as frozen:
+	 * frozen, SIGSTOPped, and PTRACEd.
+	 */
+	int nr_frozen_tasks;
+};
+
 struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;
@@ -453,6 +478,9 @@ struct cgroup {
	/* If there is block congestion on this cgroup. */
	atomic_t congestion_count;
 
+	/* Used to store internal freezer state */
+	struct cgroup_freezer_state freezer;
+
	/* ids of the ancestors at each level including self */
	int ancestor_ids[];
 };
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 81f58b4a5418..3e2efd412dfa 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
		free_cgroup_ns(ns);
 }
 
+#ifdef CONFIG_CGROUPS
+
+void cgroup_enter_frozen(void);
+void cgroup_leave_frozen(bool always_leave);
+void cgroup_update_frozen(struct cgroup *cgrp);
+void cgroup_freeze(struct cgroup *cgrp, bool freeze);
+void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
+				 struct cgroup *dst);
+void cgroup_freezer_frozen_exit(struct task_struct *task);
+
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+	bool ret;
+
+	if (task->flags & PF_KTHREAD)
+		return false;
+
+	rcu_read_lock();
+	ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+	return task->frozen;
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void cgroup_enter_frozen(void) { }
+static inline void cgroup_leave_frozen(bool always_leave) { }
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+	return false;
+}
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+	return false;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
 #endif /* _LINUX_CGROUP_H */
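An illustrative aside, not part of the patch: the four fields above split the freezer state into a requested state (freeze), an effective state accumulated over ancestors (e_freeze), and two counters that decide when the cgroup may report itself frozen. A minimal userspace sketch of the predicate that cgroup_update_frozen() applies in freezer.c further down — names are illustrative, and the kernel checks only member tasks here, folding descendants in separately via cgroup_propagate_frozen():

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-in for struct cgroup_freezer_state (illustrative only). */
struct freezer_state {
	bool freeze;		/* value written to cgroup.freeze */
	int e_freeze;		/* own setting plus freezing ancestors */
	int nr_frozen_descendants;
	int nr_frozen_tasks;	/* frozen, SIGSTOPped, and PTRACEd tasks */
};

/*
 * Mirrors the check in cgroup_update_frozen(): the cgroup counts as frozen
 * once freezing was requested and every member task is counted as frozen.
 */
static bool would_report_frozen(const struct freezer_state *st,
				bool freeze_requested, int nr_tasks)
{
	return freeze_requested && st->nr_frozen_tasks == nr_tasks;
}

int main(void)
{
	struct freezer_state st = { .freeze = true, .nr_frozen_tasks = 2 };

	printf("%d\n", would_report_frozen(&st, true, 3)); /* 0: a task still runs */
	printf("%d\n", would_report_frozen(&st, true, 2)); /* 1: all tasks frozen */
	return 0;
}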
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1549584a1538..45b2199bd683 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -726,6 +726,8 @@ struct task_struct {
 #ifdef CONFIG_CGROUPS
	/* disallow userland-initiated cgroup migration */
	unsigned			no_cgroup_migration:1;
+	/* task is frozen/stopped (used by the cgroup freezer) */
+	unsigned			frozen:1;
 #endif
 #ifdef CONFIG_BLK_CGROUP
	/* to be used once the psi infrastructure lands upstream. */
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index 98228bd48aee..fa067de9f1a9 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -18,6 +18,7 @@ struct task_struct;
 #define JOBCTL_TRAP_NOTIFY_BIT	20	/* trap for NOTIFY */
 #define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
+#define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
 
 #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
 #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
@@ -26,6 +27,7 @@ struct task_struct;
 #define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
 #define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
 #define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
+#define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
 
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index 8d5689ca94b9..5d7a76bfbbb7 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
 
 obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 786ceef2f222..6895464b54c6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2435,8 +2435,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
			get_css_set(to_cset);
			to_cset->nr_tasks++;
			css_set_move_task(task, from_cset, to_cset, true);
-			put_css_set_locked(from_cset);
			from_cset->nr_tasks--;
+			/*
+			 * If the source or destination cgroup is frozen,
+			 * the task might require to change its state.
+			 */
+			cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
+						    to_cset->dfl_cgrp);
+			put_css_set_locked(from_cset);
+
		}
	}
	spin_unlock_irq(&css_set_lock);
@@ -3477,8 +3484,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
 
 static int cgroup_events_show(struct seq_file *seq, void *v)
 {
-	seq_printf(seq, "populated %d\n",
-		   cgroup_is_populated(seq_css(seq)->cgroup));
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
+	seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
+
	return 0;
 }
 
@@ -3540,6 +3550,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 }
 #endif
 
+static int cgroup_freeze_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	seq_printf(seq, "%d\n", cgrp->freezer.freeze);
+
+	return 0;
+}
+
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	struct cgroup *cgrp;
+	ssize_t ret;
+	int freeze;
+
+	ret = kstrtoint(strstrip(buf), 0, &freeze);
+	if (ret)
+		return ret;
+
+	if (freeze < 0 || freeze > 1)
+		return -ERANGE;
+
+	cgrp = cgroup_kn_lock_live(of->kn, false);
+	if (!cgrp)
+		return -ENOENT;
+
+	cgroup_freeze(cgrp, freeze);
+
+	cgroup_kn_unlock(of->kn);
+
+	return nbytes;
+}
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
	struct cftype *cft = of->kn->priv;
@@ -4684,6 +4728,12 @@ static struct cftype cgroup_base_files[] = {
		.seq_show = cgroup_stat_show,
	},
	{
+		.name = "cgroup.freeze",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_freeze_show,
+		.write = cgroup_freeze_write,
+	},
+	{
		.name = "cpu.stat",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = cpu_stat_show,
@@ -5033,12 +5083,29 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
	if (ret)
		goto out_psi_free;
 
+	/*
+	 * New cgroup inherits effective freeze counter, and
+	 * if the parent has to be frozen, the child has too.
+	 */
+	cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+	if (cgrp->freezer.e_freeze)
+		set_bit(CGRP_FROZEN, &cgrp->flags);
+
	spin_lock_irq(&css_set_lock);
	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
 
-		if (tcgrp != cgrp)
+		if (tcgrp != cgrp) {
			tcgrp->nr_descendants++;
+
+			/*
+			 * If the new cgroup is frozen, all ancestor cgroups
+			 * get a new frozen descendant, but their state can't
+			 * change because of this.
+			 */
+			if (cgrp->freezer.e_freeze)
+				tcgrp->freezer.nr_frozen_descendants++;
+		}
	}
	spin_unlock_irq(&css_set_lock);
@@ -5329,6 +5396,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
	for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
		tcgrp->nr_descendants--;
		tcgrp->nr_dying_descendants++;
+		/*
+		 * If the dying cgroup is frozen, decrease frozen descendants
+		 * counters of ancestor cgroups.
+		 */
+		if (test_bit(CGRP_FROZEN, &cgrp->flags))
+			tcgrp->freezer.nr_frozen_descendants--;
	}
	spin_unlock_irq(&css_set_lock);
@@ -5782,6 +5855,29 @@ void cgroup_post_fork(struct task_struct *child)
			cset->nr_tasks++;
			css_set_move_task(child, NULL, cset, false);
		}
+
+		/*
+		 * If the cgroup has to be frozen, the new task has too.
+		 * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
+		 * the task into the frozen state.
+		 */
+		if (unlikely(cgroup_task_freeze(child))) {
+			struct cgroup *cgrp;
+
+			spin_lock(&child->sighand->siglock);
+			WARN_ON_ONCE(child->frozen);
+			cgrp = cset->dfl_cgrp;
+			child->jobctl |= JOBCTL_TRAP_FREEZE;
+			spin_unlock(&child->sighand->siglock);
+
+			/*
+			 * Calling cgroup_update_frozen() isn't required here,
+			 * because it will be called anyway a bit later
+			 * from do_freezer_trap(). So we avoid cgroup's
+			 * transient switch from the frozen state and back.
+			 */
+		}
+
		spin_unlock_irq(&css_set_lock);
	}
@@ -5830,6 +5926,12 @@ void cgroup_exit(struct task_struct *tsk)
		spin_lock_irq(&css_set_lock);
		css_set_move_task(tsk, cset, NULL, false);
		cset->nr_tasks--;
+
+		if (unlikely(cgroup_task_frozen(tsk)))
+			cgroup_freezer_frozen_exit(tsk);
+		else if (unlikely(cgroup_task_freeze(tsk)))
+			cgroup_update_frozen(task_dfl_cgroup(tsk));
+
		spin_unlock_irq(&css_set_lock);
	} else {
		get_css_set(cset);
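An illustrative aside before the new file: cgroup_freeze_write() above lands in cgroup_freeze(), implemented in freezer.c below, which treats e_freeze as a count of freezing reasons. Each write to cgroup.freeze adds or removes one reason for every cgroup in the subtree, which is why a child stays frozen while any ancestor's setting still holds. A compact userspace sketch of just that bookkeeping (not kernel code; a single-child chain keeps it short):

#include <stdbool.h>
#include <stdio.h>

/* Just the two fields cgroup_freeze() manipulates (illustrative). */
struct node {
	struct node *child;	/* single child instead of a full tree */
	bool freeze;		/* value written to cgroup.freeze */
	int e_freeze;		/* own setting plus freezing ancestors */
};

/* Mirrors the css_for_each_descendant_pre() walk in cgroup_freeze(). */
static void set_freeze(struct node *n, bool freeze)
{
	if (n->freeze == freeze)
		return;
	n->freeze = freeze;
	for (struct node *d = n; d; d = d->child)
		d->e_freeze += freeze ? 1 : -1;
}

int main(void)
{
	struct node child = { 0 };
	struct node parent = { .child = &child };

	set_freeze(&parent, true);	/* echo 1 > parent/cgroup.freeze */
	set_freeze(&child, true);	/* echo 1 > parent/child/cgroup.freeze */
	set_freeze(&parent, false);	/* echo 0 > parent/cgroup.freeze */

	/* The child stays frozen: its own setting keeps e_freeze at 1. */
	printf("child e_freeze = %d\n", child.e_freeze);
	return 0;
}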
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
new file mode 100644
index 000000000000..9d8cda478fc9
--- /dev/null
+++ b/kernel/cgroup/freezer.c
@@ -0,0 +1,317 @@
+//SPDX-License-Identifier: GPL-2.0
+#include <linux/cgroup.h>
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/signal.h>
+
+#include "cgroup-internal.h"
+
+/*
+ * Propagate the cgroup frozen state upwards by the cgroup tree.
+ */
+static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
+{
+	int desc = 1;
+
+	/*
+	 * If the new state is frozen, some freezing ancestor cgroups may change
+	 * their state too, depending on if all their descendants are frozen.
+	 *
+	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
+	 */
+	while ((cgrp = cgroup_parent(cgrp))) {
+		if (frozen) {
+			cgrp->freezer.nr_frozen_descendants += desc;
+			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
+			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
+			    cgrp->freezer.nr_frozen_descendants ==
+			    cgrp->nr_descendants) {
+				set_bit(CGRP_FROZEN, &cgrp->flags);
+				cgroup_file_notify(&cgrp->events_file);
+				desc++;
+			}
+		} else {
+			cgrp->freezer.nr_frozen_descendants -= desc;
+			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
+				clear_bit(CGRP_FROZEN, &cgrp->flags);
+				cgroup_file_notify(&cgrp->events_file);
+				desc++;
+			}
+		}
+	}
+}
+
+/*
+ * Revisit the cgroup frozen state.
+ * Checks if the cgroup is really frozen and perform all state transitions.
+ */
+void cgroup_update_frozen(struct cgroup *cgrp)
+{
+	bool frozen;
+
+	lockdep_assert_held(&css_set_lock);
+
+	/*
+	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
+	 * and all tasks are frozen and/or stopped, let's consider
+	 * the cgroup frozen. Otherwise it's not frozen.
+	 */
+	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
+		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
+
+	if (frozen) {
+		/* Already there? */
+		if (test_bit(CGRP_FROZEN, &cgrp->flags))
+			return;
+
+		set_bit(CGRP_FROZEN, &cgrp->flags);
+	} else {
+		/* Already there? */
+		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
+			return;
+
+		clear_bit(CGRP_FROZEN, &cgrp->flags);
+	}
+	cgroup_file_notify(&cgrp->events_file);
+
+	/* Update the state of ancestor cgroups. */
+	cgroup_propagate_frozen(cgrp, frozen);
+}
+
+/*
+ * Increment cgroup's nr_frozen_tasks.
+ */
+static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
+{
+	cgrp->freezer.nr_frozen_tasks++;
+}
+
+/*
+ * Decrement cgroup's nr_frozen_tasks.
+ */
+static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
+{
+	cgrp->freezer.nr_frozen_tasks--;
+	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
+}
+
+/*
+ * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ */
+void cgroup_enter_frozen(void)
+{
+	struct cgroup *cgrp;
+
+	if (current->frozen)
+		return;
+
+	spin_lock_irq(&css_set_lock);
+	current->frozen = true;
+	cgrp = task_dfl_cgroup(current);
+	cgroup_inc_frozen_cnt(cgrp);
+	cgroup_update_frozen(cgrp);
+	spin_unlock_irq(&css_set_lock);
+}
+
+/*
+ * Conditionally leave frozen/stopped state. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ *
+ * If always_leave is not set, and the cgroup is freezing,
+ * we're racing with the cgroup freezing. In this case, we don't
+ * drop the frozen counter to avoid a transient switch to
+ * the unfrozen state.
+ */
+void cgroup_leave_frozen(bool always_leave)
+{
+	struct cgroup *cgrp;
+
+	spin_lock_irq(&css_set_lock);
+	cgrp = task_dfl_cgroup(current);
+	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
+		cgroup_dec_frozen_cnt(cgrp);
+		cgroup_update_frozen(cgrp);
+		WARN_ON_ONCE(!current->frozen);
+		current->frozen = false;
+	}
+	spin_unlock_irq(&css_set_lock);
+
+	if (unlikely(current->frozen)) {
+		/*
+		 * If the task remained in the frozen state,
+		 * make sure it won't reach userspace without
+		 * entering the signal handling loop.
+		 */
+		spin_lock_irq(&current->sighand->siglock);
+		recalc_sigpending();
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
+/*
+ * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
+ * jobctl bit.
+ */
+static void cgroup_freeze_task(struct task_struct *task, bool freeze)
+{
+	unsigned long flags;
+
+	/* If the task is about to die, don't bother with freezing it. */
+	if (!lock_task_sighand(task, &flags))
+		return;
+
+	if (freeze) {
+		task->jobctl |= JOBCTL_TRAP_FREEZE;
+		signal_wake_up(task, false);
+	} else {
+		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
+		wake_up_process(task);
+	}
+
+	unlock_task_sighand(task, &flags);
+}
+
+/*
+ * Freeze or unfreeze all tasks in the given cgroup.
+ */
+static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
+{
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	spin_lock_irq(&css_set_lock);
+	if (freeze)
+		set_bit(CGRP_FREEZE, &cgrp->flags);
+	else
+		clear_bit(CGRP_FREEZE, &cgrp->flags);
+	spin_unlock_irq(&css_set_lock);
+
+	css_task_iter_start(&cgrp->self, 0, &it);
+	while ((task = css_task_iter_next(&it))) {
+		/*
+		 * Ignore kernel threads here. Freezing cgroups containing
+		 * kthreads isn't supported.
+		 */
+		if (task->flags & PF_KTHREAD)
+			continue;
+		cgroup_freeze_task(task, freeze);
+	}
+	css_task_iter_end(&it);
+
+	/*
+	 * Cgroup state should be revisited here to cover empty leaf cgroups
+	 * and cgroups which descendants are already in the desired state.
+	 */
+	spin_lock_irq(&css_set_lock);
+	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
+		cgroup_update_frozen(cgrp);
+	spin_unlock_irq(&css_set_lock);
+}
+
+/*
+ * Adjust the task state (freeze or unfreeze) and revisit the state of
+ * source and destination cgroups.
+ */
+void cgroup_freezer_migrate_task(struct task_struct *task,
+				 struct cgroup *src, struct cgroup *dst)
+{
+	lockdep_assert_held(&css_set_lock);
+
+	/*
+	 * Kernel threads are not supposed to be frozen at all.
+	 */
+	if (task->flags & PF_KTHREAD)
+		return;
+
+	/*
+	 * Adjust counters of freezing and frozen tasks.
+	 * Note, that if the task is frozen, but the destination cgroup is not
+	 * frozen, we bump both counters to keep them balanced.
+	 */
+	if (task->frozen) {
+		cgroup_inc_frozen_cnt(dst);
+		cgroup_dec_frozen_cnt(src);
+	}
+	cgroup_update_frozen(dst);
+	cgroup_update_frozen(src);
+
+	/*
+	 * Force the task to the desired state.
+	 */
+	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
+}
+
+void cgroup_freezer_frozen_exit(struct task_struct *task)
+{
+	struct cgroup *cgrp = task_dfl_cgroup(task);
+
+	lockdep_assert_held(&css_set_lock);
+
+	cgroup_dec_frozen_cnt(cgrp);
+	cgroup_update_frozen(cgrp);
+}
+
+void cgroup_freeze(struct cgroup *cgrp, bool freeze)
+{
+	struct cgroup_subsys_state *css;
+	struct cgroup *dsct;
+	bool applied = false;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	/*
+	 * Nothing changed? Just exit.
+	 */
+	if (cgrp->freezer.freeze == freeze)
+		return;
+
+	cgrp->freezer.freeze = freeze;
+
+	/*
+	 * Propagate changes downwards the cgroup tree.
+	 */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		dsct = css->cgroup;
+
+		if (cgroup_is_dead(dsct))
+			continue;
+
+		if (freeze) {
+			dsct->freezer.e_freeze++;
+			/*
+			 * Already frozen because of ancestor's settings?
+			 */
+			if (dsct->freezer.e_freeze > 1)
+				continue;
+		} else {
+			dsct->freezer.e_freeze--;
+			/*
+			 * Still frozen because of ancestor's settings?
+			 */
+			if (dsct->freezer.e_freeze > 0)
+				continue;
+
+			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
+		}
+
+		/*
+		 * Do change actual state: freeze or unfreeze.
+		 */
+		cgroup_do_freeze(dsct, freeze);
+		applied = true;
+	}
+
+	/*
+	 * Even if the actual state hasn't changed, let's notify a user.
+	 * The state can be enforced by an ancestor cgroup: the cgroup
+	 * can already be in the desired state or it can be locked in the
+	 * opposite state, so that the transition will never happen.
+	 * In both cases it's better to notify a user, that there is
+	 * nothing to wait for.
+	 */
+	if (!applied)
+		cgroup_file_notify(&cgrp->events_file);
+}
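One more aside, not part of the patch: the desc accumulator in cgroup_propagate_frozen() above is the subtle bit. Each ancestor that flips to frozen during the upward walk is itself one more newly frozen descendant from the point of view of its own ancestors, so the increment grows as the walk climbs. A standalone toy model of that walk over a parent/grandparent chain:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative ancestor record; field names follow the kernel struct. */
struct anc {
	bool freeze;	/* CGRP_FREEZE: freezing was requested */
	bool frozen;	/* CGRP_FROZEN: reported in cgroup.events */
	int nr_frozen_descendants;
	int nr_descendants;
};

/* anc[0] is the parent of the cgroup that just froze, anc[1] its parent... */
static void propagate_frozen(struct anc *anc, int n)
{
	int desc = 1;

	for (int i = 0; i < n; i++) {
		anc[i].nr_frozen_descendants += desc;
		if (!anc[i].frozen && anc[i].freeze &&
		    anc[i].nr_frozen_descendants == anc[i].nr_descendants) {
			anc[i].frozen = true;
			/* This ancestor is now one more frozen descendant
			 * of everything above it. */
			desc++;
		}
	}
}

int main(void)
{
	/* The parent has one descendant (the cgroup that just froze); the
	 * grandparent has two (parent and child). Both are freezing. */
	struct anc chain[2] = {
		{ .freeze = true, .nr_descendants = 1 },
		{ .freeze = true, .nr_descendants = 2 },
	};

	propagate_frozen(chain, 2);
	printf("parent frozen=%d grandparent frozen=%d\n",
	       chain[0].frozen, chain[1].frozen);	/* prints: 1 1 */
	return 0;
}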
diff --git a/kernel/fork.c b/kernel/fork.c
index 9dcd18aa210b..8097a0cce4db 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1222,7 +1222,9 @@ static int wait_for_vfork_done(struct task_struct *child,
	int killed;
 
	freezer_do_not_count();
+	cgroup_enter_frozen();
	killed = wait_for_completion_killable(vfork);
+	cgroup_leave_frozen(false);
	freezer_count();
 
	if (killed) {
diff --git a/kernel/signal.c b/kernel/signal.c
index f98448cf2def..095e0fc57b25 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -43,6 +43,7 @@
 #include <linux/compiler.h>
 #include <linux/posix-timers.h>
 #include <linux/livepatch.h>
+#include <linux/cgroup.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 static bool recalc_sigpending_tsk(struct task_struct *t)
 {
-	if ((t->jobctl & JOBCTL_PENDING_MASK) ||
+	if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
	    PENDING(&t->pending, &t->blocked) ||
-	    PENDING(&t->signal->shared_pending, &t->blocked)) {
+	    PENDING(&t->signal->shared_pending, &t->blocked) ||
+	    cgroup_task_frozen(t)) {
		set_tsk_thread_flag(t, TIF_SIGPENDING);
		return true;
	}
@@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
		preempt_disable();
		read_unlock(&tasklist_lock);
		preempt_enable_no_resched();
+		cgroup_enter_frozen();
		freezable_schedule();
	} else {
		/*
@@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
		}
 
		/* Now we don't run again until woken by SIGCONT or SIGKILL */
+		cgroup_enter_frozen();
		freezable_schedule();
		return true;
	} else {
@@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
	}
 }
 
+/**
+ * do_freezer_trap - handle the freezer jobctl trap
+ *
+ * Puts the task into frozen state, if only the task is not about to quit.
+ * In this case it drops JOBCTL_TRAP_FREEZE.
+ *
+ * CONTEXT:
+ * Must be called with @current->sighand->siglock held,
+ * which is always released before returning.
+ */
+static void do_freezer_trap(void)
+	__releases(&current->sighand->siglock)
+{
+	/*
+	 * If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
+	 * let's make another loop to give it a chance to be handled.
+	 * In any case, we'll return back.
+	 */
+	if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
+	     JOBCTL_TRAP_FREEZE) {
+		spin_unlock_irq(&current->sighand->siglock);
+		return;
+	}
+
+	/*
+	 * Now we're sure that there is no pending fatal signal and no
+	 * pending traps. Clear TIF_SIGPENDING to not get out of schedule()
+	 * immediately (if there is a non-fatal signal pending), and
+	 * put the task into sleep.
+	 */
+	__set_current_state(TASK_INTERRUPTIBLE);
+	clear_thread_flag(TIF_SIGPENDING);
+	spin_unlock_irq(&current->sighand->siglock);
+	cgroup_enter_frozen();
+	freezable_schedule();
+}
+
 static int ptrace_signal(int signr, kernel_siginfo_t *info)
 {
	/*
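The mask test at the top of do_freezer_trap() deserves a closer look: the task is only put to sleep when JOBCTL_TRAP_FREEZE is the sole pending jobctl work; otherwise it returns and lets the outer loop handle the stop or ptrace trap first. A quick userspace check of that arithmetic — the bit values not visible in the jobctl.h hunk above (STOP_PENDING, TRAP_STOP) are assumed from the same mainline header:

#include <stdio.h>

#define JOBCTL_STOP_PENDING	(1UL << 17)
#define JOBCTL_TRAP_STOP	(1UL << 19)
#define JOBCTL_TRAP_NOTIFY	(1UL << 20)
#define JOBCTL_TRAP_FREEZE	(1UL << 23)
#define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)

/* Same test as do_freezer_trap(): sleep only when the freezer trap is
 * the only stop/trap work pending. */
static int would_sleep(unsigned long jobctl)
{
	return (jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ==
	       JOBCTL_TRAP_FREEZE;
}

int main(void)
{
	printf("%d\n", would_sleep(JOBCTL_TRAP_FREEZE));			/* 1 */
	printf("%d\n", would_sleep(JOBCTL_TRAP_FREEZE | JOBCTL_TRAP_STOP));	/* 0 */
	printf("%d\n", would_sleep(JOBCTL_TRAP_FREEZE | JOBCTL_STOP_PENDING));	/* 0 */
	return 0;
}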
@@ -2442,6 +2483,10 @@ relock:
		ksig->info.si_signo = signr = SIGKILL;
		sigdelset(&current->pending.signal, SIGKILL);
		recalc_sigpending();
+		current->jobctl &= ~JOBCTL_TRAP_FREEZE;
+		spin_unlock_irq(&sighand->siglock);
+		if (unlikely(cgroup_task_frozen(current)))
+			cgroup_leave_frozen(true);
		goto fatal;
	}
 
@@ -2452,9 +2497,24 @@ relock:
		    do_signal_stop(0))
			goto relock;
 
-		if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
-			do_jobctl_trap();
+		if (unlikely(current->jobctl &
+			     (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
+			if (current->jobctl & JOBCTL_TRAP_MASK) {
+				do_jobctl_trap();
+				spin_unlock_irq(&sighand->siglock);
+			} else if (current->jobctl & JOBCTL_TRAP_FREEZE)
+				do_freezer_trap();
+
+			goto relock;
+		}
+
+		/*
+		 * If the task is leaving the frozen state, let's update
+		 * cgroup counters and reset the frozen bit.
+		 */
+		if (unlikely(cgroup_task_frozen(current))) {
			spin_unlock_irq(&sighand->siglock);
+			cgroup_leave_frozen(true);
			goto relock;
		}
 
@@ -2548,8 +2608,8 @@ relock:
			continue;
		}
 
-	fatal:
		spin_unlock_irq(&sighand->siglock);
+	fatal:
 
		/*
		 * Anything else is fatal, maybe with a core dump.
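Taken together, the feature is driven entirely through cgroupfs: write 0 or 1 to cgroup.freeze and watch the new frozen field of cgroup.events, which cgroup_file_notify() makes pollable. A hedged usage sketch — it assumes a cgroup v2 hierarchy mounted at /sys/fs/cgroup and an already-created child cgroup named test (paths illustrative, error handling trimmed):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Paths assume a v2 hierarchy at /sys/fs/cgroup and a child "test". */
	int freeze_fd = open("/sys/fs/cgroup/test/cgroup.freeze", O_WRONLY);
	int events_fd = open("/sys/fs/cgroup/test/cgroup.events", O_RDONLY);
	char buf[256];

	if (freeze_fd < 0 || events_fd < 0)
		return 1;

	/* Ask the kernel to freeze every task in the cgroup. */
	write(freeze_fd, "1", 1);

	/* cgroup_file_notify() makes cgroup.events pollable: wait until the
	 * "frozen" field flips to 1, re-reading the file from offset 0. */
	for (;;) {
		ssize_t len = pread(events_fd, buf, sizeof(buf) - 1, 0);

		if (len < 0)
			return 1;
		buf[len] = '\0';
		if (strstr(buf, "frozen 1"))
			break;

		struct pollfd pfd = { .fd = events_fd, .events = POLLPRI };
		poll(&pfd, 1, -1);
	}

	printf("cgroup is frozen\n");
	return 0;
}

Note that freezing is asynchronous by design — tasks enter the frozen state one by one from the signal-delivery path — which is why completion is signalled through cgroup.events rather than by blocking the write to cgroup.freeze.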