diff options
author | Christian Brauner <christian.brauner@ubuntu.com> | 2020-02-05 14:26:22 +0100 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2020-02-12 17:57:51 -0500 |
commit | ef2c41cf38a7559bbf91af42d5b6a4429db8fc68 (patch) | |
tree | ad5801b2782c5268dc0ad1b6f0f591b1da582eee /kernel/cgroup/pids.c | |
parent | f3553220d4cc458d69f7da6e71a3a6097778bd28 (diff) | |
download | lwn-ef2c41cf38a7559bbf91af42d5b6a4429db8fc68.tar.gz lwn-ef2c41cf38a7559bbf91af42d5b6a4429db8fc68.zip |
clone3: allow spawning processes into cgroups
This adds support for creating a process in a different cgroup than its
parent. Callers can limit and account processes and threads right from
the moment they are spawned:
- A service manager can directly spawn new services into dedicated
cgroups.
- A process can be directly created in a frozen cgroup and will be
frozen as well.
- The initial accounting jitter experienced by process supervisors and
daemons is eliminated with this.
- Threaded applications or even thread implementations can choose to
create a specific cgroup layout where each thread is spawned
directly into a dedicated cgroup.
This feature is limited to the unified hierarchy. Callers need to pass
a directory file descriptor for the target cgroup. The caller can
choose to pass an O_PATH file descriptor. All usual migration
restrictions apply, i.e. there can be no processes in inner nodes. In
general, creating a process directly in a target cgroup adheres to all
migration restrictions.
One of the biggest advantages of this feature is that CLONE_INTO_GROUP does
not need to grab the write side of the cgroup cgroup_threadgroup_rwsem.
This global lock makes moving tasks/threads around super expensive. With
clone3() this lock is avoided.
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: cgroups@vger.kernel.org
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup/pids.c')
-rw-r--r-- | kernel/cgroup/pids.c | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 138059eb730d..511af87f685e 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -33,6 +33,7 @@ #include <linux/atomic.h> #include <linux/cgroup.h> #include <linux/slab.h> +#include <linux/sched/task.h> #define PIDS_MAX (PID_MAX_LIMIT + 1ULL) #define PIDS_MAX_STR "max" @@ -214,13 +215,16 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies * on cgroup_threadgroup_change_begin() held by the copy_process(). */ -static int pids_can_fork(struct task_struct *task) +static int pids_can_fork(struct task_struct *task, struct css_set *cset) { struct cgroup_subsys_state *css; struct pids_cgroup *pids; int err; - css = task_css_check(current, pids_cgrp_id, true); + if (cset) + css = cset->subsys[pids_cgrp_id]; + else + css = task_css_check(current, pids_cgrp_id, true); pids = css_pids(css); err = pids_try_charge(pids, 1); if (err) { @@ -235,12 +239,15 @@ static int pids_can_fork(struct task_struct *task) return err; } -static void pids_cancel_fork(struct task_struct *task) +static void pids_cancel_fork(struct task_struct *task, struct css_set *cset) { struct cgroup_subsys_state *css; struct pids_cgroup *pids; - css = task_css_check(current, pids_cgrp_id, true); + if (cset) + css = cset->subsys[pids_cgrp_id]; + else + css = task_css_check(current, pids_cgrp_id, true); pids = css_pids(css); pids_uncharge(pids, 1); } |