summaryrefslogtreecommitdiff
path: root/kernel/cgroup/pids.c
diff options
context:
space:
mode:
authorChristian Brauner <christian.brauner@ubuntu.com>2020-02-05 14:26:22 +0100
committerTejun Heo <tj@kernel.org>2020-02-12 17:57:51 -0500
commitef2c41cf38a7559bbf91af42d5b6a4429db8fc68 (patch)
treead5801b2782c5268dc0ad1b6f0f591b1da582eee /kernel/cgroup/pids.c
parentf3553220d4cc458d69f7da6e71a3a6097778bd28 (diff)
downloadlwn-ef2c41cf38a7559bbf91af42d5b6a4429db8fc68.tar.gz
lwn-ef2c41cf38a7559bbf91af42d5b6a4429db8fc68.zip
clone3: allow spawning processes into cgroups
This adds support for creating a process in a different cgroup than its parent. Callers can limit and account processes and threads right from the moment they are spawned: - A service manager can directly spawn new services into dedicated cgroups. - A process can be directly created in a frozen cgroup and will be frozen as well. - The initial accounting jitter experienced by process supervisors and daemons is eliminated with this. - Threaded applications or even thread implementations can choose to create a specific cgroup layout where each thread is spawned directly into a dedicated cgroup. This feature is limited to the unified hierarchy. Callers need to pass a directory file descriptor for the target cgroup. The caller can choose to pass an O_PATH file descriptor. All usual migration restrictions apply, i.e. there can be no processes in inner nodes. In general, creating a process directly in a target cgroup adheres to all migration restrictions. One of the biggest advantages of this feature is that CLONE_INTO_GROUP does not need to grab the write side of the cgroup cgroup_threadgroup_rwsem. This global lock makes moving tasks/threads around super expensive. With clone3() this lock is avoided. Cc: Tejun Heo <tj@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: cgroups@vger.kernel.org Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup/pids.c')
-rw-r--r--kernel/cgroup/pids.c15
1 files changed, 11 insertions, 4 deletions
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index 138059eb730d..511af87f685e 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -33,6 +33,7 @@
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/slab.h>
+#include <linux/sched/task.h>
#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
#define PIDS_MAX_STR "max"
@@ -214,13 +215,16 @@ static void pids_cancel_attach(struct cgroup_taskset *tset)
* task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
* on cgroup_threadgroup_change_begin() held by the copy_process().
*/
-static int pids_can_fork(struct task_struct *task)
+static int pids_can_fork(struct task_struct *task, struct css_set *cset)
{
struct cgroup_subsys_state *css;
struct pids_cgroup *pids;
int err;
- css = task_css_check(current, pids_cgrp_id, true);
+ if (cset)
+ css = cset->subsys[pids_cgrp_id];
+ else
+ css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
err = pids_try_charge(pids, 1);
if (err) {
@@ -235,12 +239,15 @@ static int pids_can_fork(struct task_struct *task)
return err;
}
-static void pids_cancel_fork(struct task_struct *task)
+static void pids_cancel_fork(struct task_struct *task, struct css_set *cset)
{
struct cgroup_subsys_state *css;
struct pids_cgroup *pids;
- css = task_css_check(current, pids_cgrp_id, true);
+ if (cset)
+ css = cset->subsys[pids_cgrp_id];
+ else
+ css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
pids_uncharge(pids, 1);
}