summaryrefslogtreecommitdiff
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-05-16 13:22:47 -0400
committerTejun Heo <tj@kernel.org>2014-05-16 13:22:47 -0400
commit3b514d24e200fcdcde0a57c354a51d3677a86743 (patch)
tree7f38e805f1c6e8086e0a6f51383a9776e380e11d /kernel/cgroup.c
parent9d755d33f0db8c9b49438f71b38a56e375b34360 (diff)
downloadlwn-3b514d24e200fcdcde0a57c354a51d3677a86743.tar.gz
lwn-3b514d24e200fcdcde0a57c354a51d3677a86743.zip
cgroup: skip refcnting on normal root csses and cgrp_dfl_root self css
9395a4500404 ("cgroup: enable refcnting for root csses") enabled reference counting for root csses (cgroup_subsys_states) so that cgroup's self csses can be used to manage the lifetime of the containing cgroups. Unfortunately, this change was incorrect. During early init, cgrp_dfl_root self css refcnt is used. percpu_ref can't be initialized during early init and its initialization is deferred till cgroup_init() time. This means that cpu was using percpu_ref which wasn't properly initialized. Due to the way percpu variables are laid out on x86, this didn't blow up immediately on x86 but ended up incrementing and decrementing the percpu variable at offset zero, whatever it may be; however, on other archs, this caused a fault and early boot failure. As cgroup self csses for root cgroups of non-dfl hierarchies need working refcounting, we can't revert 9395a4500404. This patch adds CSS_NO_REF which explicitly inhibits reference counting on the css and sets it on all normal (non-self) csses and cgrp_dfl_root self css. v2: cgrp_dfl_root.self is the offending one. Set the flag on it. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Stephen Warren <swarren@nvidia.com> Tested-by: Stephen Warren <swarren@nvidia.com> Fixes: 9395a4500404 ("cgroup: enable refcnting for root csses")
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c11
1 files changed, 9 insertions, 2 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c01e8e8dfad0..0343d7ee6d62 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4593,11 +4593,17 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+
+ /*
+ * Root csses are never destroyed and we can't initialize
+ * percpu_ref during early init. Disable refcnting.
+ */
+ css->flags |= CSS_NO_REF;
+
if (early) {
/* allocation can't be done safely during early init */
css->id = 1;
} else {
- BUG_ON(percpu_ref_init(&css->refcnt, css_release));
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
BUG_ON(css->id < 0);
}
@@ -4636,6 +4642,8 @@ int __init cgroup_init_early(void)
int i;
init_cgroup_root(&cgrp_dfl_root, &opts);
+ cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
+
RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
for_each_subsys(ss, i) {
@@ -4684,7 +4692,6 @@ int __init cgroup_init(void)
struct cgroup_subsys_state *css =
init_css_set.subsys[ss->id];
- BUG_ON(percpu_ref_init(&css->refcnt, css_release));
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
GFP_KERNEL);
BUG_ON(css->id < 0);