summaryrefslogtreecommitdiff
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-05-16 13:22:51 -0400
committerTejun Heo <tj@kernel.org>2014-05-16 13:22:51 -0400
commitc2931b70a32c705b9bd5762f5044f9eac8a52bb3 (patch)
tree0a530060fd1b196eda8e2ff81bd4010bd559ff12 /kernel/cgroup.c
parentde3f034182ecbf0efbcef7ab8b253c6c3049a592 (diff)
downloadlwn-c2931b70a32c705b9bd5762f5044f9eac8a52bb3.tar.gz
lwn-c2931b70a32c705b9bd5762f5044f9eac8a52bb3.zip
cgroup: iterate cgroup_subsys_states directly
Currently, css_next_child() is implemented as finding the next child cgroup which has the css enabled, which used to be the only way to do it as only cgroups participated in sibling lists and thus could be iteratd. This works as long as what's required during iteration is not missing online csses; however, it turns out that there are use cases where offlined but not yet released csses need to be iterated. This is difficult to implement through cgroup iteration the unified hierarchy as there may be multiple dying csses for the same subsystem associated with single cgroup. After the recent changes, the cgroup self and regular csses behave identically in how they're linked and unlinked from the sibling lists including assertion of CSS_RELEASED and css_next_child() can simply switch to iterating csses directly. This both simplifies the logic and ensures that all visible non-released csses are included in the iteration whether there are multiple dying csses for a subsystem or not. As all other iterators depend on css_next_child() for sibling iteration, this changes behaviors of all css iterators. Add and update explanations on the css states which are included in traversal to all iterators. As css iteration could always contain offlined csses, this shouldn't break any of the current users and new usages which need iteration of all on and offline csses can make use of the new semantics. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com> Cc: Johannes Weiner <hannes@cmpxchg.org>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c62
1 files changed, 37 insertions, 25 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5544e685f2da..097a1fc1e1e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp)
/**
* css_next_child - find the next child of a given css
- * @pos_css: the current position (%NULL to initiate traversal)
- * @parent_css: css whose children to walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @parent: css whose children to walk
*
- * This function returns the next child of @parent_css and should be called
+ * This function returns the next child of @parent and should be called
* under either cgroup_mutex or RCU read lock. The only requirement is
- * that @parent_css and @pos_css are accessible. The next sibling is
- * guaranteed to be returned regardless of their states.
+ * that @parent and @pos are accessible. The next sibling is guaranteed to
+ * be returned regardless of their states.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal. It's each subsystem's
+ * responsibility to synchronize against on/offlining.
*/
-struct cgroup_subsys_state *
-css_next_child(struct cgroup_subsys_state *pos_css,
- struct cgroup_subsys_state *parent_css)
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+ struct cgroup_subsys_state *parent)
{
- struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
- struct cgroup *cgrp = parent_css->cgroup;
- struct cgroup *next;
+ struct cgroup_subsys_state *next;
cgroup_assert_mutex_or_rcu_locked();
@@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css,
* races against release and the race window is very small.
*/
if (!pos) {
- next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
- } else if (likely(!(pos->self.flags & CSS_RELEASED))) {
- next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
+ next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
+ } else if (likely(!(pos->flags & CSS_RELEASED))) {
+ next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
} else {
- list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
- if (next->self.serial_nr > pos->self.serial_nr)
+ list_for_each_entry_rcu(next, &parent->children, sibling)
+ if (next->serial_nr > pos->serial_nr)
break;
}
/*
* @next, if not pointing to the head, can be dereferenced and is
- * the next sibling; however, it might have @ss disabled. If so,
- * fast-forward to the next enabled one.
+ * the next sibling.
*/
- while (&next->self.sibling != &cgrp->self.children) {
- struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss);
-
- if (next_css)
- return next_css;
- next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling);
- }
+ if (&next->sibling != &parent->children)
+ return next;
return NULL;
}
@@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css,
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct next descendant as long
* as both @pos and @root are accessible and @pos is a descendant of @root.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal. It's each subsystem's
+ * responsibility to synchronize against on/offlining.
*/
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
@@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
* section. This function will return the correct next descendant as long
* as both @pos and @cgroup are accessible and @pos is a descendant of
* @cgroup.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal. It's each subsystem's
+ * responsibility to synchronize against on/offlining.
*/
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,