summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/cgroup.h3
-rw-r--r--kernel/cgroup.c51
2 files changed, 49 insertions, 5 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5207c28c2402..50d8cc37498b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -275,6 +275,9 @@ enum {
* - "tasks" is removed. Everything should be at process
* granularity. Use "cgroup.procs" instead.
*
+ * - "cgroup.procs" is not sorted. pids will be unique unless they
+ * got recycled inbetween reads.
+ *
* - "release_agent" and "notify_on_release" are removed.
* Replacement notification mechanism will be implemented.
*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a2458031d851..f9f5fe3526ac 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3564,11 +3564,49 @@ after:
return dest;
}
+/*
+ * The two pid files - task and cgroup.procs - guaranteed that the result
+ * is sorted, which forced this whole pidlist fiasco. As pid order is
+ * different per namespace, each namespace needs differently sorted list,
+ * making it impossible to use, for example, single rbtree of member tasks
+ * sorted by task pointer. As pidlists can be fairly large, allocating one
+ * per open file is dangerous, so cgroup had to implement shared pool of
+ * pidlists keyed by cgroup and namespace.
+ *
+ * All this extra complexity was caused by the original implementation
+ * committing to an entirely unnecessary property. In the long term, we
+ * want to do away with it. Explicitly scramble sort order if
+ * sane_behavior so that no such expectation exists in the new interface.
+ *
+ * Scrambling is done by swapping every two consecutive bits, which is
+ * non-identity one-to-one mapping which disturbs sort order sufficiently.
+ */
+static pid_t pid_fry(pid_t pid)
+{
+ unsigned a = pid & 0x55555555;
+ unsigned b = pid & 0xAAAAAAAA;
+
+ return (a << 1) | (b >> 1);
+}
+
+static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
+{
+ if (cgroup_sane_behavior(cgrp))
+ return pid_fry(pid);
+ else
+ return pid;
+}
+
static int cmppid(const void *a, const void *b)
{
return *(pid_t *)a - *(pid_t *)b;
}
+static int fried_cmppid(const void *a, const void *b)
+{
+ return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b);
+}
+
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
enum cgroup_filetype type)
{
@@ -3656,7 +3694,10 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
css_task_iter_end(&it);
length = n;
/* now sort & (if procs) strip out duplicates */
- sort(array, length, sizeof(pid_t), cmppid, NULL);
+ if (cgroup_sane_behavior(cgrp))
+ sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
+ else
+ sort(array, length, sizeof(pid_t), cmppid, NULL);
if (type == CGROUP_FILE_PROCS)
length = pidlist_uniq(array, length);
@@ -3777,10 +3818,10 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
while (index < end) {
int mid = (index + end) / 2;
- if (l->list[mid] == pid) {
+ if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) {
index = mid;
break;
- } else if (l->list[mid] <= pid)
+ } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid)
index = mid + 1;
else
end = mid;
@@ -3791,7 +3832,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
return NULL;
/* Update the abstract position to be the actual pid that we found */
iter = l->list + index;
- *pos = *iter;
+ *pos = cgroup_pid_fry(cgrp, *iter);
return iter;
}
@@ -3820,7 +3861,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
if (p >= end) {
return NULL;
} else {
- *pos = *p;
+ *pos = cgroup_pid_fry(of->cgrp, *p);
return p;
}
}