summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Menage <menage@google.com>2008-04-04 14:29:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-04 14:46:26 -0700
commit8bab8dded67d026c39367bbd5e27d2f6c556c38e (patch)
treed80f8f85f1da496c56bfa8575f0b59eba7c2ef55
parent3a143125ddc4e2e0ca1e67fb4bedd45c36e59cc7 (diff)
downloadlwn-8bab8dded67d026c39367bbd5e27d2f6c556c38e.tar.gz
lwn-8bab8dded67d026c39367bbd5e27d2f6c556c38e.zip
cgroups: add cgroup support for enabling controllers at boot time
The effects of cgroup_disable=foo are: - foo isn't auto-mounted if you mount all cgroups in a single hierarchy - foo isn't visible as an individually mountable subsystem As a result there will only ever be one call to foo->create(), at init time; all processes will stay in this group, and the group will never be mounted on a visible hierarchy. Any additional effects (e.g. not allocating metadata) are up to the foo subsystem. This doesn't handle early_init subsystems (their "disabled" bit isn't set be, but it could easily be extended to do so if any of the early_init systems wanted it - I think it would just involve some nastier parameter processing since it would occur before the command-line argument parser had been run. Hugh said: Ballpark figures, I'm trying to get this question out rather than processing the exact numbers: CONFIG_CGROUP_MEM_RES_CTLR adds 15% overhead to the affected paths, booting with cgroup_disable=memory cuts that back to 1% overhead (due to slightly bigger struct page). I'm no expert on distros, they may have no interest whatever in CONFIG_CGROUP_MEM_RES_CTLR=y; and the rest of us can easily build with or without it, or apply the cgroup_disable=memory patches. Unix bench's execl test result on x86_64 was == just after boot without mounting any cgroup fs.== mem_cgorup=off : Execl Throughput 43.0 3150.1 732.6 mem_cgroup=on : Execl Throughput 43.0 2932.6 682.0 == [lizf@cn.fujitsu.com: fix boot option parsing] Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Paul Menage <menage@google.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--kernel/cgroup.c42
3 files changed, 42 insertions, 5 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4cd1a5da80a4..32e9297ef747 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -375,6 +375,10 @@ and is between 256 and 4096 characters. It is defined in the file
ccw_timeout_log [S390]
See Documentation/s390/CommonIO for details.
+ cgroup_disable= [KNL] Disable a particular controller
+ Format: {name of the controller(s) to disable}
+ {Currently supported controllers - "memory"}
+
checkreqprot [SELINUX] Set initial checkreqprot flag value.
Format: { "0" | "1" }
See security/selinux/Kconfig help text.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 028ba3b523b1..a6a6035a4e1e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -256,6 +256,7 @@ struct cgroup_subsys {
void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
int subsys_id;
int active;
+ int disabled;
int early_init;
#define MAX_CGROUP_TYPE_NAMELEN 32
const char *name;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 53d86b4b0ce0..62f1a5231fe9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -782,7 +782,14 @@ static int parse_cgroupfs_options(char *data,
if (!*token)
return -EINVAL;
if (!strcmp(token, "all")) {
- opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1;
+ /* Add all non-disabled subsystems */
+ int i;
+ opts->subsys_bits = 0;
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ if (!ss->disabled)
+ opts->subsys_bits |= 1ul << i;
+ }
} else if (!strcmp(token, "noprefix")) {
set_bit(ROOT_NOPREFIX, &opts->flags);
} else if (!strncmp(token, "release_agent=", 14)) {
@@ -800,7 +807,8 @@ static int parse_cgroupfs_options(char *data,
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
ss = subsys[i];
if (!strcmp(token, ss->name)) {
- set_bit(i, &opts->subsys_bits);
+ if (!ss->disabled)
+ set_bit(i, &opts->subsys_bits);
break;
}
}
@@ -2600,13 +2608,13 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
int i;
- seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n");
+ seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
mutex_lock(&cgroup_mutex);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
- seq_printf(m, "%s\t%lu\t%d\n",
+ seq_printf(m, "%s\t%lu\t%d\t%d\n",
ss->name, ss->root->subsys_bits,
- ss->root->number_of_cgroups);
+ ss->root->number_of_cgroups, !ss->disabled);
}
mutex_unlock(&cgroup_mutex);
return 0;
@@ -3010,3 +3018,27 @@ static void cgroup_release_agent(struct work_struct *work)
spin_unlock(&release_list_lock);
mutex_unlock(&cgroup_mutex);
}
+
+static int __init cgroup_disable(char *str)
+{
+ int i;
+ char *token;
+
+ while ((token = strsep(&str, ",")) != NULL) {
+ if (!*token)
+ continue;
+
+ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+
+ if (!strcmp(token, ss->name)) {
+ ss->disabled = 1;
+ printk(KERN_INFO "Disabling %s control group"
+ " subsystem\n", ss->name);
+ break;
+ }
+ }
+ }
+ return 1;
+}
+__setup("cgroup_disable=", cgroup_disable);