summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2012-01-21 13:34:05 -0800
committerEric W. Biederman <ebiederm@xmission.com>2012-01-24 16:40:27 -0800
commit7c60c48f58a78195acc1f71c9a9d01958c02ab89 (patch)
tree7d1a66abc2510aa474105a747fdcd08b033f2b36
parentf728019bb72e655680c02ad1829323054a8e875f (diff)
downloadlwn-7c60c48f58a78195acc1f71c9a9d01958c02ab89.tar.gz
lwn-7c60c48f58a78195acc1f71c9a9d01958c02ab89.zip
sysctl: Improve the sysctl sanity checks
- Stop validating subdirectories now that we only register leaf tables - Cleanup and improve the duplicate filename check. * Run the duplicate filename check under the sysctl_lock to guarantee we never add duplicate names. * Reduce the duplicate filename check to nearly O(M*N) where M is the number of entries in tthe table we are registering and N is the number of entries in the directory before we got there. - Move the duplicate filename check into it's own function and call it directtly from __register_sysctl_table - Kill the config option as the sanity checks are now cheap enough the config option is unnecessary. The original reason for the config option was because we had a huge table used to verify the proc filename to binary sysctl mapping. That table has now evolved into the binary_sysctl translation layer and is no longer part of the sysctl_check code. - Tighten up the permission checks. Guarnateeing that files only have read or write permissions. - Removed redudant check for parents having a procname as now everything has a procname. - Generalize the backtrace logic so that we print a backtrace from any failure of __register_sysctl_table that was not caused by a memmory allocation failure. The backtrace allows us to track down who erroneously registered a sysctl table. Bechmark before (CONFIG_SYSCTL_CHECK=y): make-dummies 0 999 -> 12s rmmod dummy -> 0.08s Bechmark before (CONFIG_SYSCTL_CHECK=n): make-dummies 0 999 -> 0.7s rmmod dummy -> 0.06s make-dummies 0 99999 -> 1m13s rmmod dummy -> 0.38s Benchmark after: make-dummies 0 999 -> 0.65s rmmod dummy -> 0.055s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.39s The sysctl sanity checks now impose no measurable cost. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-rw-r--r--fs/proc/proc_sysctl.c222
-rw-r--r--lib/Kconfig.debug8
2 files changed, 86 insertions, 144 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6bab2ae9e395..a492ff60e071 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
}
}
-#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
-static int sysctl_depth(struct ctl_table *table)
+static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
+ struct ctl_table *table)
{
- struct ctl_table *tmp;
- int depth;
-
- depth = 0;
- for (tmp = table; tmp->parent; tmp = tmp->parent)
- depth++;
+ struct ctl_table *entry, *test;
+ int error = 0;
- return depth;
+ for (entry = old; entry->procname; entry++) {
+ for (test = table; test->procname; test++) {
+ if (strcmp(entry->procname, test->procname) == 0) {
+ printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
+ path, test->procname);
+ error = -EEXIST;
+ }
+ }
+ }
+ return error;
}
-static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
+static int sysctl_check_dups(struct nsproxy *namespaces,
+ struct ctl_table_header *header,
+ const char *path, struct ctl_table *table)
{
- int i;
+ struct ctl_table_root *root;
+ struct ctl_table_set *set;
+ struct ctl_table_header *dir_head, *head;
+ struct ctl_table *dir_table;
+ int error = 0;
- for (i = 0; table && i < n; i++)
- table = table->parent;
+ /* No dups if we are the only member of our directory */
+ if (header->attached_by != table)
+ return 0;
- return table;
-}
+ dir_head = header->parent;
+ dir_table = header->attached_to;
+ error = sysctl_check_table_dups(path, dir_table, table);
-static void sysctl_print_path(struct ctl_table *table)
-{
- struct ctl_table *tmp;
- int depth, i;
- depth = sysctl_depth(table);
- if (table->procname) {
- for (i = depth; i >= 0; i--) {
- tmp = sysctl_parent(table, i);
- printk("/%s", tmp->procname?tmp->procname:"");
- }
- }
- printk(" ");
-}
+ root = &sysctl_table_root;
+ do {
+ set = lookup_header_set(root, namespaces);
-static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
- struct ctl_table *table)
-{
- struct ctl_table_header *head;
- struct ctl_table *ref, *test;
- int depth, cur_depth;
-
- depth = sysctl_depth(table);
-
- for (head = __sysctl_head_next(namespaces, NULL); head;
- head = __sysctl_head_next(namespaces, head)) {
- cur_depth = depth;
- ref = head->ctl_table;
-repeat:
- test = sysctl_parent(table, cur_depth);
- for (; ref->procname; ref++) {
- int match = 0;
- if (cur_depth && !ref->child)
+ list_for_each_entry(head, &set->list, ctl_entry) {
+ if (head->unregistering)
continue;
-
- if (test->procname && ref->procname &&
- (strcmp(test->procname, ref->procname) == 0))
- match++;
-
- if (match) {
- if (cur_depth != 0) {
- cur_depth--;
- ref = ref->child;
- goto repeat;
- }
- goto out;
- }
+ if (head->attached_to != dir_table)
+ continue;
+ error = sysctl_check_table_dups(path, head->attached_by,
+ table);
}
- }
- ref = NULL;
-out:
- sysctl_head_finish(head);
- return ref;
+ root = list_entry(root->root_list.next,
+ struct ctl_table_root, root_list);
+ } while (root != &sysctl_table_root);
+ return error;
}
-static void set_fail(const char **fail, struct ctl_table *table, const char *str)
+static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
{
- if (*fail) {
- printk(KERN_ERR "sysctl table check failed: ");
- sysctl_print_path(table);
- printk(" %s\n", *fail);
- dump_stack();
- }
- *fail = str;
-}
+ struct va_format vaf;
+ va_list args;
-static void sysctl_check_leaf(struct nsproxy *namespaces,
- struct ctl_table *table, const char **fail)
-{
- struct ctl_table *ref;
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
+ path, table->procname, &vaf);
- ref = sysctl_check_lookup(namespaces, table);
- if (ref && (ref != table))
- set_fail(fail, table, "Sysctl already exists");
+ va_end(args);
+ return -EINVAL;
}
-static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
+static int sysctl_check_table(const char *path, struct ctl_table *table)
{
- int error = 0;
+ int err = 0;
for (; table->procname; table++) {
- const char *fail = NULL;
-
- if (table->parent) {
- if (!table->parent->procname)
- set_fail(&fail, table, "Parent without procname");
- }
- if (table->child) {
- if (table->data)
- set_fail(&fail, table, "Directory with data?");
- if (table->maxlen)
- set_fail(&fail, table, "Directory with maxlen?");
- if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
- set_fail(&fail, table, "Writable sysctl directory");
- if (table->proc_handler)
- set_fail(&fail, table, "Directory with proc_handler");
- if (table->extra1)
- set_fail(&fail, table, "Directory with extra1");
- if (table->extra2)
- set_fail(&fail, table, "Directory with extra2");
- } else {
- if ((table->proc_handler == proc_dostring) ||
- (table->proc_handler == proc_dointvec) ||
- (table->proc_handler == proc_dointvec_minmax) ||
- (table->proc_handler == proc_dointvec_jiffies) ||
- (table->proc_handler == proc_dointvec_userhz_jiffies) ||
- (table->proc_handler == proc_dointvec_ms_jiffies) ||
- (table->proc_handler == proc_doulongvec_minmax) ||
- (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
- if (!table->data)
- set_fail(&fail, table, "No data");
- if (!table->maxlen)
- set_fail(&fail, table, "No maxlen");
- }
-#ifdef CONFIG_PROC_SYSCTL
- if (!table->proc_handler)
- set_fail(&fail, table, "No proc_handler");
-#endif
- sysctl_check_leaf(namespaces, table, &fail);
- }
- if (table->mode > 0777)
- set_fail(&fail, table, "bogus .mode");
- if (fail) {
- set_fail(&fail, table, NULL);
- error = -EINVAL;
- }
if (table->child)
- error |= sysctl_check_table(namespaces, table->child);
+ err = sysctl_err(path, table, "Not a file");
+
+ if ((table->proc_handler == proc_dostring) ||
+ (table->proc_handler == proc_dointvec) ||
+ (table->proc_handler == proc_dointvec_minmax) ||
+ (table->proc_handler == proc_dointvec_jiffies) ||
+ (table->proc_handler == proc_dointvec_userhz_jiffies) ||
+ (table->proc_handler == proc_dointvec_ms_jiffies) ||
+ (table->proc_handler == proc_doulongvec_minmax) ||
+ (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
+ if (!table->data)
+ err = sysctl_err(path, table, "No data");
+ if (!table->maxlen)
+ err = sysctl_err(path, table, "No maxlen");
+ }
+ if (!table->proc_handler)
+ err = sysctl_err(path, table, "No proc_handler");
+
+ if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
+ err = sysctl_err(path, table, "bogus .mode 0%o",
+ table->mode);
}
- return error;
+ return err;
}
-#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */
/**
* __register_sysctl_table - register a leaf sysctl table
@@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table(
header->root = root;
sysctl_set_parent(NULL, header->ctl_table);
header->count = 1;
-#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
- if (sysctl_check_table(namespaces, header->ctl_table)) {
- kfree(header);
- return NULL;
- }
-#endif
+ if (sysctl_check_table(path, table))
+ goto fail;
spin_lock(&sysctl_lock);
header->set = lookup_header_set(root, namespaces);
header->attached_by = header->ctl_table;
@@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table(
struct ctl_table_root, root_list);
set = lookup_header_set(root, namespaces);
}
+ if (sysctl_check_dups(namespaces, header, path, table))
+ goto fail_locked;
header->parent->count++;
list_add_tail(&header->ctl_entry, &header->set->list);
spin_unlock(&sysctl_lock);
return header;
+fail_locked:
+ spin_unlock(&sysctl_lock);
+fail:
+ kfree(header);
+ dump_stack();
+ return NULL;
}
static char *append_path(const char *path, char *pos, const char *name)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7d1f75..943a6182cdf2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1113,14 +1113,6 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
-config SYSCTL_SYSCALL_CHECK
- bool "Sysctl checks"
- depends on SYSCTL
- ---help---
- sys_sysctl uses binary paths that have been found challenging
- to properly maintain and use. This enables checks that help
- you to keep things correct.
-
source mm/Kconfig.debug
source kernel/trace/Kconfig