diff options
author | Ben Blum <bblum@google.com> | 2009-09-23 15:56:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-24 07:20:58 -0700 |
commit | d1d9fd3308fdef6b4bf564fa3d6cfe35b68b50bc (patch) | |
tree | 8de392166f2edb696950a90e468ef27f043be509 /kernel | |
parent | 72a8cb30d10d4041c455a7054607a7d519167c87 (diff) | |
download | lwn-d1d9fd3308fdef6b4bf564fa3d6cfe35b68b50bc.tar.gz lwn-d1d9fd3308fdef6b4bf564fa3d6cfe35b68b50bc.zip |
cgroups: use vmalloc for large cgroups pidlist allocations
Routes all pidlist allocation requests through a single helper that decides,
based on the requested size, whether the array must be vmalloc()ed or can be
obtained via kmalloc(); frees are likewise dispatched to vfree() or kfree().
Signed-off-by: Ben Blum <bblum@google.com>
Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 47 |
1 files changed, 42 insertions, 5 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 97194ba12014..3e356b05b2d5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -50,6 +50,7 @@ #include <linux/smp_lock.h> #include <linux/pid_namespace.h> #include <linux/idr.h> +#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ #include <asm/atomic.h> @@ -2351,6 +2352,42 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) */ /* + * The following two functions "fix" the issue where there are more pids + * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. + * TODO: replace with a kernel-wide solution to this problem + */ +#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) +static void *pidlist_allocate(int count) +{ + if (PIDLIST_TOO_LARGE(count)) + return vmalloc(count * sizeof(pid_t)); + else + return kmalloc(count * sizeof(pid_t), GFP_KERNEL); +} +static void pidlist_free(void *p) +{ + if (is_vmalloc_addr(p)) + vfree(p); + else + kfree(p); +} +static void *pidlist_resize(void *p, int newcount) +{ + void *newlist; + /* note: if new alloc fails, old p will still be valid either way */ + if (is_vmalloc_addr(p)) { + newlist = vmalloc(newcount * sizeof(pid_t)); + if (!newlist) + return NULL; + memcpy(newlist, p, newcount * sizeof(pid_t)); + vfree(p); + } else { + newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); + } + return newlist; +} + +/* * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries * If the new stripped list is sufficiently smaller and there's enough memory * to allocate a new buffer, will let go of the unneeded memory. Returns the @@ -2389,7 +2426,7 @@ after: * we'll just stay with what we've got. 
*/ if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { - newlist = krealloc(list, dest * sizeof(pid_t), GFP_KERNEL); + newlist = pidlist_resize(list, dest); if (newlist) *p = newlist; } @@ -2470,7 +2507,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, * show up until sometime later on. */ length = cgroup_task_count(cgrp); - array = kmalloc(length * sizeof(pid_t), GFP_KERNEL); + array = pidlist_allocate(length); if (!array) return -ENOMEM; /* now, populate the array */ @@ -2494,11 +2531,11 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, length = pidlist_uniq(&array, length); l = cgroup_pidlist_find(cgrp, type); if (!l) { - kfree(array); + pidlist_free(array); return -ENOMEM; } /* store array, freeing old if necessary - lock already held */ - kfree(l->list); + pidlist_free(l->list); l->list = array; l->length = length; l->use_count++; @@ -2659,7 +2696,7 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l) /* we're the last user if refcount is 0; remove and free */ list_del(&l->links); mutex_unlock(&l->owner->pidlist_mutex); - kfree(l->list); + pidlist_free(l->list); put_pid_ns(l->key.ns); up_write(&l->mutex); kfree(l); |