diff options
author | Tejun Heo <tj@kernel.org> | 2017-02-22 15:41:24 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-22 16:41:27 -0800 |
commit | 510ded33e075c2bd662b1efab0110f4240325fc9 (patch) | |
tree | 9199fa1031aac4fcf633ae89a01233a8988e23fc /mm/slab_common.c | |
parent | bc2791f857e1984b7548d2a2de2ffb1a913dee62 (diff) | |
download | lwn-510ded33e075c2bd662b1efab0110f4240325fc9.tar.gz lwn-510ded33e075c2bd662b1efab0110f4240325fc9.zip |
slab: implement slab_root_caches list
With kmem cgroup support enabled, kmem_caches can be created and
destroyed frequently and a great number of near empty kmem_caches can
accumulate if there are a lot of transient cgroups and the system is not
under memory pressure. When memory reclaim starts under such
conditions, it can lead to consecutive deactivation and destruction of
many kmem_caches, easily hundreds of thousands on moderately large
systems, exposing scalability issues in the current slab management
code. This is one of the patches to address the issue.
slab_caches currently lists all caches including root and memcg ones.
This is the only data structure which lists the root caches and
iterating root caches can only be done by walking the list while
skipping over memcg caches. As there can be a huge number of memcg
caches, this can become very expensive.
This also can make /proc/slabinfo behave very badly. seq_file processes
reads in 4k chunks and seeks to the previous Nth position on slab_caches
list to resume after each chunk. With a lot of memcg cache churns on
the list, reading /proc/slabinfo can become very slow and its content
often ends up with duplicate and/or missing entries.
This patch adds a new list slab_root_caches which lists only the root
caches. When memcg is not enabled, it becomes just an alias of
slab_caches. memcg specific list operations are collected into
memcg_[un]link_cache().
Link: http://lkml.kernel.org/r/20170117235411.9408-7-tj@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jay Vana <jsvana@fb.com>
Acked-by: Vladimir Davydov <vdavydov@tarantool.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/slab_common.c')
-rw-r--r-- | mm/slab_common.c | 59 |
1 files changed, 34 insertions, 25 deletions
diff --git a/mm/slab_common.c b/mm/slab_common.c index c3bbeddaeaaf..274697e1a42a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -138,6 +138,9 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, } #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) + +LIST_HEAD(slab_root_caches); + void slab_init_memcg_params(struct kmem_cache *s) { s->memcg_params.root_cache = NULL; @@ -183,9 +186,6 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size) { struct memcg_cache_array *old, *new; - if (!is_root_cache(s)) - return 0; - new = kzalloc(sizeof(struct memcg_cache_array) + new_array_size * sizeof(void *), GFP_KERNEL); if (!new) @@ -209,7 +209,7 @@ int memcg_update_all_caches(int num_memcgs) int ret = 0; mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_caches, list) { + list_for_each_entry(s, &slab_root_caches, root_caches_node) { ret = update_memcg_params(s, num_memcgs); /* * Instead of freeing the memory, we'll just leave the caches @@ -222,10 +222,26 @@ int memcg_update_all_caches(int num_memcgs) return ret; } -static void unlink_memcg_cache(struct kmem_cache *s) +void memcg_link_cache(struct kmem_cache *s) +{ + if (is_root_cache(s)) { + list_add(&s->root_caches_node, &slab_root_caches); + } else { + list_add(&s->memcg_params.children_node, + &s->memcg_params.root_cache->memcg_params.children); + list_add(&s->memcg_params.kmem_caches_node, + &s->memcg_params.memcg->kmem_caches); + } +} + +static void memcg_unlink_cache(struct kmem_cache *s) { - list_del(&s->memcg_params.children_node); - list_del(&s->memcg_params.kmem_caches_node); + if (is_root_cache(s)) { + list_del(&s->root_caches_node); + } else { + list_del(&s->memcg_params.children_node); + list_del(&s->memcg_params.kmem_caches_node); + } } #else static inline int init_memcg_params(struct kmem_cache *s, @@ -238,7 +254,7 @@ static inline void destroy_memcg_params(struct kmem_cache *s) { } -static inline void unlink_memcg_cache(struct kmem_cache *s) +static inline void memcg_unlink_cache(struct kmem_cache *s) { } #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ @@ -285,7 +301,7 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, if (flags & SLAB_NEVER_MERGE) return NULL; - list_for_each_entry_reverse(s, &slab_caches, list) { + list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) { if (slab_unmergeable(s)) continue; @@ -369,6 +385,7 @@ static struct kmem_cache *create_cache(const char *name, s->refcount = 1; list_add(&s->list, &slab_caches); + memcg_link_cache(s); out: if (err) return ERR_PTR(err); @@ -514,9 +531,8 @@ static int shutdown_cache(struct kmem_cache *s) if (__kmem_cache_shutdown(s) != 0) return -EBUSY; + memcg_unlink_cache(s); list_del(&s->list); - if (!is_root_cache(s)) - unlink_memcg_cache(s); if (s->flags & SLAB_DESTROY_BY_RCU) { list_add_tail(&s->list, &slab_caches_to_rcu_destroy); @@ -596,10 +612,6 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, goto out_unlock; } - list_add(&s->memcg_params.children_node, - &root_cache->memcg_params.children); - list_add(&s->memcg_params.kmem_caches_node, &memcg->kmem_caches); - /* * Since readers won't lock (see cache_from_memcg_idx()), we need a * barrier here to ensure nobody will see the kmem_cache partially @@ -627,10 +639,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) get_online_mems(); mutex_lock(&slab_mutex); - list_for_each_entry(s, &slab_caches, list) { - if (!is_root_cache(s)) - continue; - + list_for_each_entry(s, &slab_root_caches, root_caches_node) { arr = rcu_dereference_protected(s->memcg_params.memcg_caches, lockdep_is_held(&slab_mutex)); c = arr->entries[idx]; @@ -829,6 +838,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, create_boot_cache(s, name, size, flags); list_add(&s->list, &slab_caches); + memcg_link_cache(s); s->refcount = 1; return s; } @@ -1136,12 +1146,12 @@ static void print_slabinfo_header(struct seq_file *m) void *slab_start(struct seq_file *m, loff_t *pos) { mutex_lock(&slab_mutex); - return seq_list_start(&slab_caches, *pos); + return seq_list_start(&slab_root_caches, *pos); } void *slab_next(struct seq_file *m, void *p, loff_t *pos) { - return seq_list_next(p, &slab_caches, pos); + return seq_list_next(p, &slab_root_caches, pos); } void slab_stop(struct seq_file *m, void *p) @@ -1193,12 +1203,11 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m) static int slab_show(struct seq_file *m, void *p) { - struct kmem_cache *s = list_entry(p, struct kmem_cache, list); + struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node); - if (p == slab_caches.next) + if (p == slab_root_caches.next) print_slabinfo_header(m); - if (is_root_cache(s)) - cache_show(s, m); + cache_show(s, m); return 0; } |