summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2006-03-24 03:16:07 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-24 07:33:23 -0800
commit101a50019ae5e370d73984ee05d56dd3b08f330a (patch)
treef5628837d4bb68a4109dfb8d6601f95b630848c3
parentfffb60f93ce5880aade88e01d7133b52a4879710 (diff)
downloadlwn-101a50019ae5e370d73984ee05d56dd3b08f330a.tar.gz
lwn-101a50019ae5e370d73984ee05d56dd3b08f330a.zip
[PATCH] cpuset memory spread slab cache implementation
Provide the slab cache infrastructure to support cpuset memory spreading. See the previous patches, cpuset_mem_spread, for an explanation of cpuset memory spreading. This patch provides a slab cache SLAB_MEM_SPREAD flag. If set in the kmem_cache_create() call defining a slab cache, then any task marked with the process state flag PF_MEMSPREAD will spread memory page allocations for that cache over all the allowed nodes, instead of preferring the local (faulting) node. On systems not configured with CONFIG_NUMA, this results in no change to the page allocation code path for slab caches. On systems with cpusets configured in the kernel, but the "memory_spread" cpuset option not enabled for the current tasks cpuset, this adds a call to a cpuset routine and failed bit test of the processor state flag PF_SPREAD_SLAB. For tasks so marked, a second inline test is done for the slab cache flag SLAB_MEM_SPREAD, and if that is set and if the allocation is not in_interrupt(), this adds a call to to a cpuset routine that computes which of the tasks mems_allowed nodes should be preferred for this allocation. ==> This patch adds another hook into the performance critical code path to allocating objects from the slab cache, in the ____cache_alloc() chunk, below. The next patch optimizes this hook, reducing the impact of the combined mempolicy plus memory spreading hooks on this critical code path to a single check against the tasks task_struct flags word. This patch provides the generic slab flags and logic needed to apply memory spreading to a particular slab. A subsequent patch will mark a few specific slab caches for this placement policy. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/slab.h1
-rw-r--r--mm/slab.c13
2 files changed, 12 insertions, 2 deletions
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2b28c849d75a..e2ee5b268797 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -46,6 +46,7 @@ typedef struct kmem_cache kmem_cache_t;
what is reclaimable later*/
#define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */
#define SLAB_DESTROY_BY_RCU 0x00080000UL /* defer freeing pages to RCU */
+#define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */
/* flags passed to a constructor func */
#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
diff --git a/mm/slab.c b/mm/slab.c
index 1c8f5ee230d5..de516658d3d8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -94,6 +94,7 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
+#include <linux/cpuset.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
@@ -173,12 +174,12 @@
SLAB_CACHE_DMA | \
SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
- SLAB_DESTROY_BY_RCU)
+ SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#else
# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
- SLAB_DESTROY_BY_RCU)
+ SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#endif
/*
@@ -2813,6 +2814,14 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
if (nid != numa_node_id())
return __cache_alloc_node(cachep, flags, nid);
}
+ if (unlikely(cpuset_do_slab_mem_spread() &&
+ (cachep->flags & SLAB_MEM_SPREAD) &&
+ !in_interrupt())) {
+ int nid = cpuset_mem_spread_node();
+
+ if (nid != numa_node_id())
+ return __cache_alloc_node(cachep, flags, nid);
+ }
#endif
check_irq_off();