summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@engr.sgi.com>2005-06-21 17:14:57 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 18:46:18 -0700
commit4ae7c03943fca73f23bc0cdb938070f41b98101f (patch)
treed4b3a7369896af7aa7bb58d0d1699be91fc4aa0d
parent578c2fd6a7f378434655e5c480e23152a3994404 (diff)
downloadlwn-4ae7c03943fca73f23bc0cdb938070f41b98101f.tar.gz
lwn-4ae7c03943fca73f23bc0cdb938070f41b98101f.zip
[PATCH] Periodically drain non local pagesets
The pageset array can potentially acquire a huge amount of memory on large NUMA systems. F.e. on a system with 512 processors and 256 nodes there will be 256*512 pagesets. If each pageset only holds 5 pages then we are talking about 655360 pages.With a 16K page size on IA64 this results in potentially 10 Gigabytes of memory being trapped in pagesets. The typical cases are much less for smaller systems but there is still the potential of memory being trapped in off node pagesets. Off node memory may be rarely used if local memory is available and so we may potentially have memory in seldom used pagesets without this patch. The slab allocator flushes its per cpu caches every 2 seconds. The following patch flushes the off node pageset caches in the same way by tying into the slab flush. The patch also changes /proc/zoneinfo to include the number of pages currently in each pageset. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/gfp.h5
-rw-r--r--mm/page_alloc.c35
-rw-r--r--mm/slab.c1
3 files changed, 39 insertions, 2 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 208535fd4832..8d6bf608b199 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page));
#define free_page(addr) free_pages((addr),0)
void page_alloc_init(void);
+#ifdef CONFIG_NUMA
+void drain_remote_pages(void);
+#else
+static inline void drain_remote_pages(void) { };
+#endif
#endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index be05d17bd7df..a95e72d7f945 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -516,6 +516,36 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
return allocated;
}
+#ifdef CONFIG_NUMA
+/* Called from the slab reaper to drain remote pagesets */
+void drain_remote_pages(void)
+{
+ struct zone *zone;
+ int i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ for_each_zone(zone) {
+ struct per_cpu_pageset *pset;
+
+ /* Do not drain local pagesets */
+ if (zone->zone_pgdat->node_id == numa_node_id())
+ continue;
+
+ pset = zone->pageset[smp_processor_id()];
+ for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
+ struct per_cpu_pages *pcp;
+
+ pcp = &pset->pcp[i];
+ if (pcp->count)
+ pcp->count -= free_pages_bulk(zone, pcp->count,
+ &pcp->list, 0);
+ }
+ }
+ local_irq_restore(flags);
+}
+#endif
+
#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
static void __drain_pages(unsigned int cpu)
{
@@ -1271,12 +1301,13 @@ void show_free_areas(void)
pageset = zone_pcp(zone, cpu);
for (temperature = 0; temperature < 2; temperature++)
- printk("cpu %d %s: low %d, high %d, batch %d\n",
+ printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
cpu,
temperature ? "cold" : "hot",
pageset->pcp[temperature].low,
pageset->pcp[temperature].high,
- pageset->pcp[temperature].batch);
+ pageset->pcp[temperature].batch,
+ pageset->pcp[temperature].count);
}
}
diff --git a/mm/slab.c b/mm/slab.c
index c78d343b3c5f..93cbbbb39f42 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2851,6 +2851,7 @@ next:
}
check_irq_on();
up(&cache_chain_sem);
+ drain_remote_pages();
/* Setup the next iteration */
schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
}