summaryrefslogtreecommitdiff
path: root/mm/memory_hotplug.c
diff options
context:
space:
mode:
authorHaicheng Li <haicheng.li@linux.intel.com>2010-05-24 14:32:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-25 08:07:01 -0700
commit1f522509c77a5dea8dc384b735314f03908a6415 (patch)
tree4b848527b90877a8a64c46e8e2d76723405c319d /mm/memory_hotplug.c
parent319774e25fa4b7641bdc3b0a464dd84e62103347 (diff)
downloadlwn-1f522509c77a5dea8dc384b735314f03908a6415.tar.gz
lwn-1f522509c77a5dea8dc384b735314f03908a6415.zip
mem-hotplug: avoid multiple zones sharing same boot strapping boot_pageset
For each new populated zone of hotadded node, need to update its pagesets with dynamically allocated per_cpu_pageset struct for all possible CPUs: 1) Detach zone->pageset from the shared boot_pageset at end of __build_all_zonelists(). 2) Use mutex to protect zone->pageset when it's still shared in onlined_pages() Otherwises, multiple zones of different nodes would share same boot strapping boot_pageset for same CPU, which will finally cause below kernel panic: ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:1239! invalid opcode: 0000 [#1] SMP ... Call Trace: [<ffffffff811300c1>] __alloc_pages_nodemask+0x131/0x7b0 [<ffffffff81162e67>] alloc_pages_current+0x87/0xd0 [<ffffffff81128407>] __page_cache_alloc+0x67/0x70 [<ffffffff811325f0>] __do_page_cache_readahead+0x120/0x260 [<ffffffff81132751>] ra_submit+0x21/0x30 [<ffffffff811329c6>] ondemand_readahead+0x166/0x2c0 [<ffffffff81132ba0>] page_cache_async_readahead+0x80/0xa0 [<ffffffff8112a0e4>] generic_file_aio_read+0x364/0x670 [<ffffffff81266cfa>] nfs_file_read+0xca/0x130 [<ffffffff8117b20a>] do_sync_read+0xfa/0x140 [<ffffffff8117bf75>] vfs_read+0xb5/0x1a0 [<ffffffff8117c151>] sys_read+0x51/0x80 [<ffffffff8103c032>] system_call_fastpath+0x16/0x1b RIP [<ffffffff8112ff13>] get_page_from_freelist+0x883/0x900 RSP <ffff88000d1e78a8> ---[ end trace 4bda28328b9990db ] [akpm@linux-foundation.org: merge fix] Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com> Reviewed-by: Andi Kleen <andi.kleen@intel.com> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r--mm/memory_hotplug.c18
1 files changed, 13 insertions, 5 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 85eb4d342ac5..089cc97aed3c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -389,6 +389,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
int nid;
int ret;
struct memory_notify arg;
+ /*
+ * mutex to protect zone->pageset when it's still shared
+ * in onlined_pages()
+ */
+ static DEFINE_MUTEX(zone_pageset_mutex);
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
@@ -415,12 +420,14 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
* This means the page allocator ignores this zone.
* So, zonelist must be updated after online.
*/
+ mutex_lock(&zone_pageset_mutex);
if (!populated_zone(zone))
need_zonelists_rebuild = 1;
ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages,
online_pages_range);
if (ret) {
+ mutex_unlock(&zone_pageset_mutex);
printk(KERN_DEBUG "online_pages %lx at %lx failed\n",
nr_pages, pfn);
memory_notify(MEM_CANCEL_ONLINE, &arg);
@@ -429,8 +436,12 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
zone->present_pages += onlined_pages;
zone->zone_pgdat->node_present_pages += onlined_pages;
+ if (need_zonelists_rebuild)
+ build_all_zonelists(zone);
+ else
+ zone_pcp_update(zone);
- zone_pcp_update(zone);
+ mutex_unlock(&zone_pageset_mutex);
setup_per_zone_wmarks();
calculate_zone_inactive_ratio(zone);
if (onlined_pages) {
@@ -438,10 +449,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
}
- if (need_zonelists_rebuild)
- build_all_zonelists();
- else
- vm_total_pages = nr_free_pagecache_pages();
+ vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();