diff options
author | Yasunori Goto <y-goto@jp.fujitsu.com> | 2007-10-21 16:41:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-22 08:13:17 -0700 |
commit | 7b78d335ac15b10bbcb0397c635d7f0d569b0270 (patch) | |
tree | 3e49006c0166ff8bcc6e11b401437fc31d875ec8 | |
parent | 10020ca246c55744dad815ad4f15e1f488ca55a8 (diff) | |
download | lwn-7b78d335ac15b10bbcb0397c635d7f0d569b0270.tar.gz lwn-7b78d335ac15b10bbcb0397c635d7f0d569b0270.zip |
memory hotplug: rearrange memory hotplug notifier
Current memory notifier has some defects yet. (Fortunately, nothing uses
it.) This patch is to fix and rearrange for them.
- Add information of start_pfn, nr_pages, and node id if node status is
changes from/to memoryless node for callback functions.
Callbacks can't do anything without those information.
- Add notification going-online status.
It is necessary for creating per node structure before the node's
pages are available.
- Move GOING_OFFLINE status notification after page isolation.
It is good place for return memory like cache for callback,
because returned page is not used again.
- Make CANCEL events for rollingback when error occurs.
- Delete MEM_MAPPING_INVALID notification. It will be not used.
- Fix compile error of (un)register_memory_notifier().
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/base/memory.c | 9 | ||||
-rw-r--r-- | include/linux/memory.h | 27 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 48 |
3 files changed, 61 insertions, 23 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c41d0728efe2..7868707c7eda 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf) return len; } -static inline int memory_notify(unsigned long val, void *v) +int memory_notify(unsigned long val, void *v) { return blocking_notifier_call_chain(&memory_chain, val, v); } @@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) break; case MEM_OFFLINE: mem->state = MEM_GOING_OFFLINE; - memory_notify(MEM_GOING_OFFLINE, NULL); start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; ret = remove_memory(start_paddr, PAGES_PER_SECTION << PAGE_SHIFT); @@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) mem->state = old_state; break; } - memory_notify(MEM_MAPPING_INVALID, NULL); break; default: printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", @@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action) WARN_ON(1); ret = -EINVAL; } - /* - * For now, only notify on successful memory operations - */ - if (!ret) - memory_notify(action, NULL); return ret; } diff --git a/include/linux/memory.h b/include/linux/memory.h index 654ef5544878..ec376e482abb 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -41,18 +41,15 @@ struct memory_block { #define MEM_ONLINE (1<<0) /* exposed to userspace */ #define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */ #define MEM_OFFLINE (1<<2) /* exposed to userspace */ +#define MEM_GOING_ONLINE (1<<3) +#define MEM_CANCEL_ONLINE (1<<4) +#define MEM_CANCEL_OFFLINE (1<<5) -/* - * All of these states are currently kernel-internal for notifying - * kernel components and architectures. - * - * For MEM_MAPPING_INVALID, all notifier chains with priority >0 - * are called before pfn_to_page() becomes invalid. The priority=0 - * entry is reserved for the function that actually makes - * pfn_to_page() stop working. Any notifiers that want to be called - * after that should have priority <0. - */ -#define MEM_MAPPING_INVALID (1<<3) +struct memory_notify { + unsigned long start_pfn; + unsigned long nr_pages; + int status_change_nid; +}; struct notifier_block; struct mem_section; @@ -69,12 +66,18 @@ static inline int register_memory_notifier(struct notifier_block *nb) static inline void unregister_memory_notifier(struct notifier_block *nb) { } +static inline int memory_notify(unsigned long val, void *v) +{ + return 0; +} #else +extern int register_memory_notifier(struct notifier_block *nb); +extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_new_memory(struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int remove_memory_block(unsigned long, struct mem_section *, int); - +extern int memory_notify(unsigned long val, void *v); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1833879f8438..3a47871a29d9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) unsigned long onlined_pages = 0; struct zone *zone; int need_zonelists_rebuild = 0; + int nid; + int ret; + struct memory_notify arg; + + arg.start_pfn = pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + + nid = page_to_nid(pfn_to_page(pfn)); + if (node_present_pages(nid) == 0) + arg.status_change_nid = nid; + ret = memory_notify(MEM_GOING_ONLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) { + memory_notify(MEM_CANCEL_ONLINE, &arg); + return ret; + } /* * This doesn't need a lock to do pfn_to_page(). * The section can't be removed here because of the @@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) build_all_zonelists(); vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + if (onlined_pages) + memory_notify(MEM_ONLINE, &arg); + return 0; } #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ @@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn, { unsigned long pfn, nr_pages, expire; long offlined_pages; - int ret, drain, retry_max; + int ret, drain, retry_max, node; struct zone *zone; + struct memory_notify arg; BUG_ON(start_pfn >= end_pfn); /* at least, alignment against pageblock is necessary */ @@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn, we assume this for now. .*/ if (!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; + + zone = page_zone(pfn_to_page(start_pfn)); + node = zone_to_nid(zone); + nr_pages = end_pfn - start_pfn; + /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) return ret; - nr_pages = end_pfn - start_pfn; + + arg.start_pfn = start_pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + if (nr_pages >= node_present_pages(node)) + arg.status_change_nid = node; + + ret = memory_notify(MEM_GOING_OFFLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) + goto failed_removal; + pfn = start_pfn; expire = jiffies + timeout; drain = 0; @@ -539,20 +577,24 @@ repeat: /* reset pagetype flags */ start_isolate_page_range(start_pfn, end_pfn); /* removal success */ - zone = page_zone(pfn_to_page(start_pfn)); zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; num_physpages -= offlined_pages; + vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + memory_notify(MEM_OFFLINE, &arg); return 0; failed_removal: printk(KERN_INFO "memory offlining %lx to %lx failed\n", start_pfn, end_pfn); + memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn); + return ret; } #else |