summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYasunori Goto <y-goto@jp.fujitsu.com>2007-10-21 16:41:36 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-22 08:13:17 -0700
commit7b78d335ac15b10bbcb0397c635d7f0d569b0270 (patch)
tree3e49006c0166ff8bcc6e11b401437fc31d875ec8
parent10020ca246c55744dad815ad4f15e1f488ca55a8 (diff)
downloadlwn-7b78d335ac15b10bbcb0397c635d7f0d569b0270.tar.gz
lwn-7b78d335ac15b10bbcb0397c635d7f0d569b0270.zip
memory hotplug: rearrange memory hotplug notifier
The current memory notifier still has some defects. (Fortunately, nothing uses it yet.) This patch fixes and rearranges it. - Add start_pfn, nr_pages, and the node id (set when the node's status changes from/to a memoryless node) to the information passed to callback functions. Callbacks can't do anything without that information. - Add a going-online notification. It is necessary for creating per-node structures before the node's pages are available. - Move the GOING_OFFLINE notification to after page isolation. That is a good place for callbacks to return memory such as caches, because a returned page will not be used again. - Add CANCEL events for rolling back when an error occurs. - Delete the MEM_MAPPING_INVALID notification. It will not be used. - Fix compile errors in (un)register_memory_notifier(). Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/base/memory.c9
-rw-r--r--include/linux/memory.h27
-rw-r--r--mm/memory_hotplug.c48
3 files changed, 61 insertions, 23 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c41d0728efe2..7868707c7eda 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -137,7 +137,7 @@ static ssize_t show_mem_state(struct sys_device *dev, char *buf)
return len;
}
-static inline int memory_notify(unsigned long val, void *v)
+int memory_notify(unsigned long val, void *v)
{
return blocking_notifier_call_chain(&memory_chain, val, v);
}
@@ -183,7 +183,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
break;
case MEM_OFFLINE:
mem->state = MEM_GOING_OFFLINE;
- memory_notify(MEM_GOING_OFFLINE, NULL);
start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
ret = remove_memory(start_paddr,
PAGES_PER_SECTION << PAGE_SHIFT);
@@ -191,7 +190,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
mem->state = old_state;
break;
}
- memory_notify(MEM_MAPPING_INVALID, NULL);
break;
default:
printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
@@ -199,11 +197,6 @@ memory_block_action(struct memory_block *mem, unsigned long action)
WARN_ON(1);
ret = -EINVAL;
}
- /*
- * For now, only notify on successful memory operations
- */
- if (!ret)
- memory_notify(action, NULL);
return ret;
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 654ef5544878..ec376e482abb 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -41,18 +41,15 @@ struct memory_block {
#define MEM_ONLINE (1<<0) /* exposed to userspace */
#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
#define MEM_OFFLINE (1<<2) /* exposed to userspace */
+#define MEM_GOING_ONLINE (1<<3)
+#define MEM_CANCEL_ONLINE (1<<4)
+#define MEM_CANCEL_OFFLINE (1<<5)
-/*
- * All of these states are currently kernel-internal for notifying
- * kernel components and architectures.
- *
- * For MEM_MAPPING_INVALID, all notifier chains with priority >0
- * are called before pfn_to_page() becomes invalid. The priority=0
- * entry is reserved for the function that actually makes
- * pfn_to_page() stop working. Any notifiers that want to be called
- * after that should have priority <0.
- */
-#define MEM_MAPPING_INVALID (1<<3)
+struct memory_notify {
+ unsigned long start_pfn;
+ unsigned long nr_pages;
+ int status_change_nid;
+};
struct notifier_block;
struct mem_section;
@@ -69,12 +66,18 @@ static inline int register_memory_notifier(struct notifier_block *nb)
static inline void unregister_memory_notifier(struct notifier_block *nb)
{
}
+static inline int memory_notify(unsigned long val, void *v)
+{
+ return 0;
+}
#else
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
extern int register_new_memory(struct mem_section *);
extern int unregister_memory_section(struct mem_section *);
extern int memory_dev_init(void);
extern int remove_memory_block(unsigned long, struct mem_section *, int);
-
+extern int memory_notify(unsigned long val, void *v);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1833879f8438..3a47871a29d9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
unsigned long onlined_pages = 0;
struct zone *zone;
int need_zonelists_rebuild = 0;
+ int nid;
+ int ret;
+ struct memory_notify arg;
+
+ arg.start_pfn = pfn;
+ arg.nr_pages = nr_pages;
+ arg.status_change_nid = -1;
+
+ nid = page_to_nid(pfn_to_page(pfn));
+ if (node_present_pages(nid) == 0)
+ arg.status_change_nid = nid;
+ ret = memory_notify(MEM_GOING_ONLINE, &arg);
+ ret = notifier_to_errno(ret);
+ if (ret) {
+ memory_notify(MEM_CANCEL_ONLINE, &arg);
+ return ret;
+ }
/*
* This doesn't need a lock to do pfn_to_page().
* The section can't be removed here because of the
@@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
build_all_zonelists();
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
+
+ if (onlined_pages)
+ memory_notify(MEM_ONLINE, &arg);
+
return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
@@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn,
{
unsigned long pfn, nr_pages, expire;
long offlined_pages;
- int ret, drain, retry_max;
+ int ret, drain, retry_max, node;
struct zone *zone;
+ struct memory_notify arg;
BUG_ON(start_pfn >= end_pfn);
/* at least, alignment against pageblock is necessary */
@@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn,
we assume this for now. .*/
if (!test_pages_in_a_zone(start_pfn, end_pfn))
return -EINVAL;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+ node = zone_to_nid(zone);
+ nr_pages = end_pfn - start_pfn;
+
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn);
if (ret)
return ret;
- nr_pages = end_pfn - start_pfn;
+
+ arg.start_pfn = start_pfn;
+ arg.nr_pages = nr_pages;
+ arg.status_change_nid = -1;
+ if (nr_pages >= node_present_pages(node))
+ arg.status_change_nid = node;
+
+ ret = memory_notify(MEM_GOING_OFFLINE, &arg);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto failed_removal;
+
pfn = start_pfn;
expire = jiffies + timeout;
drain = 0;
@@ -539,20 +577,24 @@ repeat:
/* reset pagetype flags */
start_isolate_page_range(start_pfn, end_pfn);
/* removal success */
- zone = page_zone(pfn_to_page(start_pfn));
zone->present_pages -= offlined_pages;
zone->zone_pgdat->node_present_pages -= offlined_pages;
totalram_pages -= offlined_pages;
num_physpages -= offlined_pages;
+
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
+
+ memory_notify(MEM_OFFLINE, &arg);
return 0;
failed_removal:
printk(KERN_INFO "memory offlining %lx to %lx failed\n",
start_pfn, end_pfn);
+ memory_notify(MEM_CANCEL_OFFLINE, &arg);
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn);
+
return ret;
}
#else