diff options
author | Paul Mackerras <paulus@samba.org> | 2007-05-08 13:37:51 +1000 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2007-05-08 13:37:51 +1000 |
commit | 02bbc0f09c90cefdb2837605c96a66c5ce4ba2e1 (patch) | |
tree | 04ef573cd4de095c500c9fc3477f4278c0b36300 /kernel/power | |
parent | 7487a2245b8841c77ba9db406cf99a483b9334e9 (diff) | |
parent | 5b94f675f57e4ff16c8fda09088d7480a84dcd91 (diff) | |
download | lwn-02bbc0f09c90cefdb2837605c96a66c5ce4ba2e1.tar.gz lwn-02bbc0f09c90cefdb2837605c96a66c5ce4ba2e1.zip |
Merge branch 'linux-2.6'
Diffstat (limited to 'kernel/power')
-rw-r--r-- | kernel/power/Kconfig | 11 | ||||
-rw-r--r-- | kernel/power/disk.c | 77 | ||||
-rw-r--r-- | kernel/power/main.c | 29 | ||||
-rw-r--r-- | kernel/power/power.h | 49 | ||||
-rw-r--r-- | kernel/power/process.c | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 309 | ||||
-rw-r--r-- | kernel/power/swap.c | 60 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 139 | ||||
-rw-r--r-- | kernel/power/user.c | 39 |
9 files changed, 491 insertions, 228 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 5001c652028c..495b7d4dd330 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -78,17 +78,22 @@ config PM_SYSFS_DEPRECATED are likely to be bus or driver specific. config SOFTWARE_SUSPEND - bool "Software Suspend" + bool "Software Suspend (Hibernation)" depends on PM && SWAP && (((X86 || PPC64_SWSUSP) && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) ---help--- - Enable the suspend to disk (STD) functionality. + Enable the suspend to disk (STD) functionality, which is usually + called "hibernation" in user interfaces. STD checkpoints the + system and powers it off; and restores that checkpoint on reboot. You can suspend your machine with 'echo disk > /sys/power/state'. Alternatively, you can use the additional userland tools available from <http://suspend.sf.net>. In principle it does not require ACPI or APM, although for example - ACPI will be used if available. + ACPI will be used for the final steps when it is available. One + of the reasons to use software suspend is that the firmware hooks + for suspend states like suspend-to-RAM (STR) often don't work very + well with Linux. It creates an image which is saved in your active swap. Upon the next boot, pass the 'resume=/dev/swappartition' argument to the kernel to diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 02e4fb69111a..06331374d862 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -130,15 +130,25 @@ int pm_suspend_disk(void) { int error; + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) + return -EBUSY; + + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Exit; + error = prepare_processes(); if (error) - return error; + goto Finish; if (pm_disk_mode == PM_DISK_TESTPROC) { printk("swsusp debug: Waiting for 5 seconds.\n"); mdelay(5000); goto Thaw; } + /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); if (error) @@ -196,6 +206,10 @@ int pm_suspend_disk(void) resume_console(); Thaw: unprepare_processes(); + Finish: + free_basic_memory_bitmaps(); + Exit: + atomic_inc(&snapshot_device_available); return error; } @@ -239,13 +253,21 @@ static int software_resume(void) } pr_debug("PM: Checking swsusp image.\n"); - error = swsusp_check(); if (error) - goto Done; + goto Unlock; - pr_debug("PM: Preparing processes for restore.\n"); + /* The snapshot device should not be opened while we're running */ + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { + error = -EBUSY; + goto Unlock; + } + error = create_basic_memory_bitmaps(); + if (error) + goto Finish; + + pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); if (error) { swsusp_close(); @@ -280,7 +302,11 @@ static int software_resume(void) printk(KERN_ERR "PM: Restore failed, recovering.\n"); unprepare_processes(); Done: + free_basic_memory_bitmaps(); + Finish: + atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ + Unlock: mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); return 0; @@ -322,13 +348,40 @@ static const char * const pm_disk_modes[] = { * supports it (as determined from pm_ops->pm_disk_mode). */ -static ssize_t disk_show(struct subsystem * subsys, char * buf) +static ssize_t disk_show(struct kset *kset, char *buf) { - return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); + int i; + char *start = buf; + + for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) { + if (!pm_disk_modes[i]) + continue; + switch (i) { + case PM_DISK_SHUTDOWN: + case PM_DISK_REBOOT: + case PM_DISK_TEST: + case PM_DISK_TESTPROC: + break; + default: + if (pm_ops && pm_ops->enter && + (i == pm_ops->pm_disk_mode)) + break; + /* not a valid mode, continue with loop */ + continue; + } + if (i == pm_disk_mode) + buf += sprintf(buf, "[%s]", pm_disk_modes[i]); + else + buf += sprintf(buf, "%s", pm_disk_modes[i]); + if (i+1 != PM_DISK_MAX) + buf += sprintf(buf, " "); + } + buf += sprintf(buf, "\n"); + return buf-start; } -static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +static ssize_t disk_store(struct kset *kset, const char *buf, size_t n) { int error = 0; int i; @@ -373,13 +426,13 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) power_attr(disk); -static ssize_t resume_show(struct subsystem * subsys, char *buf) +static ssize_t resume_show(struct kset *kset, char *buf) { return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); } -static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) +static ssize_t resume_store(struct kset *kset, const char *buf, size_t n) { unsigned int maj, min; dev_t res; @@ -405,12 +458,12 @@ static ssize_t resume_store(struct subsystem *subsys, const char *buf, size_t n) power_attr(resume); -static ssize_t image_size_show(struct subsystem * subsys, char *buf) +static ssize_t image_size_show(struct kset *kset, char *buf) { return sprintf(buf, "%lu\n", image_size); } -static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) +static ssize_t image_size_store(struct kset *kset, const char *buf, size_t n) { unsigned long size; @@ -439,7 +492,7 @@ static struct attribute_group attr_group = { static int __init pm_disk_init(void) { - return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + return sysfs_create_group(&power_subsys.kobj, &attr_group); } core_initcall(pm_disk_init); diff --git a/kernel/power/main.c b/kernel/power/main.c index 72419a3b1beb..f6dda685e7e2 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -184,17 +184,21 @@ static void suspend_finish(suspend_state_t state) static const char * const pm_states[PM_SUSPEND_MAX] = { [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", -#ifdef CONFIG_SOFTWARE_SUSPEND [PM_SUSPEND_DISK] = "disk", -#endif }; static inline int valid_state(suspend_state_t state) { /* Suspend-to-disk does not really need low-level support. - * It can work with reboot if needed. */ + * It can work with shutdown/reboot if needed. If it isn't + * configured, then it cannot be supported. + */ if (state == PM_SUSPEND_DISK) +#ifdef CONFIG_SOFTWARE_SUSPEND return 1; +#else + return 0; +#endif /* all other states need lowlevel support and need to be * valid to the lowlevel implementation, no valid callback @@ -244,15 +248,6 @@ static int enter_state(suspend_state_t state) return error; } -/* - * This is main interface to the outside world. It needs to be - * called from process context. - */ -int software_suspend(void) -{ - return enter_state(PM_SUSPEND_DISK); -} - /** * pm_suspend - Externally visible function for suspending system. @@ -285,7 +280,7 @@ decl_subsys(power,NULL,NULL); * proper enumerated value, and initiates a suspend transition. */ -static ssize_t state_show(struct subsystem * subsys, char * buf) +static ssize_t state_show(struct kset *kset, char *buf) { int i; char * s = buf; @@ -298,7 +293,7 @@ static ssize_t state_show(struct subsystem * subsys, char * buf) return (s - buf); } -static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) +static ssize_t state_store(struct kset *kset, const char *buf, size_t n) { suspend_state_t state = PM_SUSPEND_STANDBY; const char * const *s; @@ -325,13 +320,13 @@ power_attr(state); #ifdef CONFIG_PM_TRACE int pm_trace_enabled; -static ssize_t pm_trace_show(struct subsystem * subsys, char * buf) +static ssize_t pm_trace_show(struct kset *kset, char *buf) { return sprintf(buf, "%d\n", pm_trace_enabled); } static ssize_t -pm_trace_store(struct subsystem * subsys, const char * buf, size_t n) +pm_trace_store(struct kset *kset, const char *buf, size_t n) { int val; @@ -365,7 +360,7 @@ static int __init pm_init(void) { int error = subsystem_register(&power_subsys); if (!error) - error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + error = sysfs_create_group(&power_subsys.kobj,&attr_group); return error; } diff --git a/kernel/power/power.h b/kernel/power/power.h index eb461b816bf4..34b43542785a 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -14,8 +14,18 @@ struct swsusp_info { #ifdef CONFIG_SOFTWARE_SUSPEND -extern int pm_suspend_disk(void); +/* + * Keep some memory free so that I/O operations can succeed without paging + * [Might this be more than 4 MB?] + */ +#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT) +/* + * Keep 1 MB of memory free so that device drivers can allocate some pages in + * their .suspend() routines without breaking the suspend to disk. + */ +#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) +extern int pm_suspend_disk(void); #else static inline int pm_suspend_disk(void) { @@ -23,6 +33,8 @@ static inline int pm_suspend_disk(void) } #endif +extern int pfn_is_nosave(unsigned long); + extern struct mutex pm_mutex; #define power_attr(_name) \ @@ -35,10 +47,7 @@ static struct subsys_attribute _name##_attr = { \ .store = _name##_store, \ } -extern struct subsystem power_subsys; - -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; +extern struct kset power_subsys; /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; @@ -49,6 +58,8 @@ extern sector_t swsusp_resume_block; extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern int create_basic_memory_bitmaps(void); +extern void free_basic_memory_bitmaps(void); extern unsigned int count_data_pages(void); /** @@ -139,30 +150,12 @@ struct resume_swap_area { #define PMOPS_ENTER 2 #define PMOPS_FINISH 3 -/** - * The bitmap is used for tracing allocated swap pages - * - * The entire bitmap consists of a number of bitmap_page - * structures linked with the help of the .next member. - * Thus each page can be allocated individually, so we only - * need to make 0-order memory allocations to create - * the bitmap. - */ - -#define BITMAP_PAGE_SIZE (PAGE_SIZE - sizeof(void *)) -#define BITMAP_PAGE_CHUNKS (BITMAP_PAGE_SIZE / sizeof(long)) -#define BITS_PER_CHUNK (sizeof(long) * 8) -#define BITMAP_PAGE_BITS (BITMAP_PAGE_CHUNKS * BITS_PER_CHUNK) - -struct bitmap_page { - unsigned long chunks[BITMAP_PAGE_CHUNKS]; - struct bitmap_page *next; -}; +/* If unset, the snapshot device cannot be open. */ +extern atomic_t snapshot_device_available; -extern void free_bitmap(struct bitmap_page *bitmap); -extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); -extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); -extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); +extern sector_t alloc_swapdev_block(int swap); +extern void free_all_swap_pages(int swap); +extern int swsusp_swap_in_use(void); extern int swsusp_check(void); extern int swsusp_shrink_memory(void); diff --git a/kernel/power/process.c b/kernel/power/process.c index 6d566bf7085c..0eb5c420e8ed 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -47,8 +47,10 @@ void refrigerator(void) recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); - while (frozen(current)) { - current->state = TASK_UNINTERRUPTIBLE; + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!frozen(current)) + break; schedule(); } pr_debug("%s left refrigerator\n", current->comm); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index fc53ad068128..128da11f01c2 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/pm.h> #include <linux/device.h> +#include <linux/init.h> #include <linux/bootmem.h> #include <linux/syscalls.h> #include <linux/console.h> @@ -34,6 +35,10 @@ #include "power.h" +static int swsusp_page_is_free(struct page *); +static void swsusp_set_page_forbidden(struct page *); +static void swsusp_unset_page_forbidden(struct page *); + /* List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written @@ -67,15 +72,15 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) res = (void *)get_zeroed_page(gfp_mask); if (safe_needed) - while (res && PageNosaveFree(virt_to_page(res))) { + while (res && swsusp_page_is_free(virt_to_page(res))) { /* The page is unsafe, mark it for swsusp_free() */ - SetPageNosave(virt_to_page(res)); + swsusp_set_page_forbidden(virt_to_page(res)); allocated_unsafe_pages++; res = (void *)get_zeroed_page(gfp_mask); } if (res) { - SetPageNosave(virt_to_page(res)); - SetPageNosaveFree(virt_to_page(res)); + swsusp_set_page_forbidden(virt_to_page(res)); + swsusp_set_page_free(virt_to_page(res)); } return res; } @@ -91,8 +96,8 @@ static struct page *alloc_image_page(gfp_t gfp_mask) page = alloc_page(gfp_mask); if (page) { - SetPageNosave(page); - SetPageNosaveFree(page); + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); } return page; } @@ -110,9 +115,9 @@ static inline void free_image_page(void *addr, int clear_nosave_free) page = virt_to_page(addr); - ClearPageNosave(page); + swsusp_unset_page_forbidden(page); if (clear_nosave_free) - ClearPageNosaveFree(page); + swsusp_unset_page_free(page); __free_page(page); } @@ -224,11 +229,6 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave) * of type unsigned long each). It also contains the pfns that * correspond to the start and end of the represented memory area and * the number of bit chunks in the block. - * - * NOTE: Memory bitmaps are used for two types of operations only: - * "set a bit" and "find the next bit set". Moreover, the searching - * is always carried out after all of the "set a bit" operations - * on given bitmap. */ #define BM_END_OF_MAP (~0UL) @@ -443,15 +443,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) } /** - * memory_bm_set_bit - set the bit in the bitmap @bm that corresponds + * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds * to given pfn. The cur_zone_bm member of @bm and the cur_block member * of @bm->cur_zone_bm are updated. - * - * If the bit cannot be set, the function returns -EINVAL . */ -static int -memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) { struct zone_bitmap *zone_bm; struct bm_block *bb; @@ -463,8 +461,8 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) /* We don't assume that the zones are sorted by pfns */ while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { zone_bm = zone_bm->next; - if (unlikely(!zone_bm)) - return -EINVAL; + + BUG_ON(!zone_bm); } bm->cur.zone_bm = zone_bm; } @@ -475,13 +473,40 @@ memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) while (pfn >= bb->end_pfn) { bb = bb->next; - if (unlikely(!bb)) - return -EINVAL; + + BUG_ON(!bb); } zone_bm->cur_block = bb; pfn -= bb->start_pfn; - set_bit(pfn % BM_BITS_PER_CHUNK, bb->data + pfn / BM_BITS_PER_CHUNK); - return 0; + *bit_nr = pfn % BM_BITS_PER_CHUNK; + *addr = bb->data + pfn / BM_BITS_PER_CHUNK; +} + +static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + set_bit(bit, addr); +} + +static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + clear_bit(bit, addr); +} + +static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) +{ + void *addr; + unsigned int bit; + + memory_bm_find_bit(bm, pfn, &addr, &bit); + return test_bit(bit, addr); } /* Two auxiliary functions for memory_bm_next_pfn */ @@ -564,6 +589,199 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) } /** + * This structure represents a range of page frames the contents of which + * should not be saved during the suspend. + */ + +struct nosave_region { + struct list_head list; + unsigned long start_pfn; + unsigned long end_pfn; +}; + +static LIST_HEAD(nosave_regions); + +/** + * register_nosave_region - register a range of page frames the contents + * of which should not be saved during the suspend (to be used in the early + * initialization code) + */ + +void __init +register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) +{ + struct nosave_region *region; + + if (start_pfn >= end_pfn) + return; + + if (!list_empty(&nosave_regions)) { + /* Try to extend the previous region (they should be sorted) */ + region = list_entry(nosave_regions.prev, + struct nosave_region, list); + if (region->end_pfn == start_pfn) { + region->end_pfn = end_pfn; + goto Report; + } + } + /* This allocation cannot fail */ + region = alloc_bootmem_low(sizeof(struct nosave_region)); + region->start_pfn = start_pfn; + region->end_pfn = end_pfn; + list_add_tail(®ion->list, &nosave_regions); + Report: + printk("swsusp: Registered nosave memory region: %016lx - %016lx\n", + start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); +} + +/* + * Set bits in this map correspond to the page frames the contents of which + * should not be saved during the suspend. + */ +static struct memory_bitmap *forbidden_pages_map; + +/* Set bits in this map correspond to free page frames. */ +static struct memory_bitmap *free_pages_map; + +/* + * Each page frame allocated for creating the image is marked by setting the + * corresponding bits in forbidden_pages_map and free_pages_map simultaneously + */ + +void swsusp_set_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_set_bit(free_pages_map, page_to_pfn(page)); +} + +static int swsusp_page_is_free(struct page *page) +{ + return free_pages_map ? + memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; +} + +void swsusp_unset_page_free(struct page *page) +{ + if (free_pages_map) + memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); +} + +static void swsusp_set_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); +} + +int swsusp_page_is_forbidden(struct page *page) +{ + return forbidden_pages_map ? + memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; +} + +static void swsusp_unset_page_forbidden(struct page *page) +{ + if (forbidden_pages_map) + memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); +} + +/** + * mark_nosave_pages - set bits corresponding to the page frames the + * contents of which should not be saved in a given bitmap. + */ + +static void mark_nosave_pages(struct memory_bitmap *bm) +{ + struct nosave_region *region; + + if (list_empty(&nosave_regions)) + return; + + list_for_each_entry(region, &nosave_regions, list) { + unsigned long pfn; + + printk("swsusp: Marking nosave pages: %016lx - %016lx\n", + region->start_pfn << PAGE_SHIFT, + region->end_pfn << PAGE_SHIFT); + + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) + memory_bm_set_bit(bm, pfn); + } +} + +/** + * create_basic_memory_bitmaps - create bitmaps needed for marking page + * frames that should not be saved and free page frames. The pointers + * forbidden_pages_map and free_pages_map are only modified if everything + * goes well, because we don't want the bits to be used before both bitmaps + * are set up. + */ + +int create_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + int error = 0; + + BUG_ON(forbidden_pages_map || free_pages_map); + + bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); + if (!bm1) + return -ENOMEM; + + error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); + if (error) + goto Free_first_object; + + bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); + if (!bm2) + goto Free_first_bitmap; + + error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); + if (error) + goto Free_second_object; + + forbidden_pages_map = bm1; + free_pages_map = bm2; + mark_nosave_pages(forbidden_pages_map); + + printk("swsusp: Basic memory bitmaps created\n"); + + return 0; + + Free_second_object: + kfree(bm2); + Free_first_bitmap: + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + Free_first_object: + kfree(bm1); + return -ENOMEM; +} + +/** + * free_basic_memory_bitmaps - free memory bitmaps allocated by + * create_basic_memory_bitmaps(). The auxiliary pointers are necessary + * so that the bitmaps themselves are not referred to while they are being + * freed. + */ + +void free_basic_memory_bitmaps(void) +{ + struct memory_bitmap *bm1, *bm2; + + BUG_ON(!(forbidden_pages_map && free_pages_map)); + + bm1 = forbidden_pages_map; + bm2 = free_pages_map; + forbidden_pages_map = NULL; + free_pages_map = NULL; + memory_bm_free(bm1, PG_UNSAFE_CLEAR); + kfree(bm1); + memory_bm_free(bm2, PG_UNSAFE_CLEAR); + kfree(bm2); + + printk("swsusp: Basic memory bitmaps freed\n"); +} + +/** * snapshot_additional_pages - estimate the number of additional pages * be needed for setting up the suspend image data structures for given * zone (usually the returned value is greater than the exact number) @@ -615,7 +833,8 @@ static struct page *saveable_highmem_page(unsigned long pfn) BUG_ON(!PageHighMem(page)); - if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || + PageReserved(page)) return NULL; return page; @@ -651,17 +870,6 @@ static inline unsigned int count_highmem_pages(void) { return 0; } #endif /* CONFIG_HIGHMEM */ /** - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -static inline int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - -/** * saveable - Determine whether a non-highmem page should be included in * the suspend image. * @@ -681,7 +889,7 @@ static struct page *saveable_page(unsigned long pfn) BUG_ON(PageHighMem(page)); - if (PageNosave(page) || PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) return NULL; if (PageReserved(page) && pfn_is_nosave(pfn)) @@ -821,9 +1029,10 @@ void swsusp_free(void) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - if (PageNosave(page) && PageNosaveFree(page)) { - ClearPageNosave(page); - ClearPageNosaveFree(page); + if (swsusp_page_is_forbidden(page) && + swsusp_page_is_free(page)) { + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); __free_page(page); } } @@ -1146,7 +1355,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) - ClearPageNosaveFree(pfn_to_page(pfn)); + swsusp_unset_page_free(pfn_to_page(pfn)); } /* Mark pages that correspond to the "original" pfns as "unsafe" */ @@ -1155,7 +1364,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) pfn = memory_bm_next_pfn(bm); if (likely(pfn != BM_END_OF_MAP)) { if (likely(pfn_valid(pfn))) - SetPageNosaveFree(pfn_to_page(pfn)); + swsusp_set_page_free(pfn_to_page(pfn)); else return -EFAULT; } @@ -1321,14 +1530,14 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) struct page *page; page = alloc_page(__GFP_HIGHMEM); - if (!PageNosaveFree(page)) { + if (!swsusp_page_is_free(page)) { /* The page is "safe", set its bit the bitmap */ memory_bm_set_bit(bm, page_to_pfn(page)); safe_highmem_pages++; } /* Mark the page as allocated */ - SetPageNosave(page); - SetPageNosaveFree(page); + swsusp_set_page_forbidden(page); + swsusp_set_page_free(page); } memory_bm_position_reset(bm); safe_highmem_bm = bm; @@ -1360,7 +1569,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) struct highmem_pbe *pbe; void *kaddr; - if (PageNosave(page) && PageNosaveFree(page)) { + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ @@ -1522,14 +1731,14 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) error = -ENOMEM; goto Free; } - if (!PageNosaveFree(virt_to_page(lp))) { + if (!swsusp_page_is_free(virt_to_page(lp))) { /* The page is "safe", add it to the list */ lp->next = safe_pages_list; safe_pages_list = lp; } /* Mark the page as allocated */ - SetPageNosave(virt_to_page(lp)); - SetPageNosaveFree(virt_to_page(lp)); + swsusp_set_page_forbidden(virt_to_page(lp)); + swsusp_set_page_free(virt_to_page(lp)); nr_pages--; } /* Free the reserved safe pages so that chain_alloc() can use them */ @@ -1558,7 +1767,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) if (PageHighMem(page)) return get_highmem_page_buffer(page, ca); - if (PageNosave(page) && PageNosaveFree(page)) + if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 3581f8f86acd..e83ed9945a80 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -33,12 +33,14 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" -static struct swsusp_header { +struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; sector_t image; char orig_sig[10]; char sig[10]; -} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; +} __attribute__((packed)); + +static struct swsusp_header *swsusp_header; /* * General things @@ -141,14 +143,14 @@ static int mark_swapfiles(sector_t start) { int error; - bio_read_page(swsusp_resume_block, &swsusp_header, NULL); - if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || - !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { - memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); - memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - swsusp_header.image = start; + bio_read_page(swsusp_resume_block, swsusp_header, NULL); + if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { + memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); + memcpy(swsusp_header->sig,SWSUSP_SIG, 10); + swsusp_header->image = start; error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { printk(KERN_ERR "swsusp: Swap header not found!\n"); error = -ENODEV; @@ -241,7 +243,6 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; sector_t cur_swap; - struct bitmap_page *bitmap; unsigned int k; }; @@ -250,9 +251,6 @@ static void release_swap_writer(struct swap_map_handle *handle) if (handle->cur) free_page((unsigned long)handle->cur); handle->cur = NULL; - if (handle->bitmap) - free_bitmap(handle->bitmap); - handle->bitmap = NULL; } static int get_swap_writer(struct swap_map_handle *handle) @@ -260,12 +258,7 @@ static int get_swap_writer(struct swap_map_handle *handle) handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); if (!handle->cur) return -ENOMEM; - handle->bitmap = alloc_bitmap(count_swap_pages(root_swap, 0)); - if (!handle->bitmap) { - release_swap_writer(handle); - return -ENOMEM; - } - handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); + handle->cur_swap = alloc_swapdev_block(root_swap); if (!handle->cur_swap) { release_swap_writer(handle); return -ENOSPC; @@ -282,7 +275,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!handle->cur) return -EINVAL; - offset = alloc_swapdev_block(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap); error = write_page(buf, offset, bio_chain); if (error) return error; @@ -291,7 +284,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, error = wait_on_bio_chain(bio_chain); if (error) goto out; - offset = alloc_swapdev_block(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; @@ -428,7 +421,8 @@ int swsusp_write(void) } } if (error) - free_all_swap_pages(root_swap, handle.bitmap); + free_all_swap_pages(root_swap); + release_swap_writer(&handle); out: swsusp_close(); @@ -564,7 +558,7 @@ int swsusp_read(void) if (error < PAGE_SIZE) return error < 0 ? error : -EFAULT; header = (struct swsusp_info *)data_of(snapshot); - error = get_swap_reader(&handle, swsusp_header.image); + error = get_swap_reader(&handle, swsusp_header->image); if (!error) error = swap_read_page(&handle, header, NULL); if (!error) @@ -591,17 +585,17 @@ int swsusp_check(void) resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); if (!IS_ERR(resume_bdev)) { set_blocksize(resume_bdev, PAGE_SIZE); - memset(&swsusp_header, 0, sizeof(swsusp_header)); + memset(swsusp_header, 0, sizeof(PAGE_SIZE)); error = bio_read_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); if (error) return error; - if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { - memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { + memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ error = bio_write_page(swsusp_resume_block, - &swsusp_header, NULL); + swsusp_header, NULL); } else { return -EINVAL; } @@ -632,3 +626,13 @@ void swsusp_close(void) blkdev_put(resume_bdev); } + +static int swsusp_header_init(void) +{ + swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); + if (!swsusp_header) + panic("Could not allocate memory for swsusp_header\n"); + return 0; +} + +core_initcall(swsusp_header_init); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 175370824f37..5da304c8f1f6 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -50,6 +50,7 @@ #include <linux/syscalls.h> #include <linux/highmem.h> #include <linux/time.h> +#include <linux/rbtree.h> #include "power.h" @@ -74,72 +75,69 @@ static inline unsigned int count_highmem_pages(void) { return 0; } /** * The following functions are used for tracing the allocated * swap pages, so that they can be freed in case of an error. - * - * The functions operate on a linked bitmap structure defined - * in power.h */ -void free_bitmap(struct bitmap_page *bitmap) -{ - struct bitmap_page *bp; +struct swsusp_extent { + struct rb_node node; + unsigned long start; + unsigned long end; +}; - while (bitmap) { - bp = bitmap->next; - free_page((unsigned long)bitmap); - bitmap = bp; - } -} +static struct rb_root swsusp_extents = RB_ROOT; -struct bitmap_page *alloc_bitmap(unsigned int nr_bits) +static int swsusp_extents_insert(unsigned long swap_offset) { - struct bitmap_page *bitmap, *bp; - unsigned int n; - - if (!nr_bits) - return NULL; - - bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); - bp = bitmap; - for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) { - bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); - bp = bp->next; - if (!bp) { - free_bitmap(bitmap); - return NULL; + struct rb_node **new = &(swsusp_extents.rb_node); + struct rb_node *parent = NULL; + struct swsusp_extent *ext; + + /* Figure out where to put the new node */ + while (*new) { + ext = container_of(*new, struct swsusp_extent, node); + parent = *new; + if (swap_offset < ext->start) { + /* Try to merge */ + if (swap_offset == ext->start - 1) { + ext->start--; + return 0; + } + new = &((*new)->rb_left); + } else if (swap_offset > ext->end) { + /* Try to merge */ + if (swap_offset == ext->end + 1) { + ext->end++; + return 0; + } + new = &((*new)->rb_right); + } else { + /* It already is in the tree */ + return -EINVAL; } } - return bitmap; -} - -static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) -{ - unsigned int n; - - n = BITMAP_PAGE_BITS; - while (bitmap && n <= bit) { - n += BITMAP_PAGE_BITS; - bitmap = bitmap->next; - } - if (!bitmap) - return -EINVAL; - n -= BITMAP_PAGE_BITS; - bit -= n; - n = 0; - while (bit >= BITS_PER_CHUNK) { - bit -= BITS_PER_CHUNK; - n++; - } - bitmap->chunks[n] |= (1UL << bit); + /* Add the new node and rebalance the tree. */ + ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL); + if (!ext) + return -ENOMEM; + + ext->start = swap_offset; + ext->end = swap_offset; + rb_link_node(&ext->node, parent, new); + rb_insert_color(&ext->node, &swsusp_extents); return 0; } -sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) +/** + * alloc_swapdev_block - allocate a swap page and register that it has + * been allocated, so that it can be freed in case of an error. + */ + +sector_t alloc_swapdev_block(int swap) { unsigned long offset; offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { - if (bitmap_set(bitmap, offset)) + if (swsusp_extents_insert(offset)) swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); @@ -147,23 +145,34 @@ sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) return 0; } -void free_all_swap_pages(int swap, struct bitmap_page *bitmap) +/** + * free_all_swap_pages - free swap pages allocated for saving image data. + * It also frees the extents used to register which swap entres had been + * allocated. + */ + +void free_all_swap_pages(int swap) { - unsigned int bit, n; - unsigned long test; - - bit = 0; - while (bitmap) { - for (n = 0; n < BITMAP_PAGE_CHUNKS; n++) - for (test = 1UL; test; test <<= 1) { - if (bitmap->chunks[n] & test) - swap_free(swp_entry(swap, bit)); - bit++; - } - bitmap = bitmap->next; + struct rb_node *node; + + while ((node = swsusp_extents.rb_node)) { + struct swsusp_extent *ext; + unsigned long offset; + + ext = container_of(node, struct swsusp_extent, node); + rb_erase(node, &swsusp_extents); + for (offset = ext->start; offset <= ext->end; offset++) + swap_free(swp_entry(swap, offset)); + + kfree(ext); } } +int swsusp_swap_in_use(void) +{ + return (swsusp_extents.rb_node != NULL); +} + /** * swsusp_show_speed - print the time elapsed between two events represented by * @start and @stop @@ -224,7 +233,7 @@ int swsusp_shrink_memory(void) long size, highmem_size; highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO; + size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; tmp = size; size += highmem_size; for_each_zone (zone) diff --git a/kernel/power/user.c b/kernel/power/user.c index 7cf6713b2325..040560d9c312 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -33,25 +33,29 @@ static struct snapshot_data { struct snapshot_handle handle; int swap; - struct bitmap_page *bitmap; int mode; char frozen; char ready; char platform_suspend; } snapshot_state; -static atomic_t device_available = ATOMIC_INIT(1); +atomic_t snapshot_device_available = ATOMIC_INIT(1); static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - if (!atomic_add_unless(&device_available, -1, 0)) + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) return -EBUSY; - if ((filp->f_flags & O_ACCMODE) == O_RDWR) + if ((filp->f_flags & O_ACCMODE) == O_RDWR) { + atomic_inc(&snapshot_device_available); return -ENOSYS; - + } + if(create_basic_memory_bitmaps()) { + atomic_inc(&snapshot_device_available); + return -ENOMEM; + } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -64,7 +68,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; } - data->bitmap = NULL; data->frozen = 0; data->ready = 0; data->platform_suspend = 0; @@ -77,16 +80,15 @@ static int snapshot_release(struct inode *inode, struct file *filp) struct snapshot_data *data; swsusp_free(); + free_basic_memory_bitmaps(); data = filp->private_data; - free_all_swap_pages(data->swap, data->bitmap); - free_bitmap(data->bitmap); + free_all_swap_pages(data->swap); if (data->frozen) { mutex_lock(&pm_mutex); thaw_processes(); - enable_nonboot_cpus(); mutex_unlock(&pm_mutex); } - atomic_inc(&device_available); + atomic_inc(&snapshot_device_available); return 0; } @@ -294,14 +296,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENODEV; break; } - if (!data->bitmap) { - data->bitmap = alloc_bitmap(count_swap_pages(data->swap, 0)); - if (!data->bitmap) { - error = -ENOMEM; - break; - } - } - offset = alloc_swapdev_block(data->swap, data->bitmap); + offset = alloc_swapdev_block(data->swap); if (offset) { offset <<= PAGE_SHIFT; error = put_user(offset, (sector_t __user *)arg); @@ -315,13 +310,11 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, error = -ENODEV; break; } - free_all_swap_pages(data->swap, data->bitmap); - free_bitmap(data->bitmap); - data->bitmap = NULL; + free_all_swap_pages(data->swap); break; case SNAPSHOT_SET_SWAP_FILE: - if (!data->bitmap) { + if (!swsusp_swap_in_use()) { /* * User space encodes device types as two-byte values, * so we need to recode them @@ -420,7 +413,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_SET_SWAP_AREA: - if (data->bitmap) { + if (swsusp_swap_in_use()) { error = -EPERM; } else { struct resume_swap_area swap_area; |