diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /kernel/power | |
download | lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'kernel/power')
-rw-r--r-- | kernel/power/Kconfig | 74 | ||||
-rw-r--r-- | kernel/power/Makefile | 11 | ||||
-rw-r--r-- | kernel/power/console.c | 58 | ||||
-rw-r--r-- | kernel/power/disk.c | 431 | ||||
-rw-r--r-- | kernel/power/main.c | 269 | ||||
-rw-r--r-- | kernel/power/pm.c | 265 | ||||
-rw-r--r-- | kernel/power/power.h | 52 | ||||
-rw-r--r-- | kernel/power/poweroff.c | 45 | ||||
-rw-r--r-- | kernel/power/process.c | 121 | ||||
-rw-r--r-- | kernel/power/smp.c | 85 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 1433 |
11 files changed, 2844 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig new file mode 100644 index 000000000000..696387ffe49c --- /dev/null +++ b/kernel/power/Kconfig @@ -0,0 +1,74 @@ +config PM + bool "Power Management support" + ---help--- + "Power Management" means that parts of your computer are shut + off or put into a power conserving "sleep" mode if they are not + being used. There are two competing standards for doing this: APM + and ACPI. If you want to use either one, say Y here and then also + to the requisite support below. + + Power Management is most important for battery powered laptop + computers; if you have a laptop, check out the Linux Laptop home + page on the WWW at <http://www.linux-on-laptops.com/> or + Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/> + and the Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + Note that, even if you say N here, Linux on the x86 architecture + will issue the hlt instruction if nothing is to be done, thereby + sending the processor to sleep and saving power. + +config PM_DEBUG + bool "Power Management Debug Support" + depends on PM + ---help--- + This option enables verbose debugging support in the Power Management + code. This is helpful when debugging and reporting various PM bugs, + like suspend support. + +config SOFTWARE_SUSPEND + bool "Software Suspend (EXPERIMENTAL)" + depends on EXPERIMENTAL && PM && SWAP + ---help--- + Enable the possibility of suspending the machine. + It doesn't need APM. + You may suspend your machine by 'swsusp' or 'shutdown -z <time>' + (patch for sysvinit needed). + + It creates an image which is saved in your active swap. Upon next + boot, pass the 'resume=/dev/swappartition' argument to the kernel to + have it detect the saved image, restore memory state from it, and + continue to run as before. If you do not want the previous state to + be reloaded, then use the 'noresume' kernel argument. However, note + that your partitions will be fsck'd and you must re-mkswap your swap + partitions. It does not work with swap files. + + Right now you may boot without resuming and then later resume but + in meantime you cannot use those swap partitions/files which were + involved in suspending. Also in this case there is a risk that buffers + on disk won't match with saved ones. + + For more information take a look at <file:Documentation/power/swsusp.txt>. + +config PM_STD_PARTITION + string "Default resume partition" + depends on SOFTWARE_SUSPEND + default "" + ---help--- + The default resume partition is the partition that the suspend- + to-disk implementation will look for a suspended disk image. + + The partition specified here will be different for almost every user. + It should be a valid swap partition (at least for now) that is turned + on before suspending. + + The partition specified can be overridden by specifying: + + resume=/dev/<other device> + + which will set the resume partition to the device specified. + + Note there is currently not a way to specify which device to save the + suspended image to. It will simply pick the first available swap + device. + diff --git a/kernel/power/Makefile b/kernel/power/Makefile new file mode 100644 index 000000000000..fbdc634135a7 --- /dev/null +++ b/kernel/power/Makefile @@ -0,0 +1,11 @@ + +ifeq ($(CONFIG_PM_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif + +swsusp-smp-$(CONFIG_SMP) += smp.o + +obj-y := main.o process.o console.o pm.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o + +obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/console.c b/kernel/power/console.c new file mode 100644 index 000000000000..7ff375e7c95f --- /dev/null +++ b/kernel/power/console.c @@ -0,0 +1,58 @@ +/* + * drivers/power/process.c - Functions for saving/restoring console. + * + * Originally from swsusp. + */ + +#include <linux/vt_kern.h> +#include <linux/kbd_kern.h> +#include <linux/console.h> +#include "power.h" + +static int new_loglevel = 10; +static int orig_loglevel; +#ifdef SUSPEND_CONSOLE +static int orig_fgconsole, orig_kmsg; +#endif + +int pm_prepare_console(void) +{ + orig_loglevel = console_loglevel; + console_loglevel = new_loglevel; + +#ifdef SUSPEND_CONSOLE + acquire_console_sem(); + + orig_fgconsole = fg_console; + + if (vc_allocate(SUSPEND_CONSOLE)) { + /* we can't have a free VC for now. Too bad, + * we don't want to mess the screen for now. */ + release_console_sem(); + return 1; + } + + set_console(SUSPEND_CONSOLE); + release_console_sem(); + + if (vt_waitactive(SUSPEND_CONSOLE)) { + pr_debug("Suspend: Can't switch VCs."); + return 1; + } + orig_kmsg = kmsg_redirect; + kmsg_redirect = SUSPEND_CONSOLE; +#endif + return 0; +} + +void pm_restore_console(void) +{ + console_loglevel = orig_loglevel; +#ifdef SUSPEND_CONSOLE + acquire_console_sem(); + set_console(orig_fgconsole); + release_console_sem(); + kmsg_redirect = orig_kmsg; +#endif + return; +} diff --git a/kernel/power/disk.c b/kernel/power/disk.c new file mode 100644 index 000000000000..02b6764034dc --- /dev/null +++ b/kernel/power/disk.c @@ -0,0 +1,431 @@ +/* + * kernel/power/disk.c - Suspend-to-disk support. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2004 Pavel Machek <pavel@suse.cz> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/suspend.h> +#include <linux/syscalls.h> +#include <linux/reboot.h> +#include <linux/string.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include "power.h" + + +extern suspend_disk_method_t pm_disk_mode; +extern struct pm_ops * pm_ops; + +extern int swsusp_suspend(void); +extern int swsusp_write(void); +extern int swsusp_check(void); +extern int swsusp_read(void); +extern void swsusp_close(void); +extern int swsusp_resume(void); +extern int swsusp_free(void); + + +static int noresume = 0; +char resume_file[256] = CONFIG_PM_STD_PARTITION; +dev_t swsusp_resume_device; + +/** + * power_down - Shut machine down for hibernate. + * @mode: Suspend-to-disk mode + * + * Use the platform driver, if configured so, and return gracefully if it + * fails. + * Otherwise, try to power off and reboot. If they fail, halt the machine, + * there ain't no turning back. + */ + +static void power_down(suspend_disk_method_t mode) +{ + unsigned long flags; + int error = 0; + + local_irq_save(flags); + switch(mode) { + case PM_DISK_PLATFORM: + device_shutdown(); + error = pm_ops->enter(PM_SUSPEND_DISK); + break; + case PM_DISK_SHUTDOWN: + printk("Powering off system\n"); + device_shutdown(); + machine_power_off(); + break; + case PM_DISK_REBOOT: + device_shutdown(); + machine_restart(NULL); + break; + } + machine_halt(); + /* Valid image is on the disk, if we continue we risk serious data corruption + after resume. */ + printk(KERN_CRIT "Please power me down manually\n"); + while(1); +} + + +static int in_suspend __nosavedata = 0; + + +/** + * free_some_memory - Try to free as much memory as possible + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped at this point, or + * livelock is possible. + */ + +static void free_some_memory(void) +{ + unsigned int i = 0; + unsigned int tmp; + unsigned long pages = 0; + char *p = "-\\|/"; + + printk("Freeing memory... "); + while ((tmp = shrink_all_memory(10000))) { + pages += tmp; + printk("\b%c", p[i]); + i++; + if (i > 3) + i = 0; + } + printk("\bdone (%li pages freed)\n", pages); +} + + +static inline void platform_finish(void) +{ + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->finish) + pm_ops->finish(PM_SUSPEND_DISK); + } +} + +static void finish(void) +{ + device_resume(); + platform_finish(); + enable_nonboot_cpus(); + thaw_processes(); + pm_restore_console(); +} + + +static int prepare_processes(void) +{ + int error; + + pm_prepare_console(); + + sys_sync(); + + if (freeze_processes()) { + error = -EBUSY; + return error; + } + + if (pm_disk_mode == PM_DISK_PLATFORM) { + if (pm_ops && pm_ops->prepare) { + if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) + return error; + } + } + + /* Free memory before shutting down devices. */ + free_some_memory(); + + return 0; +} + +static void unprepare_processes(void) +{ + enable_nonboot_cpus(); + thaw_processes(); + pm_restore_console(); +} + +static int prepare_devices(void) +{ + int error; + + disable_nonboot_cpus(); + if ((error = device_suspend(PMSG_FREEZE))) { + printk("Some devices failed to suspend\n"); + platform_finish(); + enable_nonboot_cpus(); + return error; + } + + return 0; +} + +/** + * pm_suspend_disk - The granpappy of power management. + * + * If we're going through the firmware, then get it over with quickly. + * + * If not, then call swsusp to do its thing, then figure out how + * to power down the system. + */ + +int pm_suspend_disk(void) +{ + int error; + + error = prepare_processes(); + if (!error) { + error = prepare_devices(); + } + + if (error) { + unprepare_processes(); + return error; + } + + pr_debug("PM: Attempting to suspend to disk.\n"); + if (pm_disk_mode == PM_DISK_FIRMWARE) + return pm_ops->enter(PM_SUSPEND_DISK); + + pr_debug("PM: snapshotting memory.\n"); + in_suspend = 1; + if ((error = swsusp_suspend())) + goto Done; + + if (in_suspend) { + pr_debug("PM: writing image.\n"); + error = swsusp_write(); + if (!error) + power_down(pm_disk_mode); + } else + pr_debug("PM: Image restored successfully.\n"); + swsusp_free(); + Done: + finish(); + return error; +} + + +/** + * software_resume - Resume from a saved image. + * + * Called as a late_initcall (so all devices are discovered and + * initialized), we call swsusp to see if we have a saved image or not. + * If so, we quiesce devices, the restore the saved image. We will + * return above (in pm_suspend_disk() ) if everything goes well. + * Otherwise, we fail gracefully and return to the normally + * scheduled program. + * + */ + +static int software_resume(void) +{ + int error; + + if (noresume) { + /** + * FIXME: If noresume is specified, we need to find the partition + * and reset it back to normal swap space. + */ + return 0; + } + + pr_debug("PM: Checking swsusp image.\n"); + + if ((error = swsusp_check())) + goto Done; + + pr_debug("PM: Preparing processes for restore.\n"); + + if ((error = prepare_processes())) { + swsusp_close(); + goto Cleanup; + } + + pr_debug("PM: Reading swsusp image.\n"); + + if ((error = swsusp_read())) + goto Cleanup; + + pr_debug("PM: Preparing devices for restore.\n"); + + if ((error = prepare_devices())) + goto Free; + + mb(); + + pr_debug("PM: Restoring saved image.\n"); + swsusp_resume(); + pr_debug("PM: Restore failed, recovering.n"); + finish(); + Free: + swsusp_free(); + Cleanup: + unprepare_processes(); + Done: + pr_debug("PM: Resume from disk failed.\n"); + return 0; +} + +late_initcall(software_resume); + + +static char * pm_disk_modes[] = { + [PM_DISK_FIRMWARE] = "firmware", + [PM_DISK_PLATFORM] = "platform", + [PM_DISK_SHUTDOWN] = "shutdown", + [PM_DISK_REBOOT] = "reboot", +}; + +/** + * disk - Control suspend-to-disk mode + * + * Suspend-to-disk can be handled in several ways. The greatest + * distinction is who writes memory to disk - the firmware or the OS. + * If the firmware does it, we assume that it also handles suspending + * the system. + * If the OS does it, then we have three options for putting the system + * to sleep - using the platform driver (e.g. ACPI or other PM registers), + * powering off the system or rebooting the system (for testing). + * + * The system will support either 'firmware' or 'platform', and that is + * known a priori (and encoded in pm_ops). But, the user may choose + * 'shutdown' or 'reboot' as alternatives. + * + * show() will display what the mode is currently set to. + * store() will accept one of + * + * 'firmware' + * 'platform' + * 'shutdown' + * 'reboot' + * + * It will only change to 'firmware' or 'platform' if the system + * supports it (as determined from pm_ops->pm_disk_mode). + */ + +static ssize_t disk_show(struct subsystem * subsys, char * buf) +{ + return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]); +} + + +static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) +{ + int error = 0; + int i; + int len; + char *p; + suspend_disk_method_t mode = 0; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + down(&pm_sem); + for (i = PM_DISK_FIRMWARE; i < PM_DISK_MAX; i++) { + if (!strncmp(buf, pm_disk_modes[i], len)) { + mode = i; + break; + } + } + if (mode) { + if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) + pm_disk_mode = mode; + else { + if (pm_ops && pm_ops->enter && + (mode == pm_ops->pm_disk_mode)) + pm_disk_mode = mode; + else + error = -EINVAL; + } + } else + error = -EINVAL; + + pr_debug("PM: suspend-to-disk mode set to '%s'\n", + pm_disk_modes[mode]); + up(&pm_sem); + return error ? error : n; +} + +power_attr(disk); + +static ssize_t resume_show(struct subsystem * subsys, char *buf) +{ + return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device), + MINOR(swsusp_resume_device)); +} + +static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t n) +{ + int len; + char *p; + unsigned int maj, min; + int error = -EINVAL; + dev_t res; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + if (sscanf(buf, "%u:%u", &maj, &min) == 2) { + res = MKDEV(maj,min); + if (maj == MAJOR(res) && min == MINOR(res)) { + swsusp_resume_device = res; + printk("Attempting manual resume\n"); + noresume = 0; + software_resume(); + } + } + + return error >= 0 ? n : error; +} + +power_attr(resume); + +static struct attribute * g[] = { + &disk_attr.attr, + &resume_attr.attr, + NULL, +}; + + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_disk_init(void) +{ + return sysfs_create_group(&power_subsys.kset.kobj,&attr_group); +} + +core_initcall(pm_disk_init); + + +static int __init resume_setup(char *str) +{ + if (noresume) + return 1; + + strncpy( resume_file, str, 255 ); + return 1; +} + +static int __init noresume_setup(char *str) +{ + noresume = 1; + return 1; +} + +__setup("noresume", noresume_setup); +__setup("resume=", resume_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c new file mode 100644 index 000000000000..7960ddf04a57 --- /dev/null +++ b/kernel/power/main.c @@ -0,0 +1,269 @@ +/* + * kernel/power/main.c - PM subsystem core functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is released under the GPLv2 + * + */ + +#include <linux/suspend.h> +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/pm.h> + + +#include "power.h" + +DECLARE_MUTEX(pm_sem); + +struct pm_ops * pm_ops = NULL; +suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN; + +/** + * pm_set_ops - Set the global power method table. + * @ops: Pointer to ops structure. + */ + +void pm_set_ops(struct pm_ops * ops) +{ + down(&pm_sem); + pm_ops = ops; + up(&pm_sem); +} + + +/** + * suspend_prepare - Do prep work before entering low-power state. + * @state: State we're entering. + * + * This is common code that is called for each state that we're + * entering. Allocate a console, stop all processes, then make sure + * the platform can enter the requested state. + */ + +static int suspend_prepare(suspend_state_t state) +{ + int error = 0; + + if (!pm_ops || !pm_ops->enter) + return -EPERM; + + pm_prepare_console(); + + if (freeze_processes()) { + error = -EAGAIN; + goto Thaw; + } + + if (pm_ops->prepare) { + if ((error = pm_ops->prepare(state))) + goto Thaw; + } + + if ((error = device_suspend(PMSG_SUSPEND))) { + printk(KERN_ERR "Some devices failed to suspend\n"); + goto Finish; + } + return 0; + Finish: + if (pm_ops->finish) + pm_ops->finish(state); + Thaw: + thaw_processes(); + pm_restore_console(); + return error; +} + + +static int suspend_enter(suspend_state_t state) +{ + int error = 0; + unsigned long flags; + + local_irq_save(flags); + + if ((error = device_power_down(PMSG_SUSPEND))) { + printk(KERN_ERR "Some devices failed to power down\n"); + goto Done; + } + error = pm_ops->enter(state); + device_power_up(); + Done: + local_irq_restore(flags); + return error; +} + + +/** + * suspend_finish - Do final work before exiting suspend sequence. + * @state: State we're coming out of. + * + * Call platform code to clean up, restart processes, and free the + * console that we've allocated. This is not called for suspend-to-disk. + */ + +static void suspend_finish(suspend_state_t state) +{ + device_resume(); + if (pm_ops && pm_ops->finish) + pm_ops->finish(state); + thaw_processes(); + pm_restore_console(); +} + + + + +static char * pm_states[] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", + [PM_SUSPEND_DISK] = "disk", + NULL, +}; + + +/** + * enter_state - Do common work of entering low-power state. + * @state: pm_state structure for state we're entering. + * + * Make sure we're the only ones trying to enter a sleep state. Fail + * if someone has beat us to it, since we don't want anything weird to + * happen when we wake up. + * Then, do the setup for suspend, enter the state, and cleaup (after + * we've woken up). + */ + +static int enter_state(suspend_state_t state) +{ + int error; + + if (down_trylock(&pm_sem)) + return -EBUSY; + + if (state == PM_SUSPEND_DISK) { + error = pm_suspend_disk(); + goto Unlock; + } + + /* Suspend is hard to get right on SMP. */ + if (num_online_cpus() != 1) { + error = -EPERM; + goto Unlock; + } + + pr_debug("PM: Preparing system for suspend\n"); + if ((error = suspend_prepare(state))) + goto Unlock; + + pr_debug("PM: Entering state.\n"); + error = suspend_enter(state); + + pr_debug("PM: Finishing up.\n"); + suspend_finish(state); + Unlock: + up(&pm_sem); + return error; +} + +/* + * This is main interface to the outside world. It needs to be + * called from process context. + */ +int software_suspend(void) +{ + return enter_state(PM_SUSPEND_DISK); +} + + +/** + * pm_suspend - Externally visible function for suspending system. + * @state: Enumarted value of state to enter. + * + * Determine whether or not value is within range, get state + * structure, and enter (above). + */ + +int pm_suspend(suspend_state_t state) +{ + if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) + return enter_state(state); + return -EINVAL; +} + + + +decl_subsys(power,NULL,NULL); + + +/** + * state - control system power state. + * + * show() returns what states are supported, which is hard-coded to + * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and + * 'disk' (Suspend-to-Disk). + * + * store() accepts one of those strings, translates it into the + * proper enumerated value, and initiates a suspend transition. + */ + +static ssize_t state_show(struct subsystem * subsys, char * buf) +{ + int i; + char * s = buf; + + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (pm_states[i]) + s += sprintf(s,"%s ",pm_states[i]); + } + s += sprintf(s,"\n"); + return (s - buf); +} + +static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n) +{ + suspend_state_t state = PM_SUSPEND_STANDBY; + char ** s; + char *p; + int error; + int len; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { + if (*s && !strncmp(buf, *s, len)) + break; + } + if (*s) + error = enter_state(state); + else + error = -EINVAL; + return error ? error : n; +} + +power_attr(state); + +static struct attribute * g[] = { + &state_attr.attr, + NULL, +}; + +static struct attribute_group attr_group = { + .attrs = g, +}; + + +static int __init pm_init(void) +{ + int error = subsystem_register(&power_subsys); + if (!error) + error = sysfs_create_group(&power_subsys.kset.kobj,&attr_group); + return error; +} + +core_initcall(pm_init); diff --git a/kernel/power/pm.c b/kernel/power/pm.c new file mode 100644 index 000000000000..61deda04e39e --- /dev/null +++ b/kernel/power/pm.c @@ -0,0 +1,265 @@ +/* + * pm.c - Power management interface + * + * Copyright (C) 2000 Andrew Henroid + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/pm.h> +#include <linux/interrupt.h> + +int pm_active; + +/* + * Locking notes: + * pm_devs_lock can be a semaphore providing pm ops are not called + * from an interrupt handler (already a bad idea so no change here). Each + * change must be protected so that an unlink of an entry doesn't clash + * with a pm send - which is permitted to sleep in the current architecture + * + * Module unloads clashing with pm events now work out safely, the module + * unload path will block until the event has been sent. It may well block + * until a resume but that will be fine. + */ + +static DECLARE_MUTEX(pm_devs_lock); +static LIST_HEAD(pm_devs); + +/** + * pm_register - register a device with power management + * @type: device type + * @id: device ID + * @callback: callback function + * + * Add a device to the list of devices that wish to be notified about + * power management events. A &pm_dev structure is returned on success, + * on failure the return is %NULL. + * + * The callback function will be called in process context and + * it may sleep. + */ + +struct pm_dev *pm_register(pm_dev_t type, + unsigned long id, + pm_callback callback) +{ + struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); + if (dev) { + memset(dev, 0, sizeof(*dev)); + dev->type = type; + dev->id = id; + dev->callback = callback; + + down(&pm_devs_lock); + list_add(&dev->entry, &pm_devs); + up(&pm_devs_lock); + } + return dev; +} + +/** + * pm_unregister - unregister a device with power management + * @dev: device to unregister + * + * Remove a device from the power management notification lists. The + * dev passed must be a handle previously returned by pm_register. + */ + +void pm_unregister(struct pm_dev *dev) +{ + if (dev) { + down(&pm_devs_lock); + list_del(&dev->entry); + up(&pm_devs_lock); + + kfree(dev); + } +} + +static void __pm_unregister(struct pm_dev *dev) +{ + if (dev) { + list_del(&dev->entry); + kfree(dev); + } +} + +/** + * pm_unregister_all - unregister all devices with matching callback + * @callback: callback function pointer + * + * Unregister every device that would call the callback passed. This + * is primarily meant as a helper function for loadable modules. It + * enables a module to give up all its managed devices without keeping + * its own private list. + */ + +void pm_unregister_all(pm_callback callback) +{ + struct list_head *entry; + + if (!callback) + return; + + down(&pm_devs_lock); + entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + entry = entry->next; + if (dev->callback == callback) + __pm_unregister(dev); + } + up(&pm_devs_lock); +} + +/** + * pm_send - send request to a single device + * @dev: device to send to + * @rqst: power management request + * @data: data for the callback + * + * Issue a power management request to a given device. The + * %PM_SUSPEND and %PM_RESUME events are handled specially. The + * data field must hold the intended next state. No call is made + * if the state matches. + * + * BUGS: what stops two power management requests occurring in parallel + * and conflicting. + * + * WARNING: Calling pm_send directly is not generally recommended, in + * particular there is no locking against the pm_dev going away. The + * caller must maintain all needed locking or have 'inside knowledge' + * on the safety. Also remember that this function is not locked against + * pm_unregister. This means that you must handle SMP races on callback + * execution and unload yourself. + */ + +static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + int status = 0; + unsigned long prev_state, next_state; + + if (in_interrupt()) + BUG(); + + switch (rqst) { + case PM_SUSPEND: + case PM_RESUME: + prev_state = dev->state; + next_state = (unsigned long) data; + if (prev_state != next_state) { + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + if (!status) { + dev->state = next_state; + dev->prev_state = prev_state; + } + } + else { + dev->prev_state = prev_state; + } + break; + default: + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + break; + } + return status; +} + +/* + * Undo incomplete request + */ +static void pm_undo_all(struct pm_dev *last) +{ + struct list_head *entry = last->entry.prev; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->state != dev->prev_state) { + /* previous state was zero (running) resume or + * previous state was non-zero (suspended) suspend + */ + pm_request_t undo = (dev->prev_state + ? PM_SUSPEND:PM_RESUME); + pm_send(dev, undo, (void*) dev->prev_state); + } + entry = entry->prev; + } +} + +/** + * pm_send_all - send request to all managed devices + * @rqst: power management request + * @data: data for the callback + * + * Issue a power management request to a all devices. The + * %PM_SUSPEND events are handled specially. Any device is + * permitted to fail a suspend by returning a non zero (error) + * value from its callback function. If any device vetoes a + * suspend request then all other devices that have suspended + * during the processing of this request are restored to their + * previous state. + * + * WARNING: This function takes the pm_devs_lock. The lock is not dropped until + * the callbacks have completed. This prevents races against pm locking + * functions, races against module unload pm_unregister code. It does + * mean however that you must not issue pm_ functions within the callback + * or you will deadlock and users will hate you. + * + * Zero is returned on success. If a suspend fails then the status + * from the device that vetoes the suspend is returned. + * + * BUGS: what stops two power management requests occurring in parallel + * and conflicting. + */ + +int pm_send_all(pm_request_t rqst, void *data) +{ + struct list_head *entry; + + down(&pm_devs_lock); + entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->callback) { + int status = pm_send(dev, rqst, data); + if (status) { + /* return devices to previous state on + * failed suspend request + */ + if (rqst == PM_SUSPEND) + pm_undo_all(dev); + up(&pm_devs_lock); + return status; + } + } + entry = entry->next; + } + up(&pm_devs_lock); + return 0; +} + +EXPORT_SYMBOL(pm_register); +EXPORT_SYMBOL(pm_unregister); +EXPORT_SYMBOL(pm_unregister_all); +EXPORT_SYMBOL(pm_send_all); +EXPORT_SYMBOL(pm_active); + + diff --git a/kernel/power/power.h b/kernel/power/power.h new file mode 100644 index 000000000000..cd6a3493cc0d --- /dev/null +++ b/kernel/power/power.h @@ -0,0 +1,52 @@ +#include <linux/suspend.h> +#include <linux/utsname.h> + +/* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but + we probably do not take enough locks for switching consoles, etc, + so bad things might happen. +*/ +#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) +#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) +#endif + + +struct swsusp_info { + struct new_utsname uts; + u32 version_code; + unsigned long num_physpages; + int cpus; + unsigned long image_pages; + unsigned long pagedir_pages; + suspend_pagedir_t * suspend_pagedir; + swp_entry_t pagedir[768]; +} __attribute__((aligned(PAGE_SIZE))); + + + +#ifdef CONFIG_SOFTWARE_SUSPEND +extern int pm_suspend_disk(void); + +#else +static inline int pm_suspend_disk(void) +{ + return -EPERM; +} +#endif +extern struct semaphore pm_sem; +#define power_attr(_name) \ +static struct subsys_attribute _name##_attr = { \ + .attr = { \ + .name = __stringify(_name), \ + .mode = 0644, \ + }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + +extern struct subsystem power_subsys; + +extern int freeze_processes(void); +extern void thaw_processes(void); + +extern int pm_prepare_console(void); +extern void pm_restore_console(void); diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c new file mode 100644 index 000000000000..715081b2d829 --- /dev/null +++ b/kernel/power/poweroff.c @@ -0,0 +1,45 @@ +/* + * poweroff.c - sysrq handler to gracefully power down machine. + * + * This file is released under the GPL v2 + */ + +#include <linux/kernel.h> +#include <linux/sysrq.h> +#include <linux/init.h> +#include <linux/pm.h> +#include <linux/workqueue.h> + +/* + * When the user hits Sys-Rq o to power down the machine this is the + * callback we use. + */ + +static void do_poweroff(void *dummy) +{ + if (pm_power_off) + pm_power_off(); +} + +static DECLARE_WORK(poweroff_work, do_poweroff, NULL); + +static void handle_poweroff(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + schedule_work(&poweroff_work); +} + +static struct sysrq_key_op sysrq_poweroff_op = { + .handler = handle_poweroff, + .help_msg = "powerOff", + .action_msg = "Power Off", + .enable_mask = SYSRQ_ENABLE_BOOT, +}; + +static int pm_sysrq_init(void) +{ + register_sysrq_key('o', &sysrq_poweroff_op); + return 0; +} + +subsys_initcall(pm_sysrq_init); diff --git a/kernel/power/process.c b/kernel/power/process.c new file mode 100644 index 000000000000..78d92dc6a1ed --- /dev/null +++ b/kernel/power/process.c @@ -0,0 +1,121 @@ +/* + * drivers/power/process.c - Functions for starting/stopping processes on + * suspend transitions. + * + * Originally from swsusp. + */ + + +#undef DEBUG + +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/suspend.h> +#include <linux/module.h> + +/* + * Timeout for stopping processes + */ +#define TIMEOUT (6 * HZ) + + +static inline int freezeable(struct task_struct * p) +{ + if ((p == current) || + (p->flags & PF_NOFREEZE) || + (p->exit_state == EXIT_ZOMBIE) || + (p->exit_state == EXIT_DEAD) || + (p->state == TASK_STOPPED) || + (p->state == TASK_TRACED)) + return 0; + return 1; +} + +/* Refrigerator is place where frozen processes are stored :-). */ +void refrigerator(unsigned long flag) +{ + /* Hmm, should we be allowed to suspend when there are realtime + processes around? */ + long save; + save = current->state; + current->state = TASK_UNINTERRUPTIBLE; + pr_debug("%s entered refrigerator\n", current->comm); + printk("="); + current->flags &= ~PF_FREEZE; + + spin_lock_irq(¤t->sighand->siglock); + recalc_sigpending(); /* We sent fake signal, clean it up */ + spin_unlock_irq(¤t->sighand->siglock); + + current->flags |= PF_FROZEN; + while (current->flags & PF_FROZEN) + schedule(); + pr_debug("%s left refrigerator\n", current->comm); + current->state = save; +} + +/* 0 = success, else # of processes that we failed to stop */ +int freeze_processes(void) +{ + int todo; + unsigned long start_time; + struct task_struct *g, *p; + + printk( "Stopping tasks: " ); + start_time = jiffies; + do { + todo = 0; + read_lock(&tasklist_lock); + do_each_thread(g, p) { + unsigned long flags; + if (!freezeable(p)) + continue; + if ((p->flags & PF_FROZEN) || + (p->state == TASK_TRACED) || + (p->state == TASK_STOPPED)) + continue; + + /* FIXME: smp problem here: we may not access other process' flags + without locking */ + p->flags |= PF_FREEZE; + spin_lock_irqsave(&p->sighand->siglock, flags); + signal_wake_up(p, 0); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + todo++; + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + yield(); /* Yield is okay here */ + if (time_after(jiffies, start_time + TIMEOUT)) { + printk( "\n" ); + printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo ); + return todo; + } + } while(todo); + + printk( "|\n" ); + BUG_ON(in_atomic()); + return 0; +} + +void thaw_processes(void) +{ + struct task_struct *g, *p; + + printk( "Restarting tasks..." ); + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if (!freezeable(p)) + continue; + if (p->flags & PF_FROZEN) { + p->flags &= ~PF_FROZEN; + wake_up_process(p); + } else + printk(KERN_INFO " Strange, %s not stopped\n", p->comm ); + } while_each_thread(g, p); + + read_unlock(&tasklist_lock); + schedule(); + printk( " done\n" ); +} + +EXPORT_SYMBOL(refrigerator); diff --git a/kernel/power/smp.c b/kernel/power/smp.c new file mode 100644 index 000000000000..7fa7f6e2b7fb --- /dev/null +++ b/kernel/power/smp.c @@ -0,0 +1,85 @@ +/* + * drivers/power/smp.c - Functions for stopping other CPUs. + * + * Copyright 2004 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> + * + * This file is released under the GPLv2. + */ + +#undef DEBUG + +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/suspend.h> +#include <linux/module.h> +#include <asm/atomic.h> +#include <asm/tlbflush.h> + +static atomic_t cpu_counter, freeze; + + +static void smp_pause(void * data) +{ + struct saved_context ctxt; + __save_processor_state(&ctxt); + printk("Sleeping in:\n"); + dump_stack(); + atomic_inc(&cpu_counter); + while (atomic_read(&freeze)) { + /* FIXME: restore takes place at random piece inside this. + This should probably be written in assembly, and + preserve general-purpose registers, too + + What about stack? We may need to move to new stack here. + + This should better be ran with interrupts disabled. + */ + cpu_relax(); + barrier(); + } + atomic_dec(&cpu_counter); + __restore_processor_state(&ctxt); +} + +static cpumask_t oldmask; + +void disable_nonboot_cpus(void) +{ + printk("Freezing CPUs (at %d)", smp_processor_id()); + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(0)); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ); + printk("..."); + BUG_ON(smp_processor_id() != 0); + + /* FIXME: for this to work, all the CPUs must be running + * "idle" thread (or we deadlock). Is that guaranteed? */ + + atomic_set(&cpu_counter, 0); + atomic_set(&freeze, 1); + smp_call_function(smp_pause, NULL, 0, 0); + while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) { + cpu_relax(); + barrier(); + } + printk("ok\n"); +} + +void enable_nonboot_cpus(void) +{ + printk("Restarting CPUs"); + atomic_set(&freeze, 0); + while (atomic_read(&cpu_counter)) { + cpu_relax(); + barrier(); + } + printk("..."); + set_cpus_allowed(current, oldmask); + schedule(); + printk("ok\n"); + +} + + diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c new file mode 100644 index 000000000000..ae5bebc3b18f --- /dev/null +++ b/kernel/power/swsusp.c @@ -0,0 +1,1433 @@ +/* + * linux/kernel/power/swsusp.c + * + * This file is to realize architecture-independent + * machine suspend feature using pretty near only high-level routines + * + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> + * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz> + * + * This file is released under the GPLv2. + * + * I'd like to thank the following people for their work: + * + * Pavel Machek <pavel@ucw.cz>: + * Modifications, defectiveness pointing, being with me at the very beginning, + * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. + * + * Steve Doddi <dirk@loth.demon.co.uk>: + * Support the possibility of hardware state restoring. + * + * Raph <grey.havens@earthling.net>: + * Support for preserving states of network devices and virtual console + * (including X and svgatextmode) + * + * Kurt Garloff <garloff@suse.de>: + * Straightened the critical function in order to prevent compilers from + * playing tricks with local variables. + * + * Andreas Mohr <a.mohr@mailto.de> + * + * Alex Badea <vampire@go.ro>: + * Fixed runaway init + * + * More state savers are welcome. Especially for the scsi layer... + * + * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt + */ + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/smp_lock.h> +#include <linux/file.h> +#include <linux/utsname.h> +#include <linux/version.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/bitops.h> +#include <linux/vt_kern.h> +#include <linux/kbd_kern.h> +#include <linux/keyboard.h> +#include <linux/spinlock.h> +#include <linux/genhd.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/swap.h> +#include <linux/pm.h> +#include <linux/device.h> +#include <linux/buffer_head.h> +#include <linux/swapops.h> +#include <linux/bootmem.h> +#include <linux/syscalls.h> +#include <linux/console.h> +#include <linux/highmem.h> +#include <linux/bio.h> + +#include <asm/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/io.h> + +#include "power.h" + +/* References to section boundaries */ +extern const void __nosave_begin, __nosave_end; + +/* Variables to be preserved over suspend */ +static int nr_copy_pages_check; + +extern char resume_file[]; + +/* Local variables that should not be affected by save */ +unsigned int nr_copy_pages __nosavedata = 0; + +/* Suspend pagedir is allocated before final copy, therefore it + must be freed after resume + + Warning: this is evil. There are actually two pagedirs at time of + resume. One is "pagedir_save", which is empty frame allocated at + time of suspend, that must be freed. Second is "pagedir_nosave", + allocated at time of resume, that travels through memory not to + collide with anything. + + Warning: this is even more evil than it seems. Pagedirs this file + talks about are completely different from page directories used by + MMU hardware. + */ +suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; +static suspend_pagedir_t *pagedir_save; + +#define SWSUSP_SIG "S1SUSPEND" + +static struct swsusp_header { + char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; + swp_entry_t swsusp_info; + char orig_sig[10]; + char sig[10]; +} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; + +static struct swsusp_info swsusp_info; + +/* + * XXX: We try to keep some more pages free so that I/O operations succeed + * without paging. Might this be more? + */ +#define PAGES_FOR_IO 512 + +/* + * Saving part... + */ + +/* We memorize in swapfile_used what swap devices are used for suspension */ +#define SWAPFILE_UNUSED 0 +#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ +#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ + +static unsigned short swapfile_used[MAX_SWAPFILES]; +static unsigned short root_swap; + +static int mark_swapfiles(swp_entry_t prev) +{ + int error; + + rw_swap_page_sync(READ, + swp_entry(root_swap, 0), + virt_to_page((unsigned long)&swsusp_header)); + if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || + !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { + memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); + memcpy(swsusp_header.sig,SWSUSP_SIG, 10); + swsusp_header.swsusp_info = prev; + error = rw_swap_page_sync(WRITE, + swp_entry(root_swap, 0), + virt_to_page((unsigned long) + &swsusp_header)); + } else { + pr_debug("swsusp: Partition is not swap space.\n"); + error = -ENODEV; + } + return error; +} + +/* + * Check whether the swap device is the specified resume + * device, irrespective of whether they are specified by + * identical names. + * + * (Thus, device inode aliasing is allowed. You can say /dev/hda4 + * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs] + * and they'll be considered the same device. This is *necessary* for + * devfs, since the resume code can only recognize the form /dev/hda4, + * but the suspend code would see the long name.) + */ +static int is_resume_device(const struct swap_info_struct *swap_info) +{ + struct file *file = swap_info->swap_file; + struct inode *inode = file->f_dentry->d_inode; + + return S_ISBLK(inode->i_mode) && + swsusp_resume_device == MKDEV(imajor(inode), iminor(inode)); +} + +static int swsusp_swap_check(void) /* This is called before saving image */ +{ + int i, len; + + len=strlen(resume_file); + root_swap = 0xFFFF; + + swap_list_lock(); + for(i=0; i<MAX_SWAPFILES; i++) { + if (swap_info[i].flags == 0) { + swapfile_used[i]=SWAPFILE_UNUSED; + } else { + if(!len) { + printk(KERN_WARNING "resume= option should be used to set suspend device" ); + if(root_swap == 0xFFFF) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else + swapfile_used[i] = SWAPFILE_IGNORED; + } else { + /* we ignore all swap devices that are not the resume_file */ + if (is_resume_device(&swap_info[i])) { + swapfile_used[i] = SWAPFILE_SUSPEND; + root_swap = i; + } else { + swapfile_used[i] = SWAPFILE_IGNORED; + } + } + } + } + swap_list_unlock(); + return (root_swap != 0xffff) ? 0 : -ENODEV; +} + +/** + * This is called after saving image so modification + * will be lost after resume... and that's what we want. + * we make the device unusable. A new call to + * lock_swapdevices can unlock the devices. + */ +static void lock_swapdevices(void) +{ + int i; + + swap_list_lock(); + for(i = 0; i< MAX_SWAPFILES; i++) + if(swapfile_used[i] == SWAPFILE_IGNORED) { + swap_info[i].flags ^= 0xFF; + } + swap_list_unlock(); +} + +/** + * write_swap_page - Write one page to a fresh swap location. + * @addr: Address we're writing. + * @loc: Place to store the entry we used. + * + * Allocate a new swap entry and 'sync' it. Note we discard -EIO + * errors. That is an artifact left over from swsusp. It did not + * check the return of rw_swap_page_sync() at all, since most pages + * written back to swap would return -EIO. + * This is a partial improvement, since we will at least return other + * errors, though we need to eventually fix the damn code. + */ +static int write_page(unsigned long addr, swp_entry_t * loc) +{ + swp_entry_t entry; + int error = 0; + + entry = get_swap_page(); + if (swp_offset(entry) && + swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) { + error = rw_swap_page_sync(WRITE, entry, + virt_to_page(addr)); + if (error == -EIO) + error = 0; + if (!error) + *loc = entry; + } else + error = -ENOSPC; + return error; +} + +/** + * data_free - Free the swap entries used by the saved image. + * + * Walk the list of used swap entries and free each one. + * This is only used for cleanup when suspend fails. + */ +static void data_free(void) +{ + swp_entry_t entry; + int i; + + for (i = 0; i < nr_copy_pages; i++) { + entry = (pagedir_nosave + i)->swap_address; + if (entry.val) + swap_free(entry); + else + break; + (pagedir_nosave + i)->swap_address = (swp_entry_t){0}; + } +} + +/** + * data_write - Write saved image to swap. + * + * Walk the list of pages in the image and sync each one to swap. + */ +static int data_write(void) +{ + int error = 0, i = 0; + unsigned int mod = nr_copy_pages / 100; + struct pbe *p; + + if (!mod) + mod = 1; + + printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); + for_each_pbe(p, pagedir_nosave) { + if (!(i%mod)) + printk( "\b\b\b\b%3d%%", i / mod ); + if ((error = write_page(p->address, &(p->swap_address)))) + return error; + i++; + } + printk("\b\b\b\bdone\n"); + return error; +} + +static void dump_info(void) +{ + pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code); + pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages); + pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname); + pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename); + pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release); + pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version); + pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine); + pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); + pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); + pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages); + pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); +} + +static void init_header(void) +{ + memset(&swsusp_info, 0, sizeof(swsusp_info)); + swsusp_info.version_code = LINUX_VERSION_CODE; + swsusp_info.num_physpages = num_physpages; + memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); + + swsusp_info.suspend_pagedir = pagedir_nosave; + swsusp_info.cpus = num_online_cpus(); + swsusp_info.image_pages = nr_copy_pages; +} + +static int close_swap(void) +{ + swp_entry_t entry; + int error; + + dump_info(); + error = write_page((unsigned long)&swsusp_info, &entry); + if (!error) { + printk( "S" ); + error = mark_swapfiles(entry); + printk( "|\n" ); + } + return error; +} + +/** + * free_pagedir_entries - Free pages used by the page directory. + * + * This is used during suspend for error recovery. + */ + +static void free_pagedir_entries(void) +{ + int i; + + for (i = 0; i < swsusp_info.pagedir_pages; i++) + swap_free(swsusp_info.pagedir[i]); +} + + +/** + * write_pagedir - Write the array of pages holding the page directory. + * @last: Last swap entry we write (needed for header). + */ + +static int write_pagedir(void) +{ + int error = 0; + unsigned n = 0; + struct pbe * pbe; + + printk( "Writing pagedir..."); + for_each_pb_page(pbe, pagedir_nosave) { + if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) + return error; + } + + swsusp_info.pagedir_pages = n; + printk("done (%u pages)\n", n); + return error; +} + +/** + * write_suspend_image - Write entire image and metadata. + * + */ + +static int write_suspend_image(void) +{ + int error; + + init_header(); + if ((error = data_write())) + goto FreeData; + + if ((error = write_pagedir())) + goto FreePagedir; + + if ((error = close_swap())) + goto FreePagedir; + Done: + return error; + FreePagedir: + free_pagedir_entries(); + FreeData: + data_free(); + goto Done; +} + + +#ifdef CONFIG_HIGHMEM +struct highmem_page { + char *data; + struct page *page; + struct highmem_page *next; +}; + +static struct highmem_page *highmem_copy; + +static int save_highmem_zone(struct zone *zone) +{ + unsigned long zone_pfn; + mark_free_pages(zone); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { + struct page *page; + struct highmem_page *save; + void *kaddr; + unsigned long pfn = zone_pfn + zone->zone_start_pfn; + + if (!(pfn%1000)) + printk("."); + if (!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + /* + * This condition results from rvmalloc() sans vmalloc_32() + * and architectural memory reservations. This should be + * corrected eventually when the cases giving rise to this + * are better understood. + */ + if (PageReserved(page)) { + printk("highmem reserved page?!\n"); + continue; + } + BUG_ON(PageNosave(page)); + if (PageNosaveFree(page)) + continue; + save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); + if (!save) + return -ENOMEM; + save->next = highmem_copy; + save->page = page; + save->data = (void *) get_zeroed_page(GFP_ATOMIC); + if (!save->data) { + kfree(save); + return -ENOMEM; + } + kaddr = kmap_atomic(page, KM_USER0); + memcpy(save->data, kaddr, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + highmem_copy = save; + } + return 0; +} +#endif /* CONFIG_HIGHMEM */ + + +static int save_highmem(void) +{ +#ifdef CONFIG_HIGHMEM + struct zone *zone; + int res = 0; + + pr_debug("swsusp: Saving Highmem\n"); + for_each_zone(zone) { + if (is_highmem(zone)) + res = save_highmem_zone(zone); + if (res) + return res; + } +#endif + return 0; +} + +static int restore_highmem(void) +{ +#ifdef CONFIG_HIGHMEM + printk("swsusp: Restoring Highmem\n"); + while (highmem_copy) { + struct highmem_page *save = highmem_copy; + void *kaddr; + highmem_copy = save->next; + + kaddr = kmap_atomic(save->page, KM_USER0); + memcpy(kaddr, save->data, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + free_page((long) save->data); + kfree(save); + } +#endif + return 0; +} + + +static int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +/** + * saveable - Determine whether a page should be cloned or not. + * @pfn: The page + * + * We save a page if it's Reserved, and not in the range of pages + * statically defined as 'unsaveable', or if it isn't reserved, and + * isn't part of a free chunk of pages. + */ + +static int saveable(struct zone * zone, unsigned long * zone_pfn) +{ + unsigned long pfn = *zone_pfn + zone->zone_start_pfn; + struct page * page; + + if (!pfn_valid(pfn)) + return 0; + + page = pfn_to_page(pfn); + BUG_ON(PageReserved(page) && PageNosave(page)); + if (PageNosave(page)) + return 0; + if (PageReserved(page) && pfn_is_nosave(pfn)) { + pr_debug("[nosave pfn 0x%lx]", pfn); + return 0; + } + if (PageNosaveFree(page)) + return 0; + + return 1; +} + +static void count_data_pages(void) +{ + struct zone *zone; + unsigned long zone_pfn; + + nr_copy_pages = 0; + + for_each_zone(zone) { + if (is_highmem(zone)) + continue; + mark_free_pages(zone); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) + nr_copy_pages += saveable(zone, &zone_pfn); + } +} + + +static void copy_data_pages(void) +{ + struct zone *zone; + unsigned long zone_pfn; + struct pbe * pbe = pagedir_nosave; + + pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages); + for_each_zone(zone) { + if (is_highmem(zone)) + continue; + mark_free_pages(zone); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { + if (saveable(zone, &zone_pfn)) { + struct page * page; + page = pfn_to_page(zone_pfn + zone->zone_start_pfn); + BUG_ON(!pbe); + pbe->orig_address = (long) page_address(page); + /* copy_page is not usable for copying task structs. */ + memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE); + pbe = pbe->next; + } + } + } + BUG_ON(pbe); +} + + +/** + * calc_nr - Determine the number of pages needed for a pbe list. + */ + +static int calc_nr(int nr_copy) +{ + int extra = 0; + int mod = !!(nr_copy % PBES_PER_PAGE); + int diff = (nr_copy / PBES_PER_PAGE) + mod; + + do { + extra += diff; + nr_copy += diff; + mod = !!(nr_copy % PBES_PER_PAGE); + diff = (nr_copy / PBES_PER_PAGE) + mod - extra; + } while (diff > 0); + + return nr_copy; +} + +/** + * free_pagedir - free pages allocated with alloc_pagedir() + */ + +static inline void free_pagedir(struct pbe *pblist) +{ + struct pbe *pbe; + + while (pblist) { + pbe = (pblist + PB_PAGE_SKIP)->next; + free_page((unsigned long)pblist); + pblist = pbe; + } +} + +/** + * fill_pb_page - Create a list of PBEs on a given memory page + */ + +static inline void fill_pb_page(struct pbe *pbpage) +{ + struct pbe *p; + + p = pbpage; + pbpage += PB_PAGE_SKIP; + do + p->next = p + 1; + while (++p < pbpage); +} + +/** + * create_pbe_list - Create a list of PBEs on top of a given chain + * of memory pages allocated with alloc_pagedir() + */ + +static void create_pbe_list(struct pbe *pblist, unsigned nr_pages) +{ + struct pbe *pbpage, *p; + unsigned num = PBES_PER_PAGE; + + for_each_pb_page (pbpage, pblist) { + if (num >= nr_pages) + break; + + fill_pb_page(pbpage); + num += PBES_PER_PAGE; + } + if (pbpage) { + for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) + p->next = p + 1; + p->next = NULL; + } + pr_debug("create_pbe_list(): initialized %d PBEs\n", num); +} + +/** + * alloc_pagedir - Allocate the page directory. + * + * First, determine exactly how many pages we need and + * allocate them. + * + * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE + * struct pbe elements (pbes) and the last element in the page points + * to the next page. + * + * On each page we set up a list of struct_pbe elements. + */ + +static struct pbe * alloc_pagedir(unsigned nr_pages) +{ + unsigned num; + struct pbe *pblist, *pbe; + + if (!nr_pages) + return NULL; + + pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); + pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; + pbe = pbe->next, num += PBES_PER_PAGE) { + pbe += PB_PAGE_SKIP; + pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + } + if (!pbe) { /* get_zeroed_page() failed */ + free_pagedir(pblist); + pblist = NULL; + } + return pblist; +} + +/** + * free_image_pages - Free pages allocated for snapshot + */ + +static void free_image_pages(void) +{ + struct pbe * p; + + for_each_pbe(p, pagedir_save) { + if (p->address) { + ClearPageNosave(virt_to_page(p->address)); + free_page(p->address); + p->address = 0; + } + } +} + +/** + * alloc_image_pages - Allocate pages for the snapshot. + */ + +static int alloc_image_pages(void) +{ + struct pbe * p; + + for_each_pbe(p, pagedir_save) { + p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + if (!p->address) + return -ENOMEM; + SetPageNosave(virt_to_page(p->address)); + } + return 0; +} + +void swsusp_free(void) +{ + BUG_ON(PageNosave(virt_to_page(pagedir_save))); + BUG_ON(PageNosaveFree(virt_to_page(pagedir_save))); + free_image_pages(); + free_pagedir(pagedir_save); +} + + +/** + * enough_free_mem - Make sure we enough free memory to snapshot. + * + * Returns TRUE or FALSE after checking the number of available + * free pages. + */ + +static int enough_free_mem(void) +{ + if (nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) { + pr_debug("swsusp: Not enough free pages: Have %d\n", + nr_free_pages()); + return 0; + } + return 1; +} + + +/** + * enough_swap - Make sure we have enough swap to save the image. + * + * Returns TRUE or FALSE after checking the total amount of swap + * space avaiable. + * + * FIXME: si_swapinfo(&i) returns all swap devices information. + * We should only consider resume_device. + */ + +static int enough_swap(void) +{ + struct sysinfo i; + + si_swapinfo(&i); + if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) { + pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap); + return 0; + } + return 1; +} + +static int swsusp_alloc(void) +{ + int error; + + pr_debug("suspend: (pages needed: %d + %d free: %d)\n", + nr_copy_pages, PAGES_FOR_IO, nr_free_pages()); + + pagedir_nosave = NULL; + if (!enough_free_mem()) + return -ENOMEM; + + if (!enough_swap()) + return -ENOSPC; + + nr_copy_pages = calc_nr(nr_copy_pages); + + if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) { + printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); + return -ENOMEM; + } + create_pbe_list(pagedir_save, nr_copy_pages); + pagedir_nosave = pagedir_save; + if ((error = alloc_image_pages())) { + printk(KERN_ERR "suspend: Allocating image pages failed.\n"); + swsusp_free(); + return error; + } + + nr_copy_pages_check = nr_copy_pages; + return 0; +} + +static int suspend_prepare_image(void) +{ + int error; + + pr_debug("swsusp: critical section: \n"); + if (save_highmem()) { + printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n"); + restore_highmem(); + return -ENOMEM; + } + + drain_local_pages(); + count_data_pages(); + printk("swsusp: Need to copy %u pages\n", nr_copy_pages); + + error = swsusp_alloc(); + if (error) + return error; + + /* During allocating of suspend pagedir, new cold pages may appear. + * Kill them. + */ + drain_local_pages(); + copy_data_pages(); + + /* + * End of critical section. From now on, we can write to memory, + * but we should not touch disk. This specially means we must _not_ + * touch swap space! Except we must write out our image of course. + */ + + printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages ); + return 0; +} + + +/* It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) + */ +int swsusp_write(void) +{ + int error; + device_resume(); + lock_swapdevices(); + error = write_suspend_image(); + /* This will unlock ignored swap devices since writing is finished */ + lock_swapdevices(); + return error; + +} + + +extern asmlinkage int swsusp_arch_suspend(void); +extern asmlinkage int swsusp_arch_resume(void); + + +asmlinkage int swsusp_save(void) +{ + int error = 0; + + if ((error = swsusp_swap_check())) { + printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try " + "swapon -a!\n"); + return error; + } + return suspend_prepare_image(); +} + +int swsusp_suspend(void) +{ + int error; + if ((error = arch_prepare_suspend())) + return error; + local_irq_disable(); + /* At this point, device_suspend() has been called, but *not* + * device_power_down(). We *must* device_power_down() now. + * Otherwise, drivers for some devices (e.g. interrupt controllers) + * become desynchronized with the actual state of the hardware + * at resume time, and evil weirdness ensues. + */ + if ((error = device_power_down(PMSG_FREEZE))) { + printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); + local_irq_enable(); + swsusp_free(); + return error; + } + save_processor_state(); + if ((error = swsusp_arch_suspend())) + swsusp_free(); + /* Restore control flow magically appears here */ + restore_processor_state(); + BUG_ON (nr_copy_pages_check != nr_copy_pages); + restore_highmem(); + device_power_up(); + local_irq_enable(); + return error; +} + +int swsusp_resume(void) +{ + int error; + local_irq_disable(); + if (device_power_down(PMSG_FREEZE)) + printk(KERN_ERR "Some devices failed to power down, very bad\n"); + /* We'll ignore saved state, but this gets preempt count (etc) right */ + save_processor_state(); + error = swsusp_arch_resume(); + /* Code below is only ever reached in case of failure. Otherwise + * execution continues at place where swsusp_arch_suspend was called + */ + BUG_ON(!error); + restore_processor_state(); + restore_highmem(); + device_power_up(); + local_irq_enable(); + return error; +} + +/* More restore stuff */ + +/* + * Returns true if given address/order collides with any orig_address + */ +static int does_collide_order(unsigned long addr, int order) +{ + int i; + + for (i=0; i < (1<<order); i++) + if (!PageNosaveFree(virt_to_page(addr + i * PAGE_SIZE))) + return 1; + return 0; +} + +/** + * On resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * which had been used before suspend. + * + * We don't know which pages are usable until we allocate them. + * + * Allocated but unusable (ie eaten) memory pages are linked together + * to create a list, so that we can free them easily + * + * We could have used a type other than (void *) + * for this purpose, but ... + */ +static void **eaten_memory = NULL; + +static inline void eat_page(void *page) +{ + void **c; + + c = eaten_memory; + eaten_memory = page; + *eaten_memory = c; +} + +static unsigned long get_usable_page(unsigned gfp_mask) +{ + unsigned long m; + + m = get_zeroed_page(gfp_mask); + while (does_collide_order(m, 0)) { + eat_page((void *)m); + m = get_zeroed_page(gfp_mask); + if (!m) + break; + } + return m; +} + +static void free_eaten_memory(void) +{ + unsigned long m; + void **c; + int i = 0; + + c = eaten_memory; + while (c) { + m = (unsigned long)c; + c = *c; + free_page(m); + i++; + } + eaten_memory = NULL; + pr_debug("swsusp: %d unused pages freed\n", i); +} + +/** + * check_pagedir - We ensure here that pages that the PBEs point to + * won't collide with pages where we're going to restore from the loaded + * pages later + */ + +static int check_pagedir(struct pbe *pblist) +{ + struct pbe *p; + + /* This is necessary, so that we can free allocated pages + * in case of failure + */ + for_each_pbe (p, pblist) + p->address = 0UL; + + for_each_pbe (p, pblist) { + p->address = get_usable_page(GFP_ATOMIC); + if (!p->address) + return -ENOMEM; + } + return 0; +} + +/** + * swsusp_pagedir_relocate - It is possible, that some memory pages + * occupied by the list of PBEs collide with pages where we're going to + * restore from the loaded pages later. We relocate them here. + */ + +static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) +{ + struct zone *zone; + unsigned long zone_pfn; + struct pbe *pbpage, *tail, *p; + void *m; + int rel = 0, error = 0; + + if (!pblist) /* a sanity check */ + return NULL; + + pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", + swsusp_info.pagedir_pages); + + /* Set page flags */ + + for_each_zone(zone) { + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) + SetPageNosaveFree(pfn_to_page(zone_pfn + + zone->zone_start_pfn)); + } + + /* Clear orig addresses */ + + for_each_pbe (p, pblist) + ClearPageNosaveFree(virt_to_page(p->orig_address)); + + tail = pblist + PB_PAGE_SKIP; + + /* Relocate colliding pages */ + + for_each_pb_page (pbpage, pblist) { + if (does_collide_order((unsigned long)pbpage, 0)) { + m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD); + if (!m) { + error = -ENOMEM; + break; + } + memcpy(m, (void *)pbpage, PAGE_SIZE); + if (pbpage == pblist) + pblist = (struct pbe *)m; + else + tail->next = (struct pbe *)m; + + eat_page((void *)pbpage); + pbpage = (struct pbe *)m; + + /* We have to link the PBEs again */ + + for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) + if (p->next) /* needed to save the end */ + p->next = p + 1; + + rel++; + } + tail = pbpage + PB_PAGE_SKIP; + } + + if (error) { + printk("\nswsusp: Out of memory\n\n"); + free_pagedir(pblist); + free_eaten_memory(); + pblist = NULL; + } + else + printk("swsusp: Relocated %d pages\n", rel); + + return pblist; +} + +/** + * Using bio to read from swap. + * This code requires a bit more work than just using buffer heads + * but, it is the recommended way for 2.5/2.6. + * The following are to signal the beginning and end of I/O. Bios + * finish asynchronously, while we want them to happen synchronously. + * A simple atomic_t, and a wait loop take care of this problem. + */ + +static atomic_t io_done = ATOMIC_INIT(0); + +static int end_io(struct bio * bio, unsigned int num, int err) +{ + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + panic("I/O error reading memory image"); + atomic_set(&io_done, 0); + return 0; +} + +static struct block_device * resume_bdev; + +/** + * submit - submit BIO request. + * @rw: READ or WRITE. + * @off physical offset of page. + * @page: page we're reading or writing. + * + * Straight from the textbook - allocate and initialize the bio. + * If we're writing, make sure the page is marked as dirty. + * Then submit it and wait. + */ + +static int submit(int rw, pgoff_t page_off, void * page) +{ + int error = 0; + struct bio * bio; + + bio = bio_alloc(GFP_ATOMIC, 1); + if (!bio) + return -ENOMEM; + bio->bi_sector = page_off * (PAGE_SIZE >> 9); + bio_get(bio); + bio->bi_bdev = resume_bdev; + bio->bi_end_io = end_io; + + if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { + printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); + error = -EFAULT; + goto Done; + } + + if (rw == WRITE) + bio_set_pages_dirty(bio); + + atomic_set(&io_done, 1); + submit_bio(rw | (1 << BIO_RW_SYNC), bio); + while (atomic_read(&io_done)) + yield(); + + Done: + bio_put(bio); + return error; +} + +static int bio_read_page(pgoff_t page_off, void * page) +{ + return submit(READ, page_off, page); +} + +static int bio_write_page(pgoff_t page_off, void * page) +{ + return submit(WRITE, page_off, page); +} + +/* + * Sanity check if this image makes sense with this kernel/swap context + * I really don't think that it's foolproof but more than nothing.. + */ + +static const char * sanity_check(void) +{ + dump_info(); + if(swsusp_info.version_code != LINUX_VERSION_CODE) + return "kernel version"; + if(swsusp_info.num_physpages != num_physpages) + return "memory size"; + if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) + return "system type"; + if (strcmp(swsusp_info.uts.release,system_utsname.release)) + return "kernel release"; + if (strcmp(swsusp_info.uts.version,system_utsname.version)) + return "version"; + if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) + return "machine"; + if(swsusp_info.cpus != num_online_cpus()) + return "number of cpus"; + return NULL; +} + + +static int check_header(void) +{ + const char * reason = NULL; + int error; + + if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) + return error; + + /* Is this same machine? */ + if ((reason = sanity_check())) { + printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); + return -EPERM; + } + nr_copy_pages = swsusp_info.image_pages; + return error; +} + +static int check_sig(void) +{ + int error; + + memset(&swsusp_header, 0, sizeof(swsusp_header)); + if ((error = bio_read_page(0, &swsusp_header))) + return error; + if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { + memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + + /* + * Reset swap signature now. + */ + error = bio_write_page(0, &swsusp_header); + } else { + printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n"); + return -EINVAL; + } + if (!error) + pr_debug("swsusp: Signature found, resuming\n"); + return error; +} + +/** + * data_read - Read image pages from swap. + * + * You do not need to check for overlaps, check_pagedir() + * already did that. + */ + +static int data_read(struct pbe *pblist) +{ + struct pbe * p; + int error = 0; + int i = 0; + int mod = swsusp_info.image_pages / 100; + + if (!mod) + mod = 1; + + printk("swsusp: Reading image data (%lu pages): ", + swsusp_info.image_pages); + + for_each_pbe (p, pblist) { + if (!(i % mod)) + printk("\b\b\b\b%3d%%", i / mod); + + error = bio_read_page(swp_offset(p->swap_address), + (void *)p->address); + if (error) + return error; + + i++; + } + printk("\b\b\b\bdone\n"); + return error; +} + +extern dev_t name_to_dev_t(const char *line); + +/** + * read_pagedir - Read page backup list pages from swap + */ + +static int read_pagedir(struct pbe *pblist) +{ + struct pbe *pbpage, *p; + unsigned i = 0; + int error; + + if (!pblist) + return -EFAULT; + + printk("swsusp: Reading pagedir (%lu pages)\n", + swsusp_info.pagedir_pages); + + for_each_pb_page (pbpage, pblist) { + unsigned long offset = swp_offset(swsusp_info.pagedir[i++]); + + error = -EFAULT; + if (offset) { + p = (pbpage + PB_PAGE_SKIP)->next; + error = bio_read_page(offset, (void *)pbpage); + (pbpage + PB_PAGE_SKIP)->next = p; + } + if (error) + break; + } + + if (error) + free_page((unsigned long)pblist); + + BUG_ON(i != swsusp_info.pagedir_pages); + + return error; +} + + +static int check_suspend_image(void) +{ + int error = 0; + + if ((error = check_sig())) + return error; + + if ((error = check_header())) + return error; + + return 0; +} + +static int read_suspend_image(void) +{ + int error = 0; + struct pbe *p; + + if (!(p = alloc_pagedir(nr_copy_pages))) + return -ENOMEM; + + if ((error = read_pagedir(p))) + return error; + + create_pbe_list(p, nr_copy_pages); + + if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) + return -ENOMEM; + + /* Allocate memory for the image and read the data from swap */ + + error = check_pagedir(pagedir_nosave); + free_eaten_memory(); + if (!error) + error = data_read(pagedir_nosave); + + if (error) { /* We fail cleanly */ + for_each_pbe (p, pagedir_nosave) + if (p->address) { + free_page(p->address); + p->address = 0UL; + } + free_pagedir(pagedir_nosave); + } + return error; +} + +/** + * swsusp_check - Check for saved image in swap + */ + +int swsusp_check(void) +{ + int error; + + if (!swsusp_resume_device) { + if (!strlen(resume_file)) + return -ENOENT; + swsusp_resume_device = name_to_dev_t(resume_file); + pr_debug("swsusp: Resume From Partition %s\n", resume_file); + } else { + pr_debug("swsusp: Resume From Partition %d:%d\n", + MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); + } + + resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); + if (!IS_ERR(resume_bdev)) { + set_blocksize(resume_bdev, PAGE_SIZE); + error = check_suspend_image(); + if (error) + blkdev_put(resume_bdev); + } else + error = PTR_ERR(resume_bdev); + + if (!error) + pr_debug("swsusp: resume file found\n"); + else + pr_debug("swsusp: Error %d check for resume file\n", error); + return error; +} + +/** + * swsusp_read - Read saved image from swap. + */ + +int swsusp_read(void) +{ + int error; + + if (IS_ERR(resume_bdev)) { + pr_debug("swsusp: block device not initialised\n"); + return PTR_ERR(resume_bdev); + } + + error = read_suspend_image(); + blkdev_put(resume_bdev); + + if (!error) + pr_debug("swsusp: Reading resume file was successful\n"); + else + pr_debug("swsusp: Error %d resuming\n", error); + return error; +} + +/** + * swsusp_close - close swap device. + */ + +void swsusp_close(void) +{ + if (IS_ERR(resume_bdev)) { + pr_debug("swsusp: block device not initialised\n"); + return; + } + + blkdev_put(resume_bdev); +} |