diff options
-rw-r--r-- | Documentation/ABI/testing/sysfs-devices-power | 35 | ||||
-rw-r--r-- | Documentation/ABI/testing/sysfs-power | 59 | ||||
-rw-r--r-- | Documentation/power/suspend-and-cpuhotplug.txt | 2 | ||||
-rw-r--r-- | drivers/base/power/main.c | 10 | ||||
-rw-r--r-- | drivers/base/power/sysfs.c | 54 | ||||
-rw-r--r-- | drivers/base/power/wakeup.c | 174 | ||||
-rw-r--r-- | fs/eventpoll.c | 90 | ||||
-rw-r--r-- | include/linux/capability.h | 5 | ||||
-rw-r--r-- | include/linux/eventpoll.h | 12 | ||||
-rw-r--r-- | include/linux/pm_wakeup.h | 15 | ||||
-rw-r--r-- | include/linux/suspend.h | 14 | ||||
-rw-r--r-- | include/trace/events/power.h | 34 | ||||
-rw-r--r-- | kernel/power/Kconfig | 27 | ||||
-rw-r--r-- | kernel/power/Makefile | 2 | ||||
-rw-r--r-- | kernel/power/autosleep.c | 127 | ||||
-rw-r--r-- | kernel/power/main.c | 160 | ||||
-rw-r--r-- | kernel/power/power.h | 27 | ||||
-rw-r--r-- | kernel/power/swap.c | 62 | ||||
-rw-r--r-- | kernel/power/wakelock.c | 259 |
19 files changed, 1050 insertions, 118 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 840f7d64d483..45000f0db4d4 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -96,16 +96,26 @@ Description: is read-only. If the device is not enabled to wake up the system from sleep states, this attribute is not present. -What: /sys/devices/.../power/wakeup_hit_count -Date: September 2010 +What: /sys/devices/.../power/wakeup_abort_count +Date: February 2012 Contact: Rafael J. Wysocki <rjw@sisk.pl> Description: - The /sys/devices/.../wakeup_hit_count attribute contains the + The /sys/devices/.../wakeup_abort_count attribute contains the number of times the processing of a wakeup event associated with - the device might prevent the system from entering a sleep state. - This attribute is read-only. If the device is not enabled to - wake up the system from sleep states, this attribute is not - present. + the device might have aborted system transition into a sleep + state in progress. This attribute is read-only. If the device + is not enabled to wake up the system from sleep states, this + attribute is not present. + +What: /sys/devices/.../power/wakeup_expire_count +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../wakeup_expire_count attribute contains the + number of times a wakeup event associated with the device has + been reported with a timeout that expired. This attribute is + read-only. If the device is not enabled to wake up the system + from sleep states, this attribute is not present. What: /sys/devices/.../power/wakeup_active Date: September 2010 @@ -148,6 +158,17 @@ Description: not enabled to wake up the system from sleep states, this attribute is not present. +What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute + contains the total time the device has been preventing + opportunistic transitions to sleep states from occuring. + This attribute is read-only. If the device is not enabled to + wake up the system from sleep states, this attribute is not + present. + What: /sys/devices/.../power/autosuspend_delay_ms Date: September 2010 Contact: Alan Stern <stern@rowland.harvard.edu> diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index b464d12761ba..31725ffeeb3a 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -172,3 +172,62 @@ Description: Reading from this file will display the current value, which is set to 1 MB by default. + +What: /sys/power/autosleep +Date: April 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/autosleep file can be written one of the strings + returned by reads from /sys/power/state. If that happens, a + work item attempting to trigger a transition of the system to + the sleep state represented by that string is queued up. This + attempt will only succeed if there are no active wakeup sources + in the system at that time. After every execution, regardless + of whether or not the attempt to put the system to sleep has + succeeded, the work item requeues itself until user space + writes "off" to /sys/power/autosleep. + + Reading from this file causes the last string successfully + written to it to be returned. + +What: /sys/power/wake_lock +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/wake_lock file allows user space to create + wakeup source objects and activate them on demand (if one of + those wakeup sources is active, reads from the + /sys/power/wakeup_count file block or return false). When a + string without white space is written to /sys/power/wake_lock, + it will be assumed to represent a wakeup source name. If there + is a wakeup source object with that name, it will be activated + (unless active already). Otherwise, a new wakeup source object + will be registered, assigned the given name and activated. + If a string written to /sys/power/wake_lock contains white + space, the part of the string preceding the white space will be + regarded as a wakeup source name and handled as descrived above. + The other part of the string will be regarded as a timeout (in + nanoseconds) such that the wakeup source will be automatically + deactivated after it has expired. The timeout, if present, is + set regardless of the current state of the wakeup source object + in question. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of it that are active at + the moment, separated with spaces. + + +What: /sys/power/wake_unlock +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/wake_unlock file allows user space to deactivate + wakeup sources created with the help of /sys/power/wake_lock. + When a string is written to /sys/power/wake_unlock, it will be + assumed to represent the name of a wakeup source to deactivate. + If a wakeup source object of that name exists and is active at + the moment, it will be deactivated. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of /sys/power/wake_lock + that are inactive at the moment, separated with spaces. diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index f28f9a6f0347..e13dafc8e8f1 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -29,7 +29,7 @@ More details follow: Write 'mem' to /sys/power/state - syfs file + sysfs file | v Acquire pm_mutex lock diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b462c0e341cb..e0fb5b0435a3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -889,6 +889,11 @@ static int dpm_suspend_noirq(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_noirq_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) @@ -962,6 +967,11 @@ static int dpm_suspend_late(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_late_early_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 95c12f6cb5b9..48be2ad4dd2c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -314,22 +314,41 @@ static ssize_t wakeup_active_count_show(struct device *dev, static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); -static ssize_t wakeup_hit_count_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_abort_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->wakeup_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL); + +static ssize_t wakeup_expire_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->hit_count; + count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL); +static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -398,6 +417,27 @@ static ssize_t wakeup_last_time_show(struct device *dev, } static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, + wakeup_prevent_sleep_time_show, NULL); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG @@ -486,11 +526,15 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, - &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_abort_count.attr, + &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &dev_attr_wakeup_prevent_sleep_time_ms.attr, +#endif #endif NULL, }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2a3e581b8dcd..cbb463b3a750 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -14,16 +14,15 @@ #include <linux/suspend.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <trace/events/power.h> #include "power.h" -#define TIMEOUT 100 - /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ -bool events_check_enabled; +bool events_check_enabled __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -52,6 +51,8 @@ static void pm_wakeup_timer_fn(unsigned long data); static LIST_HEAD(wakeup_sources); +static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); + /** * wakeup_source_prepare - Prepare a new wakeup source for initialization. * @ws: Wakeup source to prepare. @@ -132,6 +133,7 @@ void wakeup_source_add(struct wakeup_source *ws) spin_lock_init(&ws->lock); setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws); ws->active = false; + ws->last_time = ktime_get(); spin_lock_irq(&events_lock); list_add_rcu(&ws->entry, &wakeup_sources); @@ -374,12 +376,33 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable); */ static void wakeup_source_activate(struct wakeup_source *ws) { + unsigned int cec; + ws->active = true; ws->active_count++; ws->last_time = ktime_get(); + if (ws->autosleep_enabled) + ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. */ - atomic_inc(&combined_event_count); + cec = atomic_inc_return(&combined_event_count); + + trace_wakeup_source_activate(ws->name, cec); +} + +/** + * wakeup_source_report_event - Report wakeup event using the given source. + * @ws: Wakeup source to report the event for. + */ +static void wakeup_source_report_event(struct wakeup_source *ws) +{ + ws->event_count++; + /* This is racy, but the counter is approximate anyway. */ + if (events_check_enabled) + ws->wakeup_count++; + + if (!ws->active) + wakeup_source_activate(ws); } /** @@ -397,10 +420,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); - + wakeup_source_report_event(ws); del_timer(&ws->timer); ws->timer_expires = 0; @@ -432,6 +452,17 @@ void pm_stay_awake(struct device *dev) } EXPORT_SYMBOL_GPL(pm_stay_awake); +#ifdef CONFIG_PM_AUTOSLEEP +static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) +{ + ktime_t delta = ktime_sub(now, ws->start_prevent_time); + ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); +} +#else +static inline void update_prevent_sleep_time(struct wakeup_source *ws, + ktime_t now) {} +#endif + /** * wakup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. @@ -442,6 +473,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake); */ static void wakeup_source_deactivate(struct wakeup_source *ws) { + unsigned int cnt, inpr, cec; ktime_t duration; ktime_t now; @@ -468,14 +500,23 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) ws->max_time = duration; + ws->last_time = now; del_timer(&ws->timer); ws->timer_expires = 0; + if (ws->autosleep_enabled) + update_prevent_sleep_time(ws, now); + /* * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. */ - atomic_add(MAX_IN_PROGRESS, &combined_event_count); + cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); + trace_wakeup_source_deactivate(ws->name, cec); + + split_counters(&cnt, &inpr); + if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) + wake_up(&wakeup_count_wait_queue); } /** @@ -536,8 +577,10 @@ static void pm_wakeup_timer_fn(unsigned long data) spin_lock_irqsave(&ws->lock, flags); if (ws->active && ws->timer_expires - && time_after_eq(jiffies, ws->timer_expires)) + && time_after_eq(jiffies, ws->timer_expires)) { wakeup_source_deactivate(ws); + ws->expire_count++; + } spin_unlock_irqrestore(&ws->lock, flags); } @@ -564,9 +607,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); + wakeup_source_report_event(ws); if (!msec) { wakeup_source_deactivate(ws); @@ -609,24 +650,6 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) EXPORT_SYMBOL_GPL(pm_wakeup_event); /** - * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources. - */ -static void pm_wakeup_update_hit_counts(void) -{ - unsigned long flags; - struct wakeup_source *ws; - - rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - spin_lock_irqsave(&ws->lock, flags); - if (ws->active) - ws->hit_count++; - spin_unlock_irqrestore(&ws->lock, flags); - } - rcu_read_unlock(); -} - -/** * pm_wakeup_pending - Check if power transition in progress should be aborted. * * Compare the current number of registered wakeup events with its preserved @@ -648,32 +671,38 @@ bool pm_wakeup_pending(void) events_check_enabled = !ret; } spin_unlock_irqrestore(&events_lock, flags); - if (ret) - pm_wakeup_update_hit_counts(); return ret; } /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. + * @block: Whether or not to block. * - * Store the number of registered wakeup events at the address in @count. Block - * if the current number of wakeup events being processed is nonzero. + * Store the number of registered wakeup events at the address in @count. If + * @block is set, block until the current number of wakeup events being + * processed is zero. * - * Return 'false' if the wait for the number of wakeup events being processed to - * drop down to zero has been interrupted by a signal (and the current number - * of wakeup events being processed is still nonzero). Otherwise return 'true'. + * Return 'false' if the current number of wakeup events being processed is + * nonzero. Otherwise return 'true'. */ -bool pm_get_wakeup_count(unsigned int *count) +bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; - for (;;) { - split_counters(&cnt, &inpr); - if (inpr == 0 || signal_pending(current)) - break; - pm_wakeup_update_hit_counts(); - schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); + if (block) { + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); + split_counters(&cnt, &inpr); + if (inpr == 0 || signal_pending(current)) + break; + + schedule(); + } + finish_wait(&wakeup_count_wait_queue, &wait); } split_counters(&cnt, &inpr); @@ -703,11 +732,37 @@ bool pm_save_wakeup_count(unsigned int count) events_check_enabled = true; } spin_unlock_irq(&events_lock); - if (!events_check_enabled) - pm_wakeup_update_hit_counts(); return events_check_enabled; } +#ifdef CONFIG_PM_AUTOSLEEP +/** + * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. + * @enabled: Whether to set or to clear the autosleep_enabled flags. + */ +void pm_wakep_autosleep_enabled(bool set) +{ + struct wakeup_source *ws; + ktime_t now = ktime_get(); + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irq(&ws->lock); + if (ws->autosleep_enabled != set) { + ws->autosleep_enabled = set; + if (ws->active) { + if (set) + ws->start_prevent_time = now; + else + update_prevent_sleep_time(ws, now); + } + } + spin_unlock_irq(&ws->lock); + } + rcu_read_unlock(); +} +#endif /* CONFIG_PM_AUTOSLEEP */ + static struct dentry *wakeup_sources_stats_dentry; /** @@ -723,27 +778,37 @@ static int print_wakeup_source_stats(struct seq_file *m, ktime_t max_time; unsigned long active_count; ktime_t active_time; + ktime_t prevent_sleep_time; int ret; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; + prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { - active_time = ktime_sub(ktime_get(), ws->last_time); + ktime_t now = ktime_get(); + + active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time.tv64 > max_time.tv64) max_time = active_time; + + if (ws->autosleep_enabled) + prevent_sleep_time = ktime_add(prevent_sleep_time, + ktime_sub(now, ws->start_prevent_time)); } else { active_time = ktime_set(0, 0); } - ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t" - "%lld\t\t%lld\t\t%lld\t\t%lld\n", - ws->name, active_count, ws->event_count, ws->hit_count, + ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" + "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", + ws->name, active_count, ws->event_count, + ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), - ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); + ktime_to_ms(max_time), ktime_to_ms(ws->last_time), + ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); @@ -758,8 +823,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; - seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t" - "active_since\ttotal_time\tmax_time\tlast_change\n"); + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\tprevent_suspend_time\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c0b3c70ee87a..2cf0f2153be5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -33,6 +33,7 @@ #include <linux/bitops.h> #include <linux/mutex.h> #include <linux/anon_inodes.h> +#include <linux/device.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -87,7 +88,7 @@ */ /* Epoll private bits inside the event mask */ -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 @@ -154,6 +155,9 @@ struct epitem { /* List header used to link this item to the "struct file" items list */ struct list_head fllink; + /* wakeup_source used when EPOLLWAKEUP is set */ + struct wakeup_source *ws; + /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; @@ -194,6 +198,9 @@ struct eventpoll { */ struct epitem *ovflist; + /* wakeup_source used when ep_scan_ready_list is running */ + struct wakeup_source *ws; + /* The user that created the eventpoll descriptor */ struct user_struct *user; @@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after @@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * Quickly re-inject items left on "txlist". */ list_splice(&txlist, &ep->rdllist); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { /* @@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); free_uid(ep->user); + wakeup_source_unregister(ep->ws); kfree(ep); } @@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ + __pm_relax(epi->ws); list_del_init(&epi->rdllink); } } @@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; + if (epi->ws) { + /* + * Activate ep->ws since epi->ws may get + * deactivated at any time. + */ + __pm_stay_awake(ep->ws); + } + } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1091,6 +1115,30 @@ static int reverse_path_check(void) return error; } +static int ep_create_wakeup_source(struct epitem *epi) +{ + const char *name; + + if (!epi->ep->ws) { + epi->ep->ws = wakeup_source_register("eventpoll"); + if (!epi->ep->ws) + return -ENOMEM; + } + + name = epi->ffd.file->f_path.dentry->d_name.name; + epi->ws = wakeup_source_register(name); + if (!epi->ws) + return -ENOMEM; + + return 0; +} + +static void ep_destroy_wakeup_source(struct epitem *epi) +{ + wakeup_source_unregister(epi->ws); + epi->ws = NULL; +} + /* * Must be called with "mtx" held. */ @@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) + goto error_create_wakeup_source; + } else { + epi->ws = NULL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1204,6 +1260,9 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + +error_create_wakeup_source: kmem_cache_free(epi_cache, epi); return error; @@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even epi->event.events = event->events; pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ + if (epi->event.events & EPOLLWAKEUP) { + if (!epi->ws) + ep_create_wakeup_source(epi); + } else if (epi->ws) { + ep_destroy_wakeup_source(epi); + } /* * Get current event bits. We can safely use the file* here because @@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, !list_empty(head) && eventcnt < esed->maxevents;) { epi = list_first_entry(head, struct epitem, rdllink); + /* + * Activate ep->ws before deactivating epi->ws to prevent + * triggering auto-suspend here (in case we reactive epi->ws + * below). + * + * This could be rearranged to delay the deactivation of epi->ws + * instead, but then epi->ws would temporarily be out of sync + * with ep_is_linked(). + */ + if (epi->ws && epi->ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(epi->ws); list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); + __pm_stay_awake(epi->ws); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); } } } @@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (!tfile->f_op || !tfile->f_op->poll) goto error_tgt_fput; + /* Check if EPOLLWAKEUP is allowed */ + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + goto error_tgt_fput; + /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit diff --git a/include/linux/capability.h b/include/linux/capability.h index 12d52dedb229..c398cff3dab7 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 +/* Allow preventing system suspends while epoll events are pending */ -#define CAP_LAST_CAP CAP_WAKE_ALARM +#define CAP_EPOLLWAKEUP 36 + +#define CAP_LAST_CAP CAP_EPOLLWAKEUP #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 657ab55beda0..6f8be328770a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -26,6 +26,18 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* + * Request the handling of system wakeup events so as to prevent system suspends + * from happening while those events are being processed. + * + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be + * re-allowed until epoll_wait is called again after consuming the wakeup + * event(s). + * + * Requires CAP_EPOLLWAKEUP + */ +#define EPOLLWAKEUP (1 << 29) + /* Set the One Shot behaviour for the target file descriptor */ #define EPOLLONESHOT (1 << 30) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index d9f05113e5fb..569781faa504 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -33,12 +33,15 @@ * * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. - * @last_time: Monotonic clock when the wakeup source's was activated last time. + * @last_time: Monotonic clock when the wakeup source's was touched last time. + * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. - * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @expire_count: Number of times the wakeup source's timeout has expired. + * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. + * @has_timeout: The wakeup source has been activated with a timeout. */ struct wakeup_source { const char *name; @@ -49,11 +52,15 @@ struct wakeup_source { ktime_t total_time; ktime_t max_time; ktime_t last_time; + ktime_t start_prevent_time; + ktime_t prevent_sleep_time; unsigned long event_count; unsigned long active_count; unsigned long relax_count; - unsigned long hit_count; - unsigned int active:1; + unsigned long expire_count; + unsigned long wakeup_count; + bool active:1; + bool autosleep_enabled:1; }; #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ac1c114c499d..cd83059fb592 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -356,8 +356,9 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); -extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); +extern void pm_wakep_autosleep_enabled(bool set); static inline void lock_system_sleep(void) { @@ -407,6 +408,17 @@ static inline void unlock_system_sleep(void) {} #endif /* !CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +void queue_up_suspend_work(void); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline void queue_up_suspend_work(void) {} + +#endif /* !CONFIG_PM_AUTOSLEEP */ + #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture diff --git a/include/trace/events/power.h b/include/trace/events/power.h index cae9a94f025d..0c9783841a30 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -65,6 +65,40 @@ TRACE_EVENT(machine_suspend, TP_printk("state=%lu", (unsigned long)__entry->state) ); +DECLARE_EVENT_CLASS(wakeup_source, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + ), + + TP_printk("%s state=0x%lx", __get_str(name), + (unsigned long)__entry->state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_activate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + #ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED /* diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index deb5461e3216..8f9b4eb974e0 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -103,6 +103,33 @@ config PM_SLEEP_SMP select HOTPLUG select HOTPLUG_CPU +config PM_AUTOSLEEP + bool "Opportunistic sleep" + depends on PM_SLEEP + default n + ---help--- + Allow the kernel to trigger a system transition into a global sleep + state automatically whenever there are no active wakeup sources. + +config PM_WAKELOCKS + bool "User space wakeup sources interface" + depends on PM_SLEEP + default n + ---help--- + Allow user space to create, activate and deactivate wakeup source + objects with the help of a sysfs-based interface. + +config PM_WAKELOCKS_LIMIT + int "Maximum number of user space wakeup sources (0 = no limit)" + range 0 100000 + default 100 + depends on PM_WAKELOCKS + +config PM_WAKELOCKS_GC + bool "Garbage collector for user space wakeup sources" + depends on PM_WAKELOCKS + default y + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 66d808ec5252..29472bff11ef 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -9,5 +9,7 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o +obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o +obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c new file mode 100644 index 000000000000..ca304046d9e2 --- /dev/null +++ b/kernel/power/autosleep.c @@ -0,0 +1,127 @@ +/* + * kernel/power/autosleep.c + * + * Opportunistic sleep support. + * + * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl> + */ + +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/pm_wakeup.h> + +#include "power.h" + +static suspend_state_t autosleep_state; +static struct workqueue_struct *autosleep_wq; +/* + * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source + * is active, otherwise a deadlock with try_to_suspend() is possible. + * Alternatively mutex_lock_interruptible() can be used. This will then fail + * if an auto_sleep cycle tries to freeze processes. + */ +static DEFINE_MUTEX(autosleep_lock); +static struct wakeup_source *autosleep_ws; + +static void try_to_suspend(struct work_struct *work) +{ + unsigned int initial_count, final_count; + + if (!pm_get_wakeup_count(&initial_count, true)) + goto out; + + mutex_lock(&autosleep_lock); + + if (!pm_save_wakeup_count(initial_count)) { + mutex_unlock(&autosleep_lock); + goto out; + } + + if (autosleep_state == PM_SUSPEND_ON) { + mutex_unlock(&autosleep_lock); + return; + } + if (autosleep_state >= PM_SUSPEND_MAX) + hibernate(); + else + pm_suspend(autosleep_state); + + mutex_unlock(&autosleep_lock); + + if (!pm_get_wakeup_count(&final_count, false)) + goto out; + + /* + * If the wakeup occured for an unknown reason, wait to prevent the + * system from trying to suspend and waking up in a tight loop. + */ + if (final_count == initial_count) + schedule_timeout_uninterruptible(HZ / 2); + + out: + queue_up_suspend_work(); +} + +static DECLARE_WORK(suspend_work, try_to_suspend); + +void queue_up_suspend_work(void) +{ + if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + queue_work(autosleep_wq, &suspend_work); +} + +suspend_state_t pm_autosleep_state(void) +{ + return autosleep_state; +} + +int pm_autosleep_lock(void) +{ + return mutex_lock_interruptible(&autosleep_lock); +} + +void pm_autosleep_unlock(void) +{ + mutex_unlock(&autosleep_lock); +} + +int pm_autosleep_set_state(suspend_state_t state) +{ + +#ifndef CONFIG_HIBERNATION + if (state >= PM_SUSPEND_MAX) + return -EINVAL; +#endif + + __pm_stay_awake(autosleep_ws); + + mutex_lock(&autosleep_lock); + + autosleep_state = state; + + __pm_relax(autosleep_ws); + + if (state > PM_SUSPEND_ON) { + pm_wakep_autosleep_enabled(true); + queue_up_suspend_work(); + } else { + pm_wakep_autosleep_enabled(false); + } + + mutex_unlock(&autosleep_lock); + return 0; +} + +int __init pm_autosleep_init(void) +{ + autosleep_ws = wakeup_source_register("autosleep"); + if (!autosleep_ws) + return -ENOMEM; + + autosleep_wq = alloc_ordered_workqueue("autosleep", 0); + if (autosleep_wq) + return 0; + + wakeup_source_unregister(autosleep_ws); + return -ENOMEM; +} diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c12581f1c62..428f8a034e96 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,8 +269,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, return (s - buf); } -static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) +static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_STANDBY; @@ -278,27 +277,48 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, #endif char *p; int len; - int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; - /* First, check if we are requested to hibernate */ - if (len == 4 && !strncmp(buf, "disk", len)) { - error = hibernate(); - goto Exit; - } + /* Check hibernation first. */ + if (len == 4 && !strncmp(buf, "disk", len)) + return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND - for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { - error = pm_suspend(state); - break; - } - } + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + return state; #endif - Exit: + return PM_SUSPEND_ON; +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + state = decode_state(buf, n); + if (state < PM_SUSPEND_MAX) + error = pm_suspend(state); + else if (state == PM_SUSPEND_MAX) + error = hibernate(); + else + error = -EINVAL; + + out: + pm_autosleep_unlock(); return error ? error : n; } @@ -339,7 +359,8 @@ static ssize_t wakeup_count_show(struct kobject *kobj, { unsigned int val; - return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; + return pm_get_wakeup_count(&val, true) ? + sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, @@ -347,15 +368,106 @@ static ssize_t wakeup_count_store(struct kobject *kobj, const char *buf, size_t n) { unsigned int val; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) - return n; + error = n; } - return -EINVAL; + + out: + pm_autosleep_unlock(); + return error; } power_attr(wakeup_count); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t autosleep_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + suspend_state_t state = pm_autosleep_state(); + + if (state == PM_SUSPEND_ON) + return sprintf(buf, "off\n"); + +#ifdef CONFIG_SUSPEND + if (state < PM_SUSPEND_MAX) + return sprintf(buf, "%s\n", valid_state(state) ? + pm_states[state] : "error"); +#endif +#ifdef CONFIG_HIBERNATION + return sprintf(buf, "disk\n"); +#else + return sprintf(buf, "error"); +#endif +} + +static ssize_t autosleep_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state = decode_state(buf, n); + int error; + + if (state == PM_SUSPEND_ON + && strcmp(buf, "off") && strcmp(buf, "off\n")) + return -EINVAL; + + error = pm_autosleep_set_state(state); + return error ? error : n; +} + +power_attr(autosleep); +#endif /* CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS +static ssize_t wake_lock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, true); +} + +static ssize_t wake_lock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_lock(buf); + return error ? error : n; +} + +power_attr(wake_lock); + +static ssize_t wake_unlock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, false); +} + +static ssize_t wake_unlock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_unlock(buf); + return error ? error : n; +} + +power_attr(wake_unlock); + +#endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -409,6 +521,13 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif +#ifdef CONFIG_PM_WAKELOCKS + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif @@ -444,7 +563,10 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - return sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + return pm_autosleep_init(); } core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h index 98f3622d7407..b0bd4beaebfe 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -264,3 +264,30 @@ static inline void suspend_thaw_processes(void) { } #endif + +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +extern int pm_autosleep_init(void); +extern int pm_autosleep_lock(void); +extern void pm_autosleep_unlock(void); +extern suspend_state_t pm_autosleep_state(void); +extern int pm_autosleep_set_state(suspend_state_t state); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline int pm_autosleep_init(void) { return 0; } +static inline int pm_autosleep_lock(void) { return 0; } +static inline void pm_autosleep_unlock(void) {} +static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } + +#endif /* !CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS + +/* kernel/power/wakelock.c */ +extern ssize_t pm_show_wakelocks(char *buf, bool show_active); +extern int pm_wake_lock(const char *buf); +extern int pm_wake_unlock(const char *buf); + +#endif /* !CONFIG_PM_WAKELOCKS */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index eef311a58a64..11e22c068e8b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,7 +6,7 @@ * * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> - * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com> + * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com> * * This file is released under the GPLv2. * @@ -282,14 +282,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) return -ENOSPC; if (bio_chain) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | + __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { @@ -367,12 +370,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - } - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; - handle->reqd_free_pages = reqd_free_pages(); + + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + /* + * Recalculate the number of required free pages, to + * make sure we never take more than half. + */ + handle->reqd_free_pages = reqd_free_pages(); + } } out: return error; @@ -419,8 +427,9 @@ static int swap_writer_finish(struct swap_map_handle *handle, /* Maximum number of threads for compression/decompression. */ #define LZO_THREADS 3 -/* Maximum number of pages for read buffering. */ -#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) +/* Minimum/maximum number of pages for read buffering. */ +#define LZO_MIN_RD_PAGES 1024 +#define LZO_MAX_RD_PAGES 8192 /** @@ -631,12 +640,6 @@ static int save_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of free pages after all allocations have been done. - * We don't want to run out of pages when writing. - */ - handle->reqd_free_pages = reqd_free_pages(); - - /* * Start the CRC32 thread. */ init_waitqueue_head(&crc->go); @@ -657,6 +660,12 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } + /* + * Adjust the number of required free pages after all allocations have + * been done. We don't want to run out of pages when writing. + */ + handle->reqd_free_pages = reqd_free_pages(); + printk(KERN_INFO "PM: Using %u thread(s) for compression.\n" "PM: Compressing and saving image data (%u pages) ... ", @@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned i, thr, run_threads, nr_threads; unsigned ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0; - unsigned long read_pages; + unsigned long read_pages = 0; unsigned char **page = NULL; struct dec_data *data = NULL; struct crc_data *crc = NULL; @@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of pages for read buffering, in case we are short. + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. */ - read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; - read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? __GFP_WAIT | __GFP_HIGH : - __GFP_WAIT); + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + if (!page[i]) { if (i < LZO_CMP_PAGES) { ring_size = i; diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 000000000000..c8fba3380076 --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,259 @@ +/* + * kernel/power/wakelock.c + * + * User space wakeup sources support. + * + * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl> + * + * This code is based on the analogous interface allowing user space to + * manipulate wakelocks on Android. + */ + +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/hrtimer.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +static DEFINE_MUTEX(wakelocks_lock); + +struct wakelock { + char *name; + struct rb_node node; + struct wakeup_source ws; +#ifdef CONFIG_PM_WAKELOCKS_GC + struct list_head lru; +#endif +}; + +static struct rb_root wakelocks_tree = RB_ROOT; + +ssize_t pm_show_wakelocks(char *buf, bool show_active) +{ + struct rb_node *node; + struct wakelock *wl; + char *str = buf; + char *end = buf + PAGE_SIZE; + + mutex_lock(&wakelocks_lock); + + for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { + wl = rb_entry(node, struct wakelock, node); + if (wl->ws.active == show_active) + str += scnprintf(str, end - str, "%s ", wl->name); + } + if (str > buf) + str--; + + str += scnprintf(str, end - str, "\n"); + + mutex_unlock(&wakelocks_lock); + return (str - buf); +} + +#if CONFIG_PM_WAKELOCKS_LIMIT > 0 +static unsigned int number_of_wakelocks; + +static inline bool wakelocks_limit_exceeded(void) +{ + return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT; +} + +static inline void increment_wakelocks_number(void) +{ + number_of_wakelocks++; +} + +static inline void decrement_wakelocks_number(void) +{ + number_of_wakelocks--; +} +#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */ +static inline bool wakelocks_limit_exceeded(void) { return false; } +static inline void increment_wakelocks_number(void) {} +static inline void decrement_wakelocks_number(void) {} +#endif /* CONFIG_PM_WAKELOCKS_LIMIT */ + +#ifdef CONFIG_PM_WAKELOCKS_GC +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static LIST_HEAD(wakelocks_lru_list); +static unsigned int wakelocks_gc_count; + +static inline void wakelocks_lru_add(struct wakelock *wl) +{ + list_add(&wl->lru, &wakelocks_lru_list); +} + +static inline void wakelocks_lru_most_recent(struct wakelock *wl) +{ + list_move(&wl->lru, &wakelocks_lru_list); +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now; + + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) + return; + + now = ktime_get(); + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + decrement_wakelocks_number(); + } + } + wakelocks_gc_count = 0; +} +#else /* !CONFIG_PM_WAKELOCKS_GC */ +static inline void wakelocks_lru_add(struct wakelock *wl) {} +static inline void wakelocks_lru_most_recent(struct wakelock *wl) {} +static inline void wakelocks_gc(void) {} +#endif /* !CONFIG_PM_WAKELOCKS_GC */ + +static struct wakelock *wakelock_lookup_add(const char *name, size_t len, + bool add_if_not_found) +{ + struct rb_node **node = &wakelocks_tree.rb_node; + struct rb_node *parent = *node; + struct wakelock *wl; + + while (*node) { + int diff; + + parent = *node; + wl = rb_entry(*node, struct wakelock, node); + diff = strncmp(name, wl->name, len); + if (diff == 0) { + if (wl->name[len]) + diff = -1; + else + return wl; + } + if (diff < 0) + node = &(*node)->rb_left; + else + node = &(*node)->rb_right; + } + if (!add_if_not_found) + return ERR_PTR(-EINVAL); + + if (wakelocks_limit_exceeded()) + return ERR_PTR(-ENOSPC); + + /* Not found, we have to add a new one. */ + wl = kzalloc(sizeof(*wl), GFP_KERNEL); + if (!wl) + return ERR_PTR(-ENOMEM); + + wl->name = kstrndup(name, len, GFP_KERNEL); + if (!wl->name) { + kfree(wl); + return ERR_PTR(-ENOMEM); + } + wl->ws.name = wl->name; + wakeup_source_add(&wl->ws); + rb_link_node(&wl->node, parent, node); + rb_insert_color(&wl->node, &wakelocks_tree); + wakelocks_lru_add(wl); + increment_wakelocks_number(); + return wl; +} + +int pm_wake_lock(const char *buf) +{ + const char *str = buf; + struct wakelock *wl; + u64 timeout_ns = 0; + size_t len; + int ret = 0; + + while (*str && !isspace(*str)) + str++; + + len = str - buf; + if (!len) + return -EINVAL; + + if (*str && *str != '\n') { + /* Find out if there's a valid timeout string appended. */ + ret = kstrtou64(skip_spaces(str), 10, &timeout_ns); + if (ret) + return -EINVAL; + } + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, true); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + if (timeout_ns) { + u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1; + + do_div(timeout_ms, NSEC_PER_MSEC); + __pm_wakeup_event(&wl->ws, timeout_ms); + } else { + __pm_stay_awake(&wl->ws); + } + + wakelocks_lru_most_recent(wl); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} + +int pm_wake_unlock(const char *buf) +{ + struct wakelock *wl; + size_t len; + int ret = 0; + + len = strlen(buf); + if (!len) + return -EINVAL; + + if (buf[len-1] == '\n') + len--; + + if (!len) + return -EINVAL; + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, false); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + __pm_relax(&wl->ws); + + wakelocks_lru_most_recent(wl); + wakelocks_gc(); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} |