diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2019-04-25 23:17:07 +0200 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2019-04-25 23:17:07 +0200 |
commit | 40aa4db6d530e2ea71e00765df77ac78477e8348 (patch) | |
tree | 0a8c4d8d7fd610077b7053609acf981f18c5b043 | |
parent | 3ccf3f0cd1971e007680114ff732e8a717aafbf8 (diff) | |
parent | e94999688e3aa3c0a8ad5a60352cdc3ca3030434 (diff) | |
download | lwn-40aa4db6d530e2ea71e00765df77ac78477e8348.tar.gz lwn-40aa4db6d530e2ea71e00765df77ac78477e8348.zip |
Merge cpuidle material depended on by the subsequent changes.
-rw-r--r-- | drivers/base/power/domain.c | 77 | ||||
-rw-r--r-- | drivers/base/power/domain_governor.c | 67 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-exynos.c | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle.c | 19 | ||||
-rw-r--r-- | include/linux/cpuidle.h | 1 | ||||
-rw-r--r-- | include/linux/pm_domain.h | 21 | ||||
-rw-r--r-- | include/linux/tick.h | 7 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 12 |
8 files changed, 197 insertions, 9 deletions
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index d97bcf6918fb..8362dfe187f5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -22,6 +22,7 @@ #include <linux/sched.h> #include <linux/suspend.h> #include <linux/export.h> +#include <linux/cpu.h> #include "power.h" @@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) +#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -1452,6 +1454,56 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } +static void __genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) +{ + struct gpd_link *link; + + if (!genpd_is_cpu_domain(genpd)) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + struct generic_pm_domain *master = link->master; + + genpd_lock_nested(master, depth + 1); + __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_unlock(master); + } + + if (set) + cpumask_set_cpu(cpu, genpd->cpus); + else + cpumask_clear_cpu(cpu, genpd->cpus); +} + +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + struct device *dev, bool set) +{ + int cpu; + + if (!genpd_is_cpu_domain(genpd)) + return; + + for_each_possible_cpu(cpu) { + if (get_cpu_device(cpu) == dev) { + __genpd_update_cpumask(genpd, cpu, set, 0); + return; + } + } +} + +static void genpd_set_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, true); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, false); +} + static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { @@ -1473,6 +1525,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, genpd_lock(genpd); + genpd_set_cpumask(genpd, dev); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1530,6 +1583,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; + genpd_clear_cpumask(genpd, dev); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -1684,6 +1738,12 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static void genpd_free_default_power_state(struct genpd_power_state *states, + unsigned int state_count) +{ + kfree(states); +} + static int genpd_set_default_power_state(struct generic_pm_domain *genpd) { struct genpd_power_state *state; @@ -1694,7 +1754,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) genpd->states = state; genpd->state_count = 1; - genpd->free = state; + genpd->free_states = genpd_free_default_power_state; return 0; } @@ -1760,11 +1820,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) return -EINVAL; + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); - if (ret) + if (ret) { + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); return ret; + } } else if (!gov && genpd->state_count > 1) { pr_warn("%s: no governor for states\n", genpd->name); } @@ -1810,7 +1877,11 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); - kfree(genpd->free); + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 4d07e38a8247..7912bc957244 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,9 @@ #include <linux/pm_domain.h> #include <linux/pm_qos.h> #include <linux/hrtimer.h> +#include <linux/cpuidle.h> +#include <linux/cpumask.h> +#include <linux/ktime.h> static int dev_update_qos_constraint(struct device *dev, void *data) { @@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; - if (!genpd->max_off_time_changed) + if (!genpd->max_off_time_changed) { + genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; + } /* * We have to invalidate the cached results for the masters, so @@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->state_idx--; } + genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } @@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) return false; } +#ifdef CONFIG_CPU_IDLE +static bool cpu_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct cpuidle_device *dev; + ktime_t domain_wakeup, next_hrtimer; + s64 idle_duration_ns; + int cpu, i; + + /* Validate dev PM QoS constraints. */ + if (!default_power_down_ok(pd)) + return false; + + if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) + return true; + + /* + * Find the next wakeup for any of the online CPUs within the PM domain + * and its subdomains. Note, we only need the genpd->cpus, as it already + * contains a mask of all CPUs from subdomains. + */ + domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); + for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { + dev = per_cpu(cpuidle_devices, cpu); + if (dev) { + next_hrtimer = READ_ONCE(dev->next_hrtimer); + if (ktime_before(next_hrtimer, domain_wakeup)) + domain_wakeup = next_hrtimer; + } + } + + /* The minimum idle duration is from now - until the next wakeup. */ + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + if (idle_duration_ns <= 0) + return false; + + /* + * Find the deepest idle state that has its residency value satisfied + * and by also taking into account the power off latency for the state. + * Start at the state picked by the dev PM QoS constraint validation. + */ + i = genpd->state_idx; + do { + if (idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) { + genpd->state_idx = i; + return true; + } + } while (--i >= 0); + + return false; +} + +struct dev_power_governor pm_domain_cpu_gov = { + .suspend_ok = default_suspend_ok, + .power_down_ok = cpu_power_down_ok, +}; +#endif + struct dev_power_governor simple_qos_governor = { .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c index 0171a6e190d7..f7199a35cbb6 100644 --- a/drivers/cpuidle/cpuidle-exynos.c +++ b/drivers/cpuidle/cpuidle-exynos.c @@ -84,7 +84,7 @@ static struct cpuidle_driver exynos_idle_driver = { [1] = { .enter = exynos_enter_lowpower, .exit_latency = 300, - .target_residency = 100000, + .target_residency = 10000, .name = "C1", .desc = "ARM power down", }, diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871..0f4b7c45df3e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either next tick or the next + * timer event, whatever expires first. Additionally, to make this data + * useful for consumers outside cpuidle, we rely on that the governor's + * ->select() callback have decided, whether to stop the tick or not. + */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a3..bb9a0db89f1a 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ed5874bcee0..bc82e74560ee 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,6 +16,7 @@ #include <linux/of.h> #include <linux/notifier.h> #include <linux/spinlock.h> +#include <linux/cpumask.h> /* * Flags to control the behaviour of a genpd. @@ -42,11 +43,22 @@ * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered * on, in case any of its attached devices is used * in the wakeup path to serve system wakeups. + * + * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get + * devices attached, which may belong to CPUs or + * possibly have subdomains with CPUs attached. + * This flag enables the genpd backend driver to + * deploy idle power management support for CPUs + * and groups of CPUs. Note that, the backend + * driver must then comply with the so called, + * last-man-standing algorithm, for the CPUs in the + * PM domain. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) +#define GENPD_FLAG_CPU_DOMAIN (1U << 4) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -69,6 +81,7 @@ struct genpd_power_state { s64 residency_ns; struct fwnode_handle *fwnode; ktime_t idle_time; + void *data; }; struct genpd_lock_ops; @@ -93,6 +106,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ + cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct opp_table *opp_table; /* OPP table of the genpd */ @@ -104,15 +118,17 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; bool cached_power_down_ok; + bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; + void (*free_states)(struct genpd_power_state *states, + unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - void *free; /* Free the state that was allocated for default */ ktime_t on_time; ktime_t accounting_time; const struct genpd_lock_ops *lock_ops; @@ -187,6 +203,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; +#ifdef CONFIG_CPU_IDLE +extern struct dev_power_governor pm_domain_cpu_gov; +#endif #else static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4..8891b5ac3e40 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0b..8d18e03124ff 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1023,6 +1023,18 @@ bool tick_nohz_idle_got_tick(void) } /** + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer + * or the tick, whatever that expires first. Note that, if the tick has been + * stopped, it returns the next hrtimer. + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + +/** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped * |