summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2019-04-25 23:17:07 +0200
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2019-04-25 23:17:07 +0200
commit40aa4db6d530e2ea71e00765df77ac78477e8348 (patch)
tree0a8c4d8d7fd610077b7053609acf981f18c5b043
parent3ccf3f0cd1971e007680114ff732e8a717aafbf8 (diff)
parente94999688e3aa3c0a8ad5a60352cdc3ca3030434 (diff)
downloadlwn-40aa4db6d530e2ea71e00765df77ac78477e8348.tar.gz
lwn-40aa4db6d530e2ea71e00765df77ac78477e8348.zip
Merge cpuidle material depended on by the subsequent changes.
-rw-r--r--drivers/base/power/domain.c77
-rw-r--r--drivers/base/power/domain_governor.c67
-rw-r--r--drivers/cpuidle/cpuidle-exynos.c2
-rw-r--r--drivers/cpuidle/cpuidle.c19
-rw-r--r--include/linux/cpuidle.h1
-rw-r--r--include/linux/pm_domain.h21
-rw-r--r--include/linux/tick.h7
-rw-r--r--kernel/time/tick-sched.c12
8 files changed, 197 insertions, 9 deletions
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index d97bcf6918fb..8362dfe187f5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -22,6 +22,7 @@
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/export.h>
+#include <linux/cpu.h>
#include "power.h"
@@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
#define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE)
#define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON)
#define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP)
+#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) /* gates all genpd->cpus bookkeeping (allocation, update, free) */
static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
const struct generic_pm_domain *genpd)
@@ -1452,6 +1454,56 @@ static void genpd_free_dev_data(struct device *dev,
dev_pm_put_subsys_data(dev);
}
+static void __genpd_update_cpumask(struct generic_pm_domain *genpd,
+ int cpu, bool set, unsigned int depth)
+{
+ struct gpd_link *link;
+ /*
+ * Set (@set true) or clear (@set false) @cpu in genpd->cpus and, through
+ * genpd->slave_links, recursively in the cpumask of every master.
+ * @depth is the current lock nesting level; each master is locked with
+ * genpd_lock_nested() at @depth + 1. A domain without
+ * GENPD_FLAG_CPU_DOMAIN is left untouched and its masters are not visited.
+ */
+ if (!genpd_is_cpu_domain(genpd))
+ return;
+ /* Masters first: hold each master's lock across its own recursive update. */
+ list_for_each_entry(link, &genpd->slave_links, slave_node) {
+ struct generic_pm_domain *master = link->master;
+
+ genpd_lock_nested(master, depth + 1);
+ __genpd_update_cpumask(master, cpu, set, depth + 1);
+ genpd_unlock(master);
+ }
+ /* Then update this domain's own mask. */
+ if (set)
+ cpumask_set_cpu(cpu, genpd->cpus);
+ else
+ cpumask_clear_cpu(cpu, genpd->cpus);
+}
+/*
+ * Set or clear, in the cpumask of @genpd (and, via __genpd_update_cpumask(),
+ * of its masters), the CPU whose device is @dev. Does nothing if @genpd is
+ * not a CPU domain or if @dev is not the device of any possible CPU.
+ */
+static void genpd_update_cpumask(struct generic_pm_domain *genpd,
+ struct device *dev, bool set)
+{
+ int cpu;
+
+ if (!genpd_is_cpu_domain(genpd))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ if (get_cpu_device(cpu) == dev) {
+ __genpd_update_cpumask(genpd, cpu, set, 0);
+ return;
+ }
+ }
+}
+/* Record the CPU that @dev represents, if any, in the cpumask of @genpd. */
+static void genpd_set_cpumask(struct generic_pm_domain *genpd,
+ struct device *dev)
+{
+ genpd_update_cpumask(genpd, dev, true);
+}
+/* Drop the CPU that @dev represents, if any, from the cpumask of @genpd. */
+static void genpd_clear_cpumask(struct generic_pm_domain *genpd,
+ struct device *dev)
+{
+ genpd_update_cpumask(genpd, dev, false);
+}
+
static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
struct gpd_timing_data *td)
{
@@ -1473,6 +1525,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
genpd_lock(genpd);
+ genpd_set_cpumask(genpd, dev);
dev_pm_domain_set(dev, &genpd->domain);
genpd->device_count++;
@@ -1530,6 +1583,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
genpd->device_count--;
genpd->max_off_time_changed = true;
+ genpd_clear_cpumask(genpd, dev);
dev_pm_domain_set(dev, NULL);
list_del_init(&pdd->list_node);
@@ -1684,6 +1738,12 @@ out:
}
EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
+static void genpd_free_default_power_state(struct genpd_power_state *states,
+ unsigned int state_count) /* matches the genpd->free_states callback signature */
+{
+ kfree(states); /* state_count is unused: the array is released with a single kfree() */
+}
+
static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
{
struct genpd_power_state *state;
@@ -1694,7 +1754,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
genpd->states = state;
genpd->state_count = 1;
- genpd->free = state;
+ genpd->free_states = genpd_free_default_power_state;
return 0;
}
@@ -1760,11 +1820,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
if (genpd_is_always_on(genpd) && !genpd_status_on(genpd))
return -EINVAL;
+ if (genpd_is_cpu_domain(genpd) &&
+ !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
+ return -ENOMEM;
+
/* Use only one "off" state if there were no states declared */
if (genpd->state_count == 0) {
ret = genpd_set_default_power_state(genpd);
- if (ret)
+ if (ret) {
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
return ret;
+ }
} else if (!gov && genpd->state_count > 1) {
pr_warn("%s: no governor for states\n", genpd->name);
}
@@ -1810,7 +1877,11 @@ static int genpd_remove(struct generic_pm_domain *genpd)
list_del(&genpd->gpd_list_node);
genpd_unlock(genpd);
cancel_work_sync(&genpd->power_off_work);
- kfree(genpd->free);
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ if (genpd->free_states)
+ genpd->free_states(genpd->states, genpd->state_count);
+
pr_debug("%s: removed %s\n", __func__, genpd->name);
return 0;
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 4d07e38a8247..7912bc957244 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -10,6 +10,9 @@
#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
#include <linux/hrtimer.h>
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/ktime.h>
static int dev_update_qos_constraint(struct device *dev, void *data)
{
@@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
struct generic_pm_domain *genpd = pd_to_genpd(pd);
struct gpd_link *link;
- if (!genpd->max_off_time_changed)
+ if (!genpd->max_off_time_changed) {
+ genpd->state_idx = genpd->cached_power_down_state_idx;
return genpd->cached_power_down_ok;
+ }
/*
* We have to invalidate the cached results for the masters, so
@@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
genpd->state_idx--;
}
+ genpd->cached_power_down_state_idx = genpd->state_idx;
return genpd->cached_power_down_ok;
}
@@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain)
return false;
}
+#ifdef CONFIG_CPU_IDLE
+static bool cpu_power_down_ok(struct dev_pm_domain *pd)
+{
+ struct generic_pm_domain *genpd = pd_to_genpd(pd);
+ struct cpuidle_device *dev;
+ ktime_t domain_wakeup, next_hrtimer;
+ s64 idle_duration_ns;
+ int cpu, i;
+
+ /*
+ * ->power_down_ok() callback of pm_domain_cpu_gov: returns true and
+ * leaves the chosen state in genpd->state_idx when the domain may
+ * power down, false otherwise.
+ * First validate dev PM QoS constraints; this also selects the
+ * deepest state allowed by them in genpd->state_idx.
+ */
+ if (!default_power_down_ok(pd))
+ return false;
+ /* Not a CPU domain: no CPU wakeups to account for, the QoS verdict stands. */
+ if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN))
+ return true;
+
+ /*
+ * Find the next wakeup for any of the online CPUs within the PM domain
+ * and its subdomains. Note, we only need the genpd->cpus, as it already
+ * contains a mask of all CPUs from subdomains.
+ */
+ domain_wakeup = ktime_set(KTIME_SEC_MAX, 0);
+ for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) {
+ dev = per_cpu(cpuidle_devices, cpu);
+ if (dev) {
+ next_hrtimer = READ_ONCE(dev->next_hrtimer);
+ if (ktime_before(next_hrtimer, domain_wakeup))
+ domain_wakeup = next_hrtimer;
+ }
+ }
+
+ /*
+ * The minimum idle duration runs from now until the earliest wakeup;
+ * a wakeup that is already due (duration <= 0) vetoes the power down.
+ */
+ idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get()));
+ if (idle_duration_ns <= 0)
+ return false;
+
+ /*
+ * Find the deepest idle state that has its residency value satisfied
+ * and by also taking into account the power off latency for the state.
+ * Start at the state picked by the dev PM QoS constraint validation.
+ */
+ i = genpd->state_idx;
+ do {
+ if (idle_duration_ns >= (genpd->states[i].residency_ns +
+ genpd->states[i].power_off_latency_ns)) {
+ genpd->state_idx = i;
+ return true;
+ }
+ } while (--i >= 0); /* otherwise retry with the next lower state index */
+
+ return false;
+}
+/* Governor for CPU PM domains; declared in <linux/pm_domain.h> under the same CONFIG_CPU_IDLE guard. */
+struct dev_power_governor pm_domain_cpu_gov = {
+ .suspend_ok = default_suspend_ok,
+ .power_down_ok = cpu_power_down_ok,
+};
+#endif
+
struct dev_power_governor simple_qos_governor = {
.suspend_ok = default_suspend_ok,
.power_down_ok = default_power_down_ok,
diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c
index 0171a6e190d7..f7199a35cbb6 100644
--- a/drivers/cpuidle/cpuidle-exynos.c
+++ b/drivers/cpuidle/cpuidle-exynos.c
@@ -84,7 +84,7 @@ static struct cpuidle_driver exynos_idle_driver = {
[1] = {
.enter = exynos_enter_lowpower,
.exit_latency = 300,
- .target_residency = 100000,
+ .target_residency = 10000,
.name = "C1",
.desc = "ARM power down",
},
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 7f108309e871..0f4b7c45df3e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int index)
{
+ int ret = 0;
+
+ /*
+ * Store the next hrtimer, which is either the next tick or the next
+ * timer event, whichever expires first. For this data to be useful to
+ * consumers outside cpuidle, we rely on the governor's ->select()
+ * callback having already decided whether to stop the tick or not.
+ */
+ WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer());
+
if (cpuidle_state_is_coupled(drv, index))
- return cpuidle_enter_state_coupled(dev, drv, index);
- return cpuidle_enter_state(dev, drv, index);
+ ret = cpuidle_enter_state_coupled(dev, drv, index);
+ else
+ ret = cpuidle_enter_state(dev, drv, index);
+ /* Back from the idle state: reset the published expiry to 0. */
+ WRITE_ONCE(dev->next_hrtimer, 0);
+ return ret;
}
/**
@@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev)
{
memset(dev->states_usage, 0, sizeof(dev->states_usage));
dev->last_residency = 0;
+ dev->next_hrtimer = 0;
}
/**
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3b39472324a3..bb9a0db89f1a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -83,6 +83,7 @@ struct cpuidle_device {
unsigned int use_deepest_state:1;
unsigned int poll_time_limit:1;
unsigned int cpu;
+ ktime_t next_hrtimer;
int last_residency;
struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 1ed5874bcee0..bc82e74560ee 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
+#include <linux/cpumask.h>
/*
* Flags to control the behaviour of a genpd.
@@ -42,11 +43,22 @@
* GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered
* on, in case any of its attached devices is used
* in the wakeup path to serve system wakeups.
+ *
+ * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get
+ * devices attached that may belong to CPUs, or
+ * to have subdomains with CPUs attached.
+ * This flag enables the genpd backend driver to
+ * deploy idle power management support for CPUs
+ * and groups of CPUs. Note that the backend
+ * driver must then comply with the so-called
+ * last-man-standing algorithm for the CPUs in the
+ * PM domain.
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
#define GENPD_FLAG_ALWAYS_ON (1U << 2)
#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3)
+#define GENPD_FLAG_CPU_DOMAIN (1U << 4)
enum gpd_status {
GPD_STATE_ACTIVE = 0, /* PM domain is active */
@@ -69,6 +81,7 @@ struct genpd_power_state {
s64 residency_ns;
struct fwnode_handle *fwnode;
ktime_t idle_time;
+ void *data;
};
struct genpd_lock_ops;
@@ -93,6 +106,7 @@ struct generic_pm_domain {
unsigned int suspended_count; /* System suspend device counter */
unsigned int prepared_count; /* Suspend counter of prepared devices */
unsigned int performance_state; /* Aggregated max performance state */
+ cpumask_var_t cpus; /* A cpumask of the attached CPUs */
int (*power_off)(struct generic_pm_domain *domain);
int (*power_on)(struct generic_pm_domain *domain);
struct opp_table *opp_table; /* OPP table of the genpd */
@@ -104,15 +118,17 @@ struct generic_pm_domain {
s64 max_off_time_ns; /* Maximum allowed "suspended" time. */
bool max_off_time_changed;
bool cached_power_down_ok;
+ unsigned int cached_power_down_state_idx; /* state_idx saved with cached_power_down_ok; must be able to hold any state index, not just 0/1 */
int (*attach_dev)(struct generic_pm_domain *domain,
struct device *dev);
void (*detach_dev)(struct generic_pm_domain *domain,
struct device *dev);
unsigned int flags; /* Bit field of configs for genpd */
struct genpd_power_state *states;
+ void (*free_states)(struct genpd_power_state *states,
+ unsigned int state_count);
unsigned int state_count; /* number of states */
unsigned int state_idx; /* state that genpd will go to when off */
- void *free; /* Free the state that was allocated for default */
ktime_t on_time;
ktime_t accounting_time;
const struct genpd_lock_ops *lock_ops;
@@ -187,6 +203,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state);
extern struct dev_power_governor simple_qos_governor;
extern struct dev_power_governor pm_domain_always_on_gov;
+#ifdef CONFIG_CPU_IDLE
+extern struct dev_power_governor pm_domain_cpu_gov;
+#endif
#else
static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 55388ab45fd4..8891b5ac3e40 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
extern bool tick_nohz_idle_got_tick(void);
+extern ktime_t tick_nohz_get_next_hrtimer(void);
extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
extern unsigned long tick_nohz_get_idle_calls(void);
extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
@@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { }
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
static inline bool tick_nohz_idle_got_tick(void) { return false; }
-
+static inline ktime_t tick_nohz_get_next_hrtimer(void)
+{
+ /* The next wakeup is the tick: assume a full tick period (TICK_NSEC) that starts now. */
+ return ktime_add(ktime_get(), TICK_NSEC);
+}
static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
*delta_next = TICK_NSEC;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6fa52cd6df0b..8d18e03124ff 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1023,6 +1023,18 @@ bool tick_nohz_idle_got_tick(void)
}
/**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whichever expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled.
+ *
+ * NOTE(review): dereferences this CPU's tick_cpu_device.evtdev without a NULL
+ * check; presumably an event device is always installed by then - confirm.
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+ return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
+/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
*