From e5f1b245870d59be0e6cc3b33edf5406a3b59648 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 5 Oct 2016 09:33:12 +0200 Subject: cpuidle: governors: Remove remaining old module code The governor's code use try_module_get() and put_module() to refcount the governor's module. But the governors are not compiled as module. The refcount does not prevent to switch the governor or unload a module as they aren't compiled as modules. The code is pointless, so remove it. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governor.c | 4 ---- drivers/cpuidle/governors/ladder.c | 2 -- drivers/cpuidle/governors/menu.c | 2 -- include/linux/cpuidle.h | 2 -- 4 files changed, 10 deletions(-) diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index fb9f511cca23..4e78263e34a4 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -9,7 +9,6 @@ */ #include -#include #include #include "cpuidle.h" @@ -53,14 +52,11 @@ int cpuidle_switch_governor(struct cpuidle_governor *gov) if (cpuidle_curr_governor) { list_for_each_entry(dev, &cpuidle_detected_devices, device_list) cpuidle_disable_device(dev); - module_put(cpuidle_curr_governor->owner); } cpuidle_curr_governor = gov; if (gov) { - if (!try_module_get(cpuidle_curr_governor->owner)) - return -EINVAL; list_for_each_entry(dev, &cpuidle_detected_devices, device_list) cpuidle_enable_device(dev); cpuidle_install_idle_handler(); diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 63bd5a403e22..fe8f08948fcb 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -177,7 +176,6 @@ static struct cpuidle_governor ladder_governor = { .enable = ladder_enable_device, .select = ladder_select_state, .reflect = ladder_reflect, - .owner = THIS_MODULE, }; /** diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 03d38c291de6..d9b5b9398a0f 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,7 +19,6 @@ #include #include #include -#include /* * Please note when changing the tuning values: @@ -484,7 +483,6 @@ static struct cpuidle_governor menu_governor = { .enable = menu_enable_device, .select = menu_select, .reflect = menu_reflect, - .owner = THIS_MODULE, }; /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb31373c3478..15deea449edc 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -235,8 +235,6 @@ struct cpuidle_governor { int (*select) (struct cpuidle_driver *drv, struct cpuidle_device *dev); void (*reflect) (struct cpuidle_device *dev, int index); - - struct module *owner; }; #ifdef CONFIG_CPU_IDLE -- cgit v1.2.3 From f5261402494a4893e7c5c6cd5ab99b8f7589b717 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Mon, 10 Oct 2016 20:44:22 +0800 Subject: PM / AVS: rockchip-io: make the log more consistent When testing SD hotplug automatically, I got bunch of useless log like this: [ 588.357838] mmc0: card 0007 removed [ 589.492664] rockchip-iodomain ff770000.syscon:io-domains: Setting to 3300000 done [ 589.500698] vccio_sd: ramp_delay not set [ 589.504817] rockchip-iodomain ff770000.syscon:io-domains: Setting to 3300000 done [ 589.669705] rockchip-iodomain ff770000.syscon:io-domains: Setting to 3300000 done [ 589.677593] vccio_sd: ramp_delay not set [ 589.681581] rockchip-iodomain ff770000.syscon:io-domains: Setting to 1800000 done [ 590.032820] dwmmc_rockchip ff0c0000.dwmmc: Successfully tuned phase to 140 [ 590.039725] mmc0: new ultra high speed SDR50 SDHC card at address 0007 [ 590.046641] mmcblk0: mmc0:0007 SD32G 29.3 GiB [ 590.052163] mmcblk0: p1 Moreover the code is intent to print the 'uV' for debug but later print it using dev_info. It looks more like to me that it should be the real intention of the code. Anyway, let's mark this verbose log as debug message. Signed-off-by: Shawn Lin Reviewed-by: Heiko Stuebner Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/power/avs/rockchip-io-domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c index 01b6d3f9b8fb..56bce1908be2 100644 --- a/drivers/power/avs/rockchip-io-domain.c +++ b/drivers/power/avs/rockchip-io-domain.c @@ -143,7 +143,7 @@ static int rockchip_iodomain_notify(struct notifier_block *nb, if (ret && event == REGULATOR_EVENT_PRE_VOLTAGE_CHANGE) return NOTIFY_BAD; - dev_info(supply->iod->dev, "Setting to %d done\n", uV); + dev_dbg(supply->iod->dev, "Setting to %d done\n", uV); return NOTIFY_OK; } -- cgit v1.2.3 From 974f86498ed28d86575cd2327bd83e53abd872da Mon Sep 17 00:00:00 2001 From: "Prakash, Prashanth" Date: Fri, 14 Oct 2016 12:00:23 -0600 Subject: cpufreq / CPPC: Add MODULE_DEVICE_TABLE for cppc_cpufreq driver MODULE_DEVICE_TABLE is added so that CPPC cpufreq module can be automatically loaded when we have a acpi processor device with "ACPI0007" hid. Signed-off-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 4852d9efe74e..e82bb3c30b92 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -247,3 +247,10 @@ MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec"); MODULE_LICENSE("GPL"); late_initcall(cppc_cpufreq_init); + +static const struct acpi_device_id cppc_acpi_ids[] = { + {ACPI_PROCESSOR_DEVICE_HID, }, + {} +}; + +MODULE_DEVICE_TABLE(acpi, cppc_acpi_ids); -- cgit v1.2.3 From 4bee77dc734ccd4119bb750479809ebc82776bef Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 20 Oct 2016 15:44:53 +0900 Subject: PM / OPP: make _of_get_opp_desc_node() a static function Since commit f47b72a15a96 ("PM / OPP: Move CONFIG_OF dependent code in a separate file"), this function is defined and called only in drivers/base/power/opp/of.c . Signed-off-by: Masahiro Yamada Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/of.c | 2 +- drivers/base/power/opp/opp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c index 5552211e6fcd..6480137bd256 100644 --- a/drivers/base/power/opp/of.c +++ b/drivers/base/power/opp/of.c @@ -198,7 +198,7 @@ void dev_pm_opp_of_remove_table(struct device *dev) EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); /* Returns opp descriptor node for a device, caller must do of_node_put() */ -struct device_node *_of_get_opp_desc_node(struct device *dev) +static struct device_node *_of_get_opp_desc_node(struct device *dev) { /* * TODO: Support for multiple OPP tables. diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index fabd5ca1a083..96cd30ac6c1d 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -190,7 +190,6 @@ struct opp_table { /* Routines internal to opp core */ struct opp_table *_find_opp_table(struct device *dev); struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table); -struct device_node *_of_get_opp_desc_node(struct device *dev); void _dev_pm_opp_remove_table(struct device *dev, bool remove_all); struct dev_pm_opp *_allocate_opp(struct device *dev, struct opp_table **opp_table); int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table); -- cgit v1.2.3 From 349aa92e81bd14552d8d2335aff490f306038603 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 20 Oct 2016 16:12:49 +0900 Subject: PM / OPP: fix debug/error messages in dev_pm_opp_of_get_sharing_cpus() These log messages are wrong because _of_get_opp_desc_node() returns an operating-points-v2 node. Commit a6eed752f5fb ("PM / OPP: passing NULL to PTR_ERR()") fixed static checker warnings, and reworded the messages at the same time (but the latter was not mentioned in the git-log). Restore the correct messages. Signed-off-by: Masahiro Yamada Acked-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c index 6480137bd256..5b3755e49731 100644 --- a/drivers/base/power/opp/of.c +++ b/drivers/base/power/opp/of.c @@ -562,7 +562,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, /* Get OPP descriptor node */ np = _of_get_opp_desc_node(cpu_dev); if (!np) { - dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__); + dev_dbg(cpu_dev, "%s: Couldn't find opp node.\n", __func__); return -ENOENT; } @@ -587,7 +587,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, /* Get OPP descriptor node */ tmp_np = _of_get_opp_desc_node(tcpu_dev); if (!tmp_np) { - dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n", + dev_err(tcpu_dev, "%s: Couldn't find opp node.\n", __func__); ret = -ENOENT; goto put_cpu_node; -- cgit v1.2.3 From 62006c1702b3b1be0c0726949e0ee0ea2326be9c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 20:16:58 +0200 Subject: PM / Runtime: Remove the exported function pm_children_suspended() The exported function pm_children_suspended() has only one caller, which is the runtime PM internal function, rpm_check_suspend_allowed(). Let's clean-up this code, by removing pm_children_suspended() altogether and instead do the one-liner check directly in rpm_check_suspend_allowed(). Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 3 ++- include/linux/pm_runtime.h | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 82a081ea4317..53b427dfc403 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -241,7 +241,8 @@ static int rpm_check_suspend_allowed(struct device *dev) retval = -EACCES; else if (atomic_read(&dev->power.usage_count) > 0) retval = -EAGAIN; - else if (!pm_children_suspended(dev)) + else if (!dev->power.ignore_children && + atomic_read(&dev->power.child_count)) retval = -EBUSY; /* Pending resume requests take precedence over suspends. */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2e14d2667b6c..61ea5666c94c 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -61,12 +61,6 @@ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) dev->power.ignore_children = enable; } -static inline bool pm_children_suspended(struct device *dev) -{ - return dev->power.ignore_children - || !atomic_read(&dev->power.child_count); -} - static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); @@ -162,7 +156,6 @@ static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} -static inline bool pm_children_suspended(struct device *dev) { return false; } static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool device_run_wake(struct device *dev) { return false; } -- cgit v1.2.3 From 216ef0b6b8c7f041a618913e94da52c3fdf82a99 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 20:16:59 +0200 Subject: PM / Runtime: Clarify comment in rpm_resume() when resuming the parent Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 53b427dfc403..117db71e84cb 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -713,8 +713,8 @@ static int rpm_resume(struct device *dev, int rpmflags) spin_lock(&parent->power.lock); /* - * We can resume if the parent's runtime PM is disabled or it - * is set to ignore children. + * Resume the parent if it has runtime PM enabled and not been + * set to ignore its children. */ if (!parent->power.disable_depth && !parent->power.ignore_children) { -- cgit v1.2.3 From b1a60995a684f2b6052cda640b0704361ab40089 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 20:17:00 +0200 Subject: PM / Runtime: Convert pm_runtime_set_suspended() to return an int Because pm_runtime_set_suspended() invokes __pm_runtime_set_status(), which can fail, pm_runtime_set_suspended() can also fail. Instead of hiding a potential error, let's propagate it by converting pm_runtime_set_suspended() from a void to return an int. In this way users are able to check the error code and act accordingly. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 61ea5666c94c..4957fc185ea9 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -258,9 +258,9 @@ static inline int pm_runtime_set_active(struct device *dev) return __pm_runtime_set_status(dev, RPM_ACTIVE); } -static inline void pm_runtime_set_suspended(struct device *dev) +static inline int pm_runtime_set_suspended(struct device *dev) { - __pm_runtime_set_status(dev, RPM_SUSPENDED); + return __pm_runtime_set_status(dev, RPM_SUSPENDED); } static inline void pm_runtime_disable(struct device *dev) -- cgit v1.2.3 From 1d29815ef2d2dd0fb1a62494a9c158c0dc49e2c8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Oct 2016 02:53:26 +0200 Subject: cpufreq: intel_pstate: Drop boost_iowait flag The "IOwait boosting" mechanism is only used by the get_target_pstate_use_cpu_load() governor function and the boost_iowait flag in pid_params is always set when that function is in use (and it is never set otherwise). This means that the boost_iowait flag is in fact redundant and may be dropped. For this reason, replace the boost_iowait flag check in intel_pstate_update_util() with an equivalent check against pstate_funcs.get_target_pstate and drop that flag. Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f535f8123258..16d071a618b7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -233,7 +233,6 @@ static struct cpudata **all_cpu_data; * @p_gain_pct: PID proportional gain * @i_gain_pct: PID integral gain * @d_gain_pct: PID derivative gain - * @boost_iowait: Whether or not to use iowait boosting. * * Stores per CPU model static PID configuration data. */ @@ -245,7 +244,6 @@ struct pstate_adjust_policy { int p_gain_pct; int d_gain_pct; int i_gain_pct; - bool boost_iowait; }; /** @@ -1043,7 +1041,6 @@ static const struct cpu_defaults silvermont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, - .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1065,7 +1062,6 @@ static const struct cpu_defaults airmont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, - .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1107,7 +1103,6 @@ static const struct cpu_defaults bxt_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, - .boost_iowait = true, }, .funcs = { .get_max = core_get_max_pstate, @@ -1347,7 +1342,7 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns; - if (pid_params.boost_iowait) { + if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) { if (flags & SCHED_CPUFREQ_IOWAIT) { cpu->iowait_boost = int_tofp(1); } else if (cpu->iowait_boost) { -- cgit v1.2.3 From 185d82456edaa5a7a8c8827840b8a675c60579c0 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 21 Oct 2016 09:38:20 -0700 Subject: cpufreq: intel_pstate: Remove PID debugfs when not used When target state is calculated using get_target_pstate_use_cpu_load(), PID controller is not used, hence it has no effect on performance. So don't present debugfs entries to tune PID controller. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 16d071a618b7..d46e881ff4d7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -636,8 +636,10 @@ static void __init intel_pstate_debug_expose_params(void) struct dentry *debugfs_parent; int i = 0; - if (hwp_active) + if (hwp_active || + pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) return; + debugfs_parent = debugfs_create_dir("pstate_snb", NULL); if (IS_ERR_OR_NULL(debugfs_parent)) return; -- cgit v1.2.3 From b8b97a4252547159fa4f3f46a8aa862c401cf969 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 20 Oct 2016 13:37:06 -0700 Subject: Documentation: intel_pstate: PID tuning is not always available PID tuning is not available when the get_target_pstate_use_cpu_load() is used to calculate target_state. So update the documentation. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/intel-pstate.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index e6bd1e6512a5..5528d6d59687 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -122,11 +122,12 @@ will run at is selected by the processor itself. Tuning Intel P-State driver -When HWP mode is not used, debugfs files have also been added to allow the -tuning of the internal governor algorithm. These files are located at -/sys/kernel/debug/pstate_snb/. The algorithm uses a PID (Proportional -Integral Derivative) controller. The PID tunable parameters are: +When the performance can be tuned using PID (Proportional Integral +Derivative) controller, debugfs files are provided for adjusting performance. +They are presented under: +/sys/kernel/debug/pstate_snb/ +The PID tunable parameters are: deadband d_gain_pct i_gain_pct -- cgit v1.2.3 From 59d65b73a23cee48e6f3e44686f199d79b7ee854 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:49 -0700 Subject: PM / Domains: Make genpd state allocation dynamic Allow PM Domain states to be defined dynamically by the drivers. This removes the limitation on the maximum number of states possible for a domain. Suggested-by: Ulf Hansson Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-imx/gpc.c | 17 ++++++++++------- drivers/base/power/domain.c | 35 +++++++++++++++++++++++------------ include/linux/pm_domain.h | 5 ++--- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 0df062d8b2c9..57a410bbb6a2 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -380,13 +380,6 @@ static struct pu_domain imx6q_pu_domain = { .name = "PU", .power_off = imx6q_pm_pu_power_off, .power_on = imx6q_pm_pu_power_on, - .states = { - [0] = { - .power_off_latency_ns = 25000, - .power_on_latency_ns = 2000000, - }, - }, - .state_count = 1, }, }; @@ -430,6 +423,16 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) return 0; + imx6q_pu_domain.base.states = devm_kzalloc(dev, + sizeof(*imx6q_pu_domain.base.states), + GFP_KERNEL); + if (!imx6q_pu_domain.base.states) + return -ENOMEM; + + imx6q_pu_domain.base.states[0].power_off_latency_ns = 25000; + imx6q_pu_domain.base.states[0].power_on_latency_ns = 2000000; + imx6q_pu_domain.base.state_count = 1; + pm_genpd_init(&imx6q_pu_domain.base, NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e023066e4215..37ab7f1ef178 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1282,6 +1282,21 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static int genpd_set_default_power_state(struct generic_pm_domain *genpd) +{ + struct genpd_power_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + genpd->states = state; + genpd->state_count = 1; + genpd->free = state; + + return 0; +} + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. @@ -1293,6 +1308,8 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { + int ret; + if (IS_ERR_OR_NULL(genpd)) return -EINVAL; @@ -1325,19 +1342,12 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->dev_ops.start = pm_clk_resume; } - if (genpd->state_idx >= GENPD_MAX_NUM_STATES) { - pr_warn("Initial state index out of bounds.\n"); - genpd->state_idx = GENPD_MAX_NUM_STATES - 1; - } - - if (genpd->state_count > GENPD_MAX_NUM_STATES) { - pr_warn("Limiting states to %d\n", GENPD_MAX_NUM_STATES); - genpd->state_count = GENPD_MAX_NUM_STATES; - } - /* Use only one "off" state if there were no states declared */ - if (genpd->state_count == 0) - genpd->state_count = 1; + if (genpd->state_count == 0) { + ret = genpd_set_default_power_state(genpd); + if (ret) + return ret; + } mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); @@ -1377,6 +1387,7 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); mutex_unlock(&genpd->lock); cancel_work_sync(&genpd->power_off_work); + kfree(genpd->free); pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a09fe5c009c8..de1d8f331b03 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -19,8 +19,6 @@ /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ -#define GENPD_MAX_NUM_STATES 8 /* Number of possible low power states */ - enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ GPD_STATE_POWER_OFF, /* PM domain is off */ @@ -70,9 +68,10 @@ struct generic_pm_domain { void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ - struct genpd_power_state states[GENPD_MAX_NUM_STATES]; + struct genpd_power_state *states; unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ + void *free; /* Free the state that was allocated for default */ }; -- cgit v1.2.3 From 405f7226014093a2809f27ba32a8230e770ac876 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:50 -0700 Subject: PM / Domains: Add residency property to genpd states Residency of a domain's idle state indicates that the minimum idle time for the domain's idle state to be beneficial for power. Add the parameter to the state node. Future patches, will use the residency value in the genpd governor to determine if it is worth while to enter an idle state. Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index de1d8f331b03..f4492eb71701 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -38,6 +38,7 @@ struct gpd_dev_ops { struct genpd_power_state { s64 power_off_latency_ns; s64 power_on_latency_ns; + s64 residency_ns; }; struct generic_pm_domain { -- cgit v1.2.3 From 30f604283e05d34cb10108c7ba017e5f4fc9d62c Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:51 -0700 Subject: PM / Domains: Allow domain power states to be read from DT This patch allows domains to define idle states in the DT. SoC's can define domain idle states in DT using the "domain-idle-states" property of the domain provider. Add API to read the idle states from DT that can be set in the genpd object. This patch is based on the original patch by Marc Titinger. Signed-off-by: Marc Titinger Signed-off-by: Ulf Hansson Signed-off-by: Lina Iyer Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_domain.h | 8 ++++ 2 files changed, 102 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 37ab7f1ef178..9af75ba0472a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1916,6 +1916,100 @@ out: return ret ? -EPROBE_DEFER : 0; } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); + +static const struct of_device_id idle_state_match[] = { + { .compatible = "arm,idle-state", }, + { } +}; + +static int genpd_parse_state(struct genpd_power_state *genpd_state, + struct device_node *state_node) +{ + int err; + u32 residency; + u32 entry_latency, exit_latency; + const struct of_device_id *match_id; + + match_id = of_match_node(idle_state_match, state_node); + if (!match_id) + return -EINVAL; + + err = of_property_read_u32(state_node, "entry-latency-us", + &entry_latency); + if (err) { + pr_debug(" * %s missing entry-latency-us property\n", + state_node->full_name); + return -EINVAL; + } + + err = of_property_read_u32(state_node, "exit-latency-us", + &exit_latency); + if (err) { + pr_debug(" * %s missing exit-latency-us property\n", + state_node->full_name); + return -EINVAL; + } + + err = of_property_read_u32(state_node, "min-residency-us", &residency); + if (!err) + genpd_state->residency_ns = 1000 * residency; + + genpd_state->power_on_latency_ns = 1000 * exit_latency; + genpd_state->power_off_latency_ns = 1000 * entry_latency; + + return 0; +} + +/** + * of_genpd_parse_idle_states: Return array of idle states for the genpd. + * + * @dn: The genpd device node + * @states: The pointer to which the state array will be saved. + * @n: The count of elements in the array returned from this function. + * + * Returns the device states parsed from the OF node. The memory for the states + * is allocated by this function and is the responsibility of the caller to + * free the memory after use. + */ +int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n) +{ + struct genpd_power_state *st; + struct device_node *np; + int i = 0; + int err, ret; + int count; + struct of_phandle_iterator it; + + count = of_count_phandle_with_args(dn, "domain-idle-states", NULL); + if (!count) + return -EINVAL; + + st = kcalloc(count, sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + /* Loop over the phandles until all the requested entry is found */ + of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) { + np = it.node; + ret = genpd_parse_state(&st[i++], np); + if (ret) { + pr_err + ("Parsing idle state node %s failed with err %d\n", + np->full_name, ret); + of_node_put(np); + kfree(st); + return ret; + } + } + + *n = count; + *states = st; + + return 0; +} +EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); + #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f4492eb71701..b4894969fbec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -205,6 +205,8 @@ extern int of_genpd_add_device(struct of_phandle_args *args, extern int of_genpd_add_subdomain(struct of_phandle_args *parent, struct of_phandle_args *new_subdomain); extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); +extern int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n); int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ @@ -234,6 +236,12 @@ static inline int of_genpd_add_subdomain(struct of_phandle_args *parent, return -ENODEV; } +static inline int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n) +{ + return -ENODEV; +} + static inline int genpd_dev_pm_attach(struct device *dev) { return -ENODEV; -- cgit v1.2.3 From 0c9b694a8a7d4853318c4f2ce315afa2bd3664b6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:52 -0700 Subject: PM / Domains: Save the fwnode in genpd_power_state Save the fwnode for the genpd state in the state node. PM Domain clients may use the fwnode to read in the platform specific domain state properties and associate them with the state. Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 1 + include/linux/pm_domain.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9af75ba0472a..1a6073aaca0e 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1956,6 +1956,7 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state, genpd_state->power_on_latency_ns = 1000 * exit_latency; genpd_state->power_off_latency_ns = 1000 * entry_latency; + genpd_state->fwnode = &state_node->fwnode; return 0; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b4894969fbec..6a8988166899 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -39,6 +39,7 @@ struct genpd_power_state { s64 power_off_latency_ns; s64 power_on_latency_ns; s64 residency_ns; + struct fwnode_handle *fwnode; }; struct generic_pm_domain { -- cgit v1.2.3 From 70bb510e42797792d9fc4842f01fa2fb685717b6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:53 -0700 Subject: dt/bindings / PM/Domains: Update binding for PM domain idle states Update DT bindings to describe idle states of PM domains. This patch is based on the original patch by Marc Titinger. Signed-off-by: Marc Titinger Signed-off-by: Ulf Hansson Signed-off-by: Lina Iyer Acked-by: Rob Herring Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- .../devicetree/bindings/power/power_domain.txt | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 025b5e7df61c..e1650364b296 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -29,6 +29,15 @@ Optional properties: specified by this binding. More details about power domain specifier are available in the next section. +- domain-idle-states : A phandle of an idle-state that shall be soaked into a + generic domain power state. The idle state definitions are + compatible with arm,idle-state specified in [1]. + The domain-idle-state property reflects the idle state of this PM domain and + not the idle states of the devices or sub-domains in the PM domain. Devices + and sub-domains have their own idle-states independent of the parent + domain's idle states. In the absence of this property, the domain would be + considered as capable of being powered-on or powered-off. + Example: power: power-controller@12340000 { @@ -59,6 +68,38 @@ The nodes above define two power controllers: 'parent' and 'child'. Domains created by the 'child' power controller are subdomains of '0' power domain provided by the 'parent' power controller. +Example 3: + parent: power-controller@12340000 { + compatible = "foo,power-controller"; + reg = <0x12340000 0x1000>; + #power-domain-cells = <0>; + domain-idle-states = <&DOMAIN_RET>, <&DOMAIN_PWR_DN>; + }; + + child: power-controller@12341000 { + compatible = "foo,power-controller"; + reg = <0x12341000 0x1000>; + power-domains = <&parent 0>; + #power-domain-cells = <0>; + domain-idle-states = <&DOMAIN_PWR_DN>; + }; + + DOMAIN_RET: state@0 { + compatible = "arm,idle-state"; + reg = <0x0>; + entry-latency-us = <1000>; + exit-latency-us = <2000>; + min-residency-us = <10000>; + }; + + DOMAIN_PWR_DN: state@1 { + compatible = "arm,idle-state"; + reg = <0x1>; + entry-latency-us = <5000>; + exit-latency-us = <8000>; + min-residency-us = <7000>; + }; + ==PM domain consumers== Required properties: @@ -76,3 +117,5 @@ Example: The node above defines a typical PM domain consumer device, which is located inside a PM domain with index 0 of a power controller represented by a node with the label "power". + +[1]. Documentation/devicetree/bindings/arm/idle-states.txt -- cgit v1.2.3 From 35241d12f750d2f1556a9c85f175ce7044716881 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:54 -0700 Subject: PM / Domains: Abstract genpd locking Abstract genpd lock/unlock calls, in preparation for domain specific locks added in the following patches. Signed-off-by: Lina Iyer Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 121 +++++++++++++++++++++++++++++--------------- include/linux/pm_domain.h | 5 +- 2 files changed, 85 insertions(+), 41 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1a6073aaca0e..4194012cdf86 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -39,6 +39,46 @@ static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); +struct genpd_lock_ops { + void (*lock)(struct generic_pm_domain *genpd); + void (*lock_nested)(struct generic_pm_domain *genpd, int depth); + int (*lock_interruptible)(struct generic_pm_domain *genpd); + void (*unlock)(struct generic_pm_domain *genpd); +}; + +static void genpd_lock_mtx(struct generic_pm_domain *genpd) +{ + mutex_lock(&genpd->mlock); +} + +static void genpd_lock_nested_mtx(struct generic_pm_domain *genpd, + int depth) +{ + mutex_lock_nested(&genpd->mlock, depth); +} + +static int genpd_lock_interruptible_mtx(struct generic_pm_domain *genpd) +{ + return mutex_lock_interruptible(&genpd->mlock); +} + +static void genpd_unlock_mtx(struct generic_pm_domain *genpd) +{ + return mutex_unlock(&genpd->mlock); +} + +static const struct genpd_lock_ops genpd_mtx_ops = { + .lock = genpd_lock_mtx, + .lock_nested = genpd_lock_nested_mtx, + .lock_interruptible = genpd_lock_interruptible_mtx, + .unlock = genpd_unlock_mtx, +}; + +#define genpd_lock(p) p->lock_ops->lock(p) +#define genpd_lock_nested(p, d) p->lock_ops->lock_nested(p, d) +#define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) +#define genpd_unlock(p) p->lock_ops->unlock(p) + /* * Get the generic PM domain for a particular struct device. * This validates the struct device pointer, the PM domain pointer, @@ -200,9 +240,9 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth) genpd_sd_counter_inc(master); - mutex_lock_nested(&master->lock, depth + 1); + genpd_lock_nested(master, depth + 1); ret = genpd_poweron(master, depth + 1); - mutex_unlock(&master->lock); + genpd_unlock(master); if (ret) { genpd_sd_counter_dec(master); @@ -255,9 +295,9 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, spin_unlock_irq(&dev->power.lock); if (!IS_ERR(genpd)) { - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd->max_off_time_changed = true; - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } dev = dev->parent; @@ -354,9 +394,9 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd_poweroff(genpd, true); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } /** @@ -472,9 +512,9 @@ static int genpd_runtime_suspend(struct device *dev) if (dev->power.irq_safe) return 0; - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd_poweroff(genpd, false); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); return 0; } @@ -509,9 +549,9 @@ static int genpd_runtime_resume(struct device *dev) goto out; } - mutex_lock(&genpd->lock); + genpd_lock(genpd); ret = genpd_poweron(genpd, 0); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); if (ret) return ret; @@ -547,9 +587,9 @@ err_stop: genpd_stop_dev(genpd, dev); err_poweroff: if (!dev->power.irq_safe) { - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd_poweroff(genpd, 0); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } return ret; @@ -732,20 +772,20 @@ static int pm_genpd_prepare(struct device *dev) if (resume_needed(dev, genpd)) pm_runtime_resume(dev); - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->prepared_count++ == 0) genpd->suspended_count = 0; - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); ret = pm_generic_prepare(dev); if (ret) { - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd->prepared_count--; - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } return ret; @@ -936,13 +976,13 @@ static void pm_genpd_complete(struct device *dev) pm_generic_complete(dev); - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd->prepared_count--; if (!genpd->prepared_count) genpd_queue_power_off_work(genpd); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } /** @@ -1071,7 +1111,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1088,7 +1128,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); out: - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); if (ret) genpd_free_dev_data(dev, gpd_data); @@ -1130,7 +1170,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1145,14 +1185,14 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, list_del_init(&pdd->list_node); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); genpd_free_dev_data(dev, gpd_data); return 0; out: - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); dev_pm_qos_add_notifier(dev, &gpd_data->nb); return ret; @@ -1187,8 +1227,8 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, if (!link) return -ENOMEM; - mutex_lock(&subdomain->lock); - mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); + genpd_lock(subdomain); + genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); if (genpd->status == GPD_STATE_POWER_OFF && subdomain->status != GPD_STATE_POWER_OFF) { @@ -1211,8 +1251,8 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, genpd_sd_counter_inc(genpd); out: - mutex_unlock(&genpd->lock); - mutex_unlock(&subdomain->lock); + genpd_unlock(genpd); + genpd_unlock(subdomain); if (ret) kfree(link); return ret; @@ -1250,8 +1290,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) return -EINVAL; - mutex_lock(&subdomain->lock); - mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); + genpd_lock(subdomain); + genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); if (!list_empty(&subdomain->master_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, @@ -1275,8 +1315,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, } out: - mutex_unlock(&genpd->lock); - mutex_unlock(&subdomain->lock); + genpd_unlock(genpd); + genpd_unlock(subdomain); return ret; } @@ -1316,7 +1356,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, INIT_LIST_HEAD(&genpd->master_links); INIT_LIST_HEAD(&genpd->slave_links); INIT_LIST_HEAD(&genpd->dev_list); - mutex_init(&genpd->lock); + mutex_init(&genpd->mlock); + genpd->lock_ops = &genpd_mtx_ops; genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); atomic_set(&genpd->sd_count, 0); @@ -1364,16 +1405,16 @@ static int genpd_remove(struct generic_pm_domain *genpd) if (IS_ERR_OR_NULL(genpd)) return -EINVAL; - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->has_provider) { - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); pr_err("Provider present, unable to remove %s\n", genpd->name); return -EBUSY; } if (!list_empty(&genpd->master_links) || genpd->device_count) { - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); pr_err("%s: unable to remove %s\n", __func__, genpd->name); return -EBUSY; } @@ -1385,7 +1426,7 @@ static int genpd_remove(struct generic_pm_domain *genpd) } list_del(&genpd->gpd_list_node); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); kfree(genpd->free); pr_debug("%s: removed %s\n", __func__, genpd->name); @@ -1909,9 +1950,9 @@ int genpd_dev_pm_attach(struct device *dev) dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; - mutex_lock(&pd->lock); + genpd_lock(pd); ret = genpd_poweron(pd, 0); - mutex_unlock(&pd->lock); + genpd_unlock(pd); out: return ret ? -EPROBE_DEFER : 0; } @@ -2064,7 +2105,7 @@ static int pm_genpd_summary_one(struct seq_file *s, char state[16]; int ret; - ret = mutex_lock_interruptible(&genpd->lock); + ret = genpd_lock_interruptible(genpd); if (ret) return -ERESTARTSYS; @@ -2101,7 +2142,7 @@ static int pm_genpd_summary_one(struct seq_file *s, seq_puts(s, "\n"); exit: - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); return 0; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 6a8988166899..811b968eb740 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -42,13 +42,14 @@ struct genpd_power_state { struct fwnode_handle *fwnode; }; +struct genpd_lock_ops; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ struct list_head master_links; /* Links with PM domain as a master */ struct list_head slave_links; /* Links with PM domain as a slave */ struct list_head dev_list; /* List of devices */ - struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; struct fwnode_handle *provider; /* Identity of the domain provider */ @@ -74,6 +75,8 @@ struct generic_pm_domain { unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ void *free; /* Free the state that was allocated for default */ + const struct genpd_lock_ops *lock_ops; + struct mutex mlock; }; -- cgit v1.2.3 From d716f4798ff8c65ace4a6ab291f9a4ff265df4ba Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:55 -0700 Subject: PM / Domains: Support IRQ safe PM domains Generic Power Domains currently support turning on/off only in process context. This prevents the usage of PM domains for domains that could be powered on/off in a context where IRQs are disabled. Many such domains exist today and do not get powered off, when the IRQ safe devices in that domain are powered off, because of this limitation. However, not all domains can operate in IRQ safe contexts. Genpd therefore, has to support both cases where the domain may or may not operate in IRQ safe contexts. Configuring genpd to use an appropriate lock for that domain, would allow domains that have IRQ safe devices to runtime suspend and resume, in atomic context. To achieve domain specific locking, set the domain's ->flag to GENPD_FLAG_IRQ_SAFE while defining the domain. This indicates that genpd should use a spinlock instead of a mutex for locking the domain. Locking is abstracted through genpd_lock() and genpd_unlock() functions that use the flag to determine the appropriate lock to be used for that domain. Domains that have lower latency to suspend and resume and can operate with IRQs disabled may now be able to save power, when the component devices and sub-domains are idle at runtime. The restriction this imposes on the domain hierarchy is that non-IRQ safe domains may not have IRQ-safe subdomains, but IRQ safe domains may have IRQ safe and non-IRQ safe subdomains and devices. Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 111 ++++++++++++++++++++++++++++++++++++++++---- include/linux/pm_domain.h | 10 +++- 2 files changed, 110 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4194012cdf86..aac656a889dc 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -74,11 +74,70 @@ static const struct genpd_lock_ops genpd_mtx_ops = { .unlock = genpd_unlock_mtx, }; +static void genpd_lock_spin(struct generic_pm_domain *genpd) + __acquires(&genpd->slock) +{ + unsigned long flags; + + spin_lock_irqsave(&genpd->slock, flags); + genpd->lock_flags = flags; +} + +static void genpd_lock_nested_spin(struct generic_pm_domain *genpd, + int depth) + __acquires(&genpd->slock) +{ + unsigned long flags; + + spin_lock_irqsave_nested(&genpd->slock, flags, depth); + genpd->lock_flags = flags; +} + +static int genpd_lock_interruptible_spin(struct generic_pm_domain *genpd) + __acquires(&genpd->slock) +{ + unsigned long flags; + + spin_lock_irqsave(&genpd->slock, flags); + genpd->lock_flags = flags; + return 0; +} + +static void genpd_unlock_spin(struct generic_pm_domain *genpd) + __releases(&genpd->slock) +{ + spin_unlock_irqrestore(&genpd->slock, genpd->lock_flags); +} + +static const struct genpd_lock_ops genpd_spin_ops = { + .lock = genpd_lock_spin, + .lock_nested = genpd_lock_nested_spin, + .lock_interruptible = genpd_lock_interruptible_spin, + .unlock = genpd_unlock_spin, +}; + #define genpd_lock(p) p->lock_ops->lock(p) #define genpd_lock_nested(p, d) p->lock_ops->lock_nested(p, d) #define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) #define genpd_unlock(p) p->lock_ops->unlock(p) +#define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) + +static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, + struct generic_pm_domain *genpd) +{ + bool ret; + + ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd); + + /* Warn once for each IRQ safe dev in no sleep domain */ + if (ret) + dev_warn_once(dev, "PM domain %s will not be powered off\n", + genpd->name); + + return ret; +} + /* * Get the generic PM domain for a particular struct device. * This validates the struct device pointer, the PM domain pointer, @@ -343,7 +402,12 @@ static int genpd_poweroff(struct generic_pm_domain *genpd, bool is_async) if (stat > PM_QOS_FLAGS_NONE) return -EBUSY; - if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe) + /* + * Do not allow PM domain to be powered off, when an IRQ safe + * device is part of a non-IRQ safe domain. + */ + if (!pm_runtime_suspended(pdd->dev) || + irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) not_suspended++; } @@ -506,10 +570,10 @@ static int genpd_runtime_suspend(struct device *dev) } /* - * If power.irq_safe is set, this routine will be run with interrupts - * off, so it can't use mutexes. + * If power.irq_safe is set, this routine may be run with + * IRQs disabled, so suspend only if the PM domain also is irq_safe. */ - if (dev->power.irq_safe) + if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) return 0; genpd_lock(genpd); @@ -543,8 +607,11 @@ static int genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - /* If power.irq_safe, the PM domain is never powered off. */ - if (dev->power.irq_safe) { + /* + * As we don't power off a non IRQ safe domain, which holds + * an IRQ safe device, we don't need to restore power to it. + */ + if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) { timed = false; goto out; } @@ -586,7 +653,8 @@ static int genpd_runtime_resume(struct device *dev) err_stop: genpd_stop_dev(genpd, dev); err_poweroff: - if (!dev->power.irq_safe) { + if (!pm_runtime_is_irq_safe(dev) || + (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { genpd_lock(genpd); genpd_poweroff(genpd, 0); genpd_unlock(genpd); @@ -1223,6 +1291,17 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, || genpd == subdomain) return -EINVAL; + /* + * If the domain can be powered on/off in an IRQ safe + * context, ensure that the subdomain can also be + * powered on/off in that context. + */ + if (!genpd_is_irq_safe(genpd) && genpd_is_irq_safe(subdomain)) { + WARN("Parent %s of subdomain %s must be IRQ safe\n", + genpd->name, subdomain->name); + return -EINVAL; + } + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; @@ -1337,6 +1416,17 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) return 0; } +static void genpd_lock_init(struct generic_pm_domain *genpd) +{ + if (genpd->flags & GENPD_FLAG_IRQ_SAFE) { + spin_lock_init(&genpd->slock); + genpd->lock_ops = &genpd_spin_ops; + } else { + mutex_init(&genpd->mlock); + genpd->lock_ops = &genpd_mtx_ops; + } +} + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. @@ -1356,8 +1446,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, INIT_LIST_HEAD(&genpd->master_links); INIT_LIST_HEAD(&genpd->slave_links); INIT_LIST_HEAD(&genpd->dev_list); - mutex_init(&genpd->mlock); - genpd->lock_ops = &genpd_mtx_ops; + genpd_lock_init(genpd); genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); atomic_set(&genpd->sd_count, 0); @@ -2131,7 +2220,9 @@ static int pm_genpd_summary_one(struct seq_file *s, } list_for_each_entry(pm_data, &genpd->dev_list, list_node) { - kobj_path = kobject_get_path(&pm_data->dev->kobj, GFP_KERNEL); + kobj_path = kobject_get_path(&pm_data->dev->kobj, + genpd_is_irq_safe(genpd) ? + GFP_ATOMIC : GFP_KERNEL); if (kobj_path == NULL) continue; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 811b968eb740..81ece61075df 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -15,9 +15,11 @@ #include #include #include +#include /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ +#define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -76,7 +78,13 @@ struct generic_pm_domain { unsigned int state_idx; /* state that genpd will go to when off */ void *free; /* Free the state that was allocated for default */ const struct genpd_lock_ops *lock_ops; - struct mutex mlock; + union { + struct mutex mlock; + struct { + spinlock_t slock; + unsigned long lock_flags; + }; + }; }; -- cgit v1.2.3 From 411bf2a8db76d7ed6f710df77121e5113ee8fe64 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Mon, 24 Oct 2016 14:38:47 -0700 Subject: PM / doc: Update device documentation for devices in IRQ-safe PM domains Update documentation to reflect the changes made to support IRQ safe PM domains. Suggested-by: Rafael J. Wysocki Signed-off-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- Documentation/power/devices.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 8ba6625fdd63..73ddea39a9ce 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -607,7 +607,9 @@ individually. Instead, a set of devices sharing a power resource can be put into a low-power state together at the same time by turning off the shared power resource. Of course, they also need to be put into the full-power state together, by turning the shared power resource on. A set of devices with this -property is often referred to as a power domain. +property is often referred to as a power domain. A power domain may also be +nested inside another power domain. The nested domain is referred to as the +sub-domain of the parent domain. Support for power domains is provided through the pm_domain field of struct device. This field is a pointer to an object of type struct dev_pm_domain, @@ -629,6 +631,16 @@ support for power domains into subsystem-level callbacks, for example by modifying the platform bus type. Other platforms need not implement it or take it into account in any way. +Devices may be defined as IRQ-safe which indicates to the PM core that their +runtime PM callbacks may be invoked with disabled interrupts (see +Documentation/power/runtime_pm.txt for more information). If an IRQ-safe +device belongs to a PM domain, the runtime PM of the domain will be +disallowed, unless the domain itself is defined as IRQ-safe. However, it +makes sense to define a PM domain as IRQ-safe only if all the devices in it +are IRQ-safe. Moreover, if an IRQ-safe domain has a parent domain, the runtime +PM of the parent is only allowed if the parent itself is IRQ-safe too with the +additional restriction that all child domains of an IRQ-safe parent must also +be IRQ-safe. Device Low Power (suspend) States --------------------------------- -- cgit v1.2.3 From a1fee00dc95644e0590b4f1a3755c9f6b1243b3a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 25 Oct 2016 17:33:27 +0100 Subject: PM / Domains: check for negative return from of_count_phandle_with_args() The return from of_count_phandle_with_args can be negative, so we should avoid kcalloc of a negative count of genpd_power_stat structs by sanity checking if count is zero or less. Signed-off-by: Colin Ian King Acked-by: Kevin Hilman Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index aac656a889dc..661737c2bae0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2113,7 +2113,7 @@ int of_genpd_parse_idle_states(struct device_node *dn, struct of_phandle_iterator it; count = of_count_phandle_with_args(dn, "domain-idle-states", NULL); - if (!count) + if (count <= 0) return -EINVAL; st = kcalloc(count, sizeof(*st), GFP_KERNEL); -- cgit v1.2.3 From 650ec6cfe323311165f0c7b6882a63dfd7ae8b63 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Oct 2016 09:21:24 +0200 Subject: cpufreq: enable the DT cpufreq driver on the Integrators This enables the generic DT and OPP-based cpufreq driver on the ARM Integrator/AP and Integrator/CP. Acked-by: Viresh Kumar Signed-off-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 71267626456b..142a6f615e52 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -26,6 +26,9 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "allwinner,sun8i-a83t", }, { .compatible = "allwinner,sun8i-h3", }, + { .compatible = "arm,integrator-ap", }, + { .compatible = "arm,integrator-cp", }, + { .compatible = "hisilicon,hi6220", }, { .compatible = "fsl,imx27", }, -- cgit v1.2.3 From ae8b8d8f86a03c19c5ecfd848609b2e9438f1cf2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Oct 2016 09:21:25 +0200 Subject: cpufreq: retire the Integrator cpufreq driver After switching the core module clocks controlling the Integrator clock frequencies to the common clock framework, defining the operating points in the device tree, and activating the generic DT-based CPUfreq driver, we can retire the old Integrator cpufreq driver. Acked-by: Viresh Kumar Signed-off-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 8 -- drivers/cpufreq/Makefile | 1 - drivers/cpufreq/integrator-cpufreq.c | 239 ----------------------------------- 3 files changed, 248 deletions(-) delete mode 100644 drivers/cpufreq/integrator-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index d89b8afe23b6..fdbc630272b3 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -60,14 +60,6 @@ config ARM_IMX6Q_CPUFREQ If in doubt, say N. -config ARM_INTEGRATOR - tristate "CPUfreq driver for ARM Integrator CPUs" - depends on ARCH_INTEGRATOR - default y - help - This enables the CPUfreq driver for ARM Integrator CPUs. - If in doubt, say Y. - config ARM_KIRKWOOD_CPUFREQ def_bool MACH_KIRKWOOD help diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 0a9b6a093646..7dde82179d62 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -56,7 +56,6 @@ obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o -obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_MT8173_CPUFREQ) += mt8173-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c deleted file mode 100644 index 79e3ff2771a6..000000000000 --- a/drivers/cpufreq/integrator-cpufreq.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (C) 2001-2002 Deep Blue Solutions Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * CPU support functions - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static void __iomem *cm_base; -/* The cpufreq driver only use the OSC register */ -#define INTEGRATOR_HDR_OSC_OFFSET 0x08 -#define INTEGRATOR_HDR_LOCK_OFFSET 0x14 - -static struct cpufreq_driver integrator_driver; - -static const struct icst_params lclk_params = { - .ref = 24000000, - .vco_max = ICST525_VCO_MAX_5V, - .vco_min = ICST525_VCO_MIN, - .vd_min = 8, - .vd_max = 132, - .rd_min = 24, - .rd_max = 24, - .s2div = icst525_s2div, - .idx2s = icst525_idx2s, -}; - -static const struct icst_params cclk_params = { - .ref = 24000000, - .vco_max = ICST525_VCO_MAX_5V, - .vco_min = ICST525_VCO_MIN, - .vd_min = 12, - .vd_max = 160, - .rd_min = 24, - .rd_max = 24, - .s2div = icst525_s2div, - .idx2s = icst525_idx2s, -}; - -/* - * Validate the speed policy. - */ -static int integrator_verify_policy(struct cpufreq_policy *policy) -{ - struct icst_vco vco; - - cpufreq_verify_within_cpu_limits(policy); - - vco = icst_hz_to_vco(&cclk_params, policy->max * 1000); - policy->max = icst_hz(&cclk_params, vco) / 1000; - - vco = icst_hz_to_vco(&cclk_params, policy->min * 1000); - policy->min = icst_hz(&cclk_params, vco) / 1000; - - cpufreq_verify_within_cpu_limits(policy); - return 0; -} - - -static int integrator_set_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - cpumask_t cpus_allowed; - int cpu = policy->cpu; - struct icst_vco vco; - struct cpufreq_freqs freqs; - u_int cm_osc; - - /* - * Save this threads cpus_allowed mask. - */ - cpus_allowed = current->cpus_allowed; - - /* - * Bind to the specified CPU. When this call returns, - * we should be running on the right CPU. - */ - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - BUG_ON(cpu != smp_processor_id()); - - /* get current setting */ - cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - - if (machine_is_integrator()) - vco.s = (cm_osc >> 8) & 7; - else if (machine_is_cintegrator()) - vco.s = 1; - vco.v = cm_osc & 255; - vco.r = 22; - freqs.old = icst_hz(&cclk_params, vco) / 1000; - - /* icst_hz_to_vco rounds down -- so we need the next - * larger freq in case of CPUFREQ_RELATION_L. - */ - if (relation == CPUFREQ_RELATION_L) - target_freq += 999; - if (target_freq > policy->max) - target_freq = policy->max; - vco = icst_hz_to_vco(&cclk_params, target_freq * 1000); - freqs.new = icst_hz(&cclk_params, vco) / 1000; - - if (freqs.old == freqs.new) { - set_cpus_allowed_ptr(current, &cpus_allowed); - return 0; - } - - cpufreq_freq_transition_begin(policy, &freqs); - - cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - - if (machine_is_integrator()) { - cm_osc &= 0xfffff800; - cm_osc |= vco.s << 8; - } else if (machine_is_cintegrator()) { - cm_osc &= 0xffffff00; - } - cm_osc |= vco.v; - - __raw_writel(0xa05f, cm_base + INTEGRATOR_HDR_LOCK_OFFSET); - __raw_writel(cm_osc, cm_base + INTEGRATOR_HDR_OSC_OFFSET); - __raw_writel(0, cm_base + INTEGRATOR_HDR_LOCK_OFFSET); - - /* - * Restore the CPUs allowed mask. - */ - set_cpus_allowed_ptr(current, &cpus_allowed); - - cpufreq_freq_transition_end(policy, &freqs, 0); - - return 0; -} - -static unsigned int integrator_get(unsigned int cpu) -{ - cpumask_t cpus_allowed; - unsigned int current_freq; - u_int cm_osc; - struct icst_vco vco; - - cpus_allowed = current->cpus_allowed; - - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - BUG_ON(cpu != smp_processor_id()); - - /* detect memory etc. */ - cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - - if (machine_is_integrator()) - vco.s = (cm_osc >> 8) & 7; - else - vco.s = 1; - vco.v = cm_osc & 255; - vco.r = 22; - - current_freq = icst_hz(&cclk_params, vco) / 1000; /* current freq */ - - set_cpus_allowed_ptr(current, &cpus_allowed); - - return current_freq; -} - -static int integrator_cpufreq_init(struct cpufreq_policy *policy) -{ - - /* set default policy and cpuinfo */ - policy->max = policy->cpuinfo.max_freq = 160000; - policy->min = policy->cpuinfo.min_freq = 12000; - policy->cpuinfo.transition_latency = 1000000; /* 1 ms, assumed */ - - return 0; -} - -static struct cpufreq_driver integrator_driver = { - .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = integrator_verify_policy, - .target = integrator_set_target, - .get = integrator_get, - .init = integrator_cpufreq_init, - .name = "integrator", -}; - -static int __init integrator_cpufreq_probe(struct platform_device *pdev) -{ - struct resource *res; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - - cm_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!cm_base) - return -ENODEV; - - return cpufreq_register_driver(&integrator_driver); -} - -static int __exit integrator_cpufreq_remove(struct platform_device *pdev) -{ - return cpufreq_unregister_driver(&integrator_driver); -} - -static const struct of_device_id integrator_cpufreq_match[] = { - { .compatible = "arm,core-module-integrator"}, - { }, -}; - -MODULE_DEVICE_TABLE(of, integrator_cpufreq_match); - -static struct platform_driver integrator_cpufreq_driver = { - .driver = { - .name = "integrator-cpufreq", - .of_match_table = integrator_cpufreq_match, - }, - .remove = __exit_p(integrator_cpufreq_remove), -}; - -module_platform_driver_probe(integrator_cpufreq_driver, - integrator_cpufreq_probe); - -MODULE_AUTHOR("Russell M. King"); -MODULE_DESCRIPTION("cpufreq driver for ARM Integrator CPUs"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From eae48f046ffa117afb782cd9b3ae5469df0042e2 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 25 Oct 2016 13:20:40 -0700 Subject: cpufreq: intel_pstate: Per CPU P-State limits Intel P-State offers two interface to set performance limits: - Intel P-State sysfs /sys/devices/system/cpu/intel_pstate/max_perf_pct /sys/devices/system/cpu/intel_pstate/min_perf_pct - cpufreq /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq In the current implementation both of the above methods, change limits to every CPU in the system. Moreover the limits placed using cpufreq policy interface also presented in the Intel P-State sysfs via modified max_perf_pct and min_per_pct during sysfs reads. This allows to check percent of reduced/increased performance, irrespective of method used to limit. There are some new generations of processors, where it is possible to have limits placed on individual CPU cores. Using cpufreq interface it is possible to set limits on each CPU. But the current processing will use last limits placed on all CPUs. So the per core limit feature of CPUs can't be used. This change brings in capability to set P-States limits for each CPU, with some limitations. In this case what should be the read of max_perf_pct and min_perf_pct? It can be most restrictive limits placed on any CPU or max possible performance on any given CPU on which no limits are placed. In either case someone will have issue. So the consensus is, we can't have both sysfs controls present when user wants to use limit per core limits. - By default per-core-control feature is not enabled. So no one will notice any difference. - The way to enable is by kernel command line intel_pstate=per_cpu_perf_limits - When the per-core-controls are enabled there is no display of for both read and write on /sys/devices/system/cpu/intel_pstate/max_perf_pct /sys/devices/system/cpu/intel_pstate/min_perf_pct - User can change limits using /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor - User can still observe turbo percent and number of P-States from /sys/devices/system/cpu/intel_pstate/turbo_pct /sys/devices/system/cpu/intel_pstate/num_pstates - User can read write system wide turbo status /sys/devices/system/cpu/no_turbo While changing this BUG_ON is changed to WARN_ON, as they are not fatal errors for the system. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 236 +++++++++++++++++++++++++++-------------- 1 file changed, 156 insertions(+), 80 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d7a9195a8351..b6e9b49bf151 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -176,6 +176,48 @@ struct _pid { int32_t last_err; }; +/** + * struct perf_limits - Store user and policy limits + * @no_turbo: User requested turbo state from intel_pstate sysfs + * @turbo_disabled: Platform turbo status either from msr + * MSR_IA32_MISC_ENABLE or when maximum available pstate + * matches the maximum turbo pstate + * @max_perf_pct: Effective maximum performance limit in percentage, this + * is minimum of either limits enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @min_perf_pct: Effective minimum performance limit in percentage, this + * is maximum of either limits enforced by cpufreq policy + * or limits from user set limits via intel_pstate sysfs + * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct + * This value is used to limit max pstate + * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct + * This value is used to limit min pstate + * @max_policy_pct: The maximum performance in percentage enforced by + * cpufreq setpolicy interface + * @max_sysfs_pct: The maximum performance in percentage enforced by + * intel pstate sysfs interface, unused when per cpu + * controls are enforced + * @min_policy_pct: The minimum performance in percentage enforced by + * cpufreq setpolicy interface + * @min_sysfs_pct: The minimum performance in percentage enforced by + * intel pstate sysfs interface, unused when per cpu + * controls are enforced + * + * Storage for user and policy defined limits. + */ +struct perf_limits { + int no_turbo; + int turbo_disabled; + int max_perf_pct; + int min_perf_pct; + int32_t max_perf; + int32_t min_perf; + int max_policy_pct; + int max_sysfs_pct; + int min_policy_pct; + int min_sysfs_pct; +}; + /** * struct cpudata - Per CPU instance data storage * @cpu: CPU number for this instance data @@ -194,6 +236,9 @@ struct _pid { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data + * @perf_limits: Pointer to perf_limit unique to this CPU + * Not all field in the structure are applicable + * when per cpu controls are enforced * @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @@ -217,6 +262,7 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; + struct perf_limits *perf_limits; #ifdef CONFIG_ACPI struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; @@ -289,51 +335,12 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); static struct pstate_adjust_policy pid_params __read_mostly; static struct pstate_funcs pstate_funcs __read_mostly; static int hwp_active __read_mostly; +static bool per_cpu_limits __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; #endif -/** - * struct perf_limits - Store user and policy limits - * @no_turbo: User requested turbo state from intel_pstate sysfs - * @turbo_disabled: Platform turbo status either from msr - * MSR_IA32_MISC_ENABLE or when maximum available pstate - * matches the maximum turbo pstate - * @max_perf_pct: Effective maximum performance limit in percentage, this - * is minimum of either limits enforced by cpufreq policy - * or limits from user set limits via intel_pstate sysfs - * @min_perf_pct: Effective minimum performance limit in percentage, this - * is maximum of either limits enforced by cpufreq policy - * or limits from user set limits via intel_pstate sysfs - * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct - * This value is used to limit max pstate - * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct - * This value is used to limit min pstate - * @max_policy_pct: The maximum performance in percentage enforced by - * cpufreq setpolicy interface - * @max_sysfs_pct: The maximum performance in percentage enforced by - * intel pstate sysfs interface - * @min_policy_pct: The minimum performance in percentage enforced by - * cpufreq setpolicy interface - * @min_sysfs_pct: The minimum performance in percentage enforced by - * intel pstate sysfs interface - * - * Storage for user and policy defined limits. - */ -struct perf_limits { - int no_turbo; - int turbo_disabled; - int max_perf_pct; - int min_perf_pct; - int32_t max_perf; - int32_t min_perf; - int max_policy_pct; - int max_sysfs_pct; - int min_policy_pct; - int min_sysfs_pct; -}; - static struct perf_limits performance_limits = { .no_turbo = 0, .turbo_disabled = 0, @@ -560,21 +567,30 @@ static inline void update_turbo_state(void) static void intel_pstate_hwp_set(const struct cpumask *cpumask) { int min, hw_min, max, hw_max, cpu, range, adj_range; + struct perf_limits *perf_limits = limits; u64 value, cap; for_each_cpu(cpu, cpumask) { + int max_perf_pct, min_perf_pct; + + if (per_cpu_limits) + perf_limits = all_cpu_data[cpu]->perf_limits; + rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); hw_min = HWP_LOWEST_PERF(cap); hw_max = HWP_HIGHEST_PERF(cap); range = hw_max - hw_min; + max_perf_pct = perf_limits->max_perf_pct; + min_perf_pct = perf_limits->min_perf_pct; + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - adj_range = limits->min_perf_pct * range / 100; + adj_range = min_perf_pct * range / 100; min = hw_min + adj_range; value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - adj_range = limits->max_perf_pct * range / 100; + adj_range = max_perf_pct * range / 100; max = hw_min + adj_range; if (limits->no_turbo) { hw_max = HWP_GUARANTEED_PERF(cap); @@ -787,8 +803,6 @@ define_one_global_ro(num_pstates); static struct attribute *intel_pstate_attributes[] = { &no_turbo.attr, - &max_perf_pct.attr, - &min_perf_pct.attr, &turbo_pct.attr, &num_pstates.attr, NULL @@ -805,9 +819,26 @@ static void __init intel_pstate_sysfs_expose_params(void) intel_pstate_kobject = kobject_create_and_add("intel_pstate", &cpu_subsys.dev_root->kobj); - BUG_ON(!intel_pstate_kobject); + if (WARN_ON(!intel_pstate_kobject)) + return; + rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); - BUG_ON(rc); + if (WARN_ON(rc)) + return; + + /* + * If per cpu limits are enforced there are no global limits, so + * return without creating max/min_perf_pct attributes + */ + if (per_cpu_limits) + return; + + rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr); + WARN_ON(rc); + + rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr); + WARN_ON(rc); + } /************************** sysfs end ************************/ @@ -1124,20 +1155,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; + struct perf_limits *perf_limits = limits; if (limits->no_turbo || limits->turbo_disabled) max_perf = cpu->pstate.max_pstate; + if (per_cpu_limits) + perf_limits = cpu->perf_limits; + /* * performance can be limited by user through sysfs, by cpufreq * policy, or by cpu specific default values determined through * experimentation. */ - max_perf_adj = fp_toint(max_perf * limits->max_perf); + max_perf_adj = fp_toint(max_perf * perf_limits->max_perf); *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); - min_perf = fp_toint(max_perf * limits->min_perf); + min_perf = fp_toint(max_perf * perf_limits->min_perf); *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } @@ -1421,11 +1456,23 @@ static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; - if (!all_cpu_data[cpunum]) - all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), - GFP_KERNEL); - if (!all_cpu_data[cpunum]) - return -ENOMEM; + cpu = all_cpu_data[cpunum]; + + if (!cpu) { + unsigned int size = sizeof(struct cpudata); + + if (per_cpu_limits) + size += sizeof(struct perf_limits); + + cpu = kzalloc(size, GFP_KERNEL); + if (!cpu) + return -ENOMEM; + + all_cpu_data[cpunum] = cpu; + if (per_cpu_limits) + cpu->perf_limits = (struct perf_limits *)(cpu + 1); + + } cpu = all_cpu_data[cpunum]; @@ -1493,9 +1540,40 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) limits->min_sysfs_pct = 0; } +static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, + struct perf_limits *limits) +{ + limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0, 100); + limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, + policy->cpuinfo.max_freq); + limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100); + + /* Normalize user input to [min_policy_pct, max_policy_pct] */ + limits->min_perf_pct = max(limits->min_policy_pct, + limits->min_sysfs_pct); + limits->min_perf_pct = min(limits->max_policy_pct, + limits->min_perf_pct); + limits->max_perf_pct = min(limits->max_policy_pct, + limits->max_sysfs_pct); + limits->max_perf_pct = max(limits->min_policy_pct, + limits->max_perf_pct); + + /* Make sure min_perf_pct <= max_perf_pct */ + limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); + + limits->min_perf = div_fp(limits->min_perf_pct, 100); + limits->max_perf = div_fp(limits->max_perf_pct, 100); + limits->max_perf = round_up(limits->max_perf, FRAC_BITS); + + pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, + limits->max_perf_pct, limits->min_perf_pct); +} + static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; + struct perf_limits *perf_limits = NULL; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -1513,41 +1591,29 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->max = policy->cpuinfo.max_freq; } - if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { - limits = &performance_limits; + if (per_cpu_limits) + perf_limits = cpu->perf_limits; + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (!perf_limits) { + limits = &performance_limits; + perf_limits = limits; + } if (policy->max >= policy->cpuinfo.max_freq) { pr_debug("set performance\n"); - intel_pstate_set_performance_limits(limits); + intel_pstate_set_performance_limits(perf_limits); goto out; } } else { pr_debug("set powersave\n"); - limits = &powersave_limits; - } - - limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; - limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); - limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, - policy->cpuinfo.max_freq); - limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100); - - /* Normalize user input to [min_policy_pct, max_policy_pct] */ - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); - - /* Make sure min_perf_pct <= max_perf_pct */ - limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); + if (!perf_limits) { + limits = &powersave_limits; + perf_limits = limits; + } - limits->min_perf = div_fp(limits->min_perf_pct, 100); - limits->max_perf = div_fp(limits->max_perf_pct, 100); - limits->max_perf = round_up(limits->max_perf, FRAC_BITS); + } + intel_pstate_update_perf_limits(policy, perf_limits); out: if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* @@ -1607,6 +1673,14 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) else policy->policy = CPUFREQ_POLICY_POWERSAVE; + /* + * We need sane value in the cpu->perf_limits, so inherit from global + * perf_limits limits, which are seeded with values based on the + * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up. + */ + if (per_cpu_limits) + memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits)); + policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; @@ -1888,6 +1962,8 @@ static int __init intel_pstate_setup(char *str) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; + if (!strcmp(str, "per_cpu_perf_limits")) + per_cpu_limits = true; #ifdef CONFIG_ACPI if (!strcmp(str, "support_acpi_ppc")) -- cgit v1.2.3 From 5879f877398a2a5e5006c6e16a4288e9d4c308a1 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 25 Oct 2016 13:20:41 -0700 Subject: cpufreq: intel_pstate: Reduce impact due to rounding error When policy->max and policy->min are same, in some cases they don't result in the same frequency cap. The max_policy_pct is rounded up but not min_perf_pct. So even when they are same, results in different percentage or maximum and minimum. Since minimum is a conservative value for power, a lower value without rounding is better in most of the cases, unless user wants policy->max = policy->min. This change uses use the same policy percentage when policy->max and policy->min are same. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b6e9b49bf151..8e7390bb2175 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1543,11 +1543,17 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { - limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; - limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0, 100); limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, policy->cpuinfo.max_freq); limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100); + if (policy->max == policy->min) { + limits->min_policy_pct = limits->max_policy_pct; + } else { + limits->min_policy_pct = (policy->min * 100) / + policy->cpuinfo.max_freq; + limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, + 0, 100); + } /* Normalize user input to [min_policy_pct, max_policy_pct] */ limits->min_perf_pct = max(limits->min_policy_pct, -- cgit v1.2.3 From 9e472f95e480d6746ed8646634e6b11994207015 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 25 Oct 2016 13:20:42 -0700 Subject: Documentation: intel_pstate: Update per core limits Document restriction on per core P-State control. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/intel-pstate.txt | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt index 5528d6d59687..198b2ef88aec 100644 --- a/Documentation/cpu-freq/intel-pstate.txt +++ b/Documentation/cpu-freq/intel-pstate.txt @@ -48,7 +48,7 @@ In addition to the frequency-controlling interfaces provided by the cpufreq core, the driver provides its own sysfs files to control the P-State selection. These files have been added to /sys/devices/system/cpu/intel_pstate/. Any changes made to these files are applicable to all CPUs (even in a -multi-package system). +multi-package system, Refer to later section on placing "Per-CPU limits"). max_perf_pct: Limits the maximum P-State that will be requested by the driver. It states it as a percentage of the available performance. The @@ -120,6 +120,25 @@ frequency is fictional for Intel Core processors. Even if the scaling driver selects a single P-State, the actual frequency the processor will run at is selected by the processor itself. +Per-CPU limits + +The kernel command line option "intel_pstate=per_cpu_perf_limits" forces +the intel_pstate driver to use per-CPU performance limits. When it is set, +the sysfs control interface described above is subject to limitations. +- The following controls are not available for both read and write + /sys/devices/system/cpu/intel_pstate/max_perf_pct + /sys/devices/system/cpu/intel_pstate/min_perf_pct +- The following controls can be used to set performance limits, as far as the +architecture of the processor permits: + /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq + /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq + /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor +- User can still observe turbo percent and number of P-States from + /sys/devices/system/cpu/intel_pstate/turbo_pct + /sys/devices/system/cpu/intel_pstate/num_pstates +- User can read write system wide turbo status + /sys/devices/system/cpu/no_turbo + Tuning Intel P-State driver When the performance can be tuned using PID (Proportional Integral -- cgit v1.2.3 From 22449c0cba0e68489e5b7b033215c8dc6effde35 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 25 Oct 2016 13:20:43 -0700 Subject: Documentation: kernel parameters: per core P-State control Additional command line control to enable per core performance control. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 37babf91f2cb..fa2336ec1f56 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1780,6 +1780,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Description Table, specifies preferred power management profile as "Enterprise Server" or "Performance Server", then this feature is turned on by default. + per_cpu_perf_limits + Allow per-logical-CPU P-State performance control limits using + cpufreq sysfs interface intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) -- cgit v1.2.3 From 1758b3374bc5b56f60fc07ccc37863a64fc67ecc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 27 Oct 2016 01:41:33 +0900 Subject: cpufreq: dt: add Socionext UniPhier SoCs support Add compatible strings for Pro5, PXs2, LD6b, LD11, LD20 SoCs to use the generic cpufreq driver. Signed-off-by: Masahiro Yamada Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 142a6f615e52..fba3f61f320c 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -75,6 +75,12 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "sigma,tango4" }, + { .compatible = "socionext,uniphier-pro5", }, + { .compatible = "socionext,uniphier-pxs2", }, + { .compatible = "socionext,uniphier-ld6b", }, + { .compatible = "socionext,uniphier-ld11", }, + { .compatible = "socionext,uniphier-ld20", }, + { .compatible = "ti,am33xx", }, { .compatible = "ti,dra7", }, { .compatible = "ti,omap2", }, -- cgit v1.2.3 From bb446b574e1a0c7fcaeb4ff59a3693e3194f57fe Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Thu, 27 Oct 2016 14:05:34 -0700 Subject: dt: cpufreq: brcm: New binding document for brcmstb-avs-cpufreq Add the binding document for the new brcmstb-avs-cpufreq driver. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- .../bindings/cpufreq/brcm,stb-avs-cpu-freq.txt | 78 ++++++++++++++++++++++ MAINTAINERS | 7 ++ 2 files changed, 85 insertions(+) create mode 100644 Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt diff --git a/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt b/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt new file mode 100644 index 000000000000..af2385795d78 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt @@ -0,0 +1,78 @@ +Broadcom AVS mail box and interrupt register bindings +===================================================== + +A total of three DT nodes are required. One node (brcm,avs-cpu-data-mem) +references the mailbox register used to communicate with the AVS CPU[1]. The +second node (brcm,avs-cpu-l2-intr) is required to trigger an interrupt on +the AVS CPU. The interrupt tells the AVS CPU that it needs to process a +command sent to it by a driver. Interrupting the AVS CPU is mandatory for +commands to be processed. + +The interface also requires a reference to the AVS host interrupt controller, +so a driver can react to interrupts generated by the AVS CPU whenever a command +has been processed. See [2] for more information on the brcm,l2-intc node. + +[1] The AVS CPU is an independent co-processor that runs proprietary +firmware. On some SoCs, this firmware supports DFS and DVFS in addition to +Adaptive Voltage Scaling. + +[2] Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt + + +Node brcm,avs-cpu-data-mem +-------------------------- + +Required properties: +- compatible: must include: brcm,avs-cpu-data-mem and + should include: one of brcm,bcm7271-avs-cpu-data-mem or + brcm,bcm7268-avs-cpu-data-mem +- reg: Specifies base physical address and size of the registers. +- interrupts: The interrupt that the AVS CPU will use to interrupt the host + when a command completed. +- interrupt-parent: The interrupt controller the above interrupt is routed + through. +- interrupt-names: The name of the interrupt used to interrupt the host. + +Optional properties: +- None + +Node brcm,avs-cpu-l2-intr +------------------------- + +Required properties: +- compatible: must include: brcm,avs-cpu-l2-intr and + should include: one of brcm,bcm7271-avs-cpu-l2-intr or + brcm,bcm7268-avs-cpu-l2-intr +- reg: Specifies base physical address and size of the registers. + +Optional properties: +- None + + +Example +======= + + avs_host_l2_intc: interrupt-controller@f04d1200 { + #interrupt-cells = <1>; + compatible = "brcm,l2-intc"; + interrupt-parent = <&intc>; + reg = <0xf04d1200 0x48>; + interrupt-controller; + interrupts = <0x0 0x19 0x0>; + interrupt-names = "avs"; + }; + + avs-cpu-data-mem@f04c4000 { + compatible = "brcm,bcm7271-avs-cpu-data-mem", + "brcm,avs-cpu-data-mem"; + reg = <0xf04c4000 0x60>; + interrupts = <0x1a>; + interrupt-parent = <&avs_host_l2_intc>; + interrupt-names = "sw_intr"; + }; + + avs-cpu-l2-intr@f04d1100 { + compatible = "brcm,bcm7271-avs-cpu-l2-intr", + "brcm,avs-cpu-l2-intr"; + reg = <0xf04d1100 0x10>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 1cd38a7e0064..9379aca4bec4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2738,6 +2738,13 @@ L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: drivers/mtd/nand/brcmnand/ +BROADCOM STB AVS CPUFREQ DRIVER +M: Markus Mayer +M: bcm-kernel-feedback-list@broadcom.com +L: linux-pm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt + BROADCOM SPECIFIC AMBA DRIVER (BCMA) M: Rafał Miłecki L: linux-wireless@vger.kernel.org -- cgit v1.2.3 From de322e085995b9417582d6f72229dadb5c09d163 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Thu, 27 Oct 2016 14:05:35 -0700 Subject: cpufreq: brcmstb-avs-cpufreq: AVS CPUfreq driver for Broadcom STB SoCs This driver supports voltage and frequency scaling on Broadcom STB SoCs using AVS firmware with DFS and DVFS support. Actual frequency or voltage scaling is done exclusively by the AVS firmware. The driver merely provides a standard CPUfreq interface to other kernel components and userland, and instructs the AVS firmware to perform frequency or voltage changes on its behalf. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 1 + drivers/cpufreq/Kconfig.arm | 11 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/brcmstb-avs-cpufreq.c | 736 ++++++++++++++++++++++++++++++++++ 4 files changed, 749 insertions(+) create mode 100644 drivers/cpufreq/brcmstb-avs-cpufreq.c diff --git a/MAINTAINERS b/MAINTAINERS index 9379aca4bec4..d0ca1198d8ab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2744,6 +2744,7 @@ M: bcm-kernel-feedback-list@broadcom.com L: linux-pm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/cpufreq/brcm,stb-avs-cpu-freq.txt +F: drivers/cpufreq/brcmstb* BROADCOM SPECIFIC AMBA DRIVER (BCMA) M: Rafał Miłecki diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index fdbc630272b3..04f7862eacd3 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -12,6 +12,17 @@ config ARM_BIG_LITTLE_CPUFREQ help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. +config ARM_BRCMSTB_AVS_CPUFREQ + tristate "Broadcom STB AVS CPUfreq driver" + depends on ARCH_BRCMSTB || COMPILE_TEST + default y + help + Some Broadcom STB SoCs use a co-processor running proprietary firmware + ("AVS") to handle voltage and frequency scaling. This driver provides + a standard CPUfreq interface to to the firmware. + + Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS. + config ARM_DT_BL_CPUFREQ tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" depends on ARM_BIG_LITTLE_CPUFREQ && OF diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 7dde82179d62..1e46c3918e7a 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o # LITTLE drivers, so that it is probed last. obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o +obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c new file mode 100644 index 000000000000..4415fa051283 --- /dev/null +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -0,0 +1,736 @@ +/* + * CPU frequency scaling for Broadcom SoCs with AVS firmware that + * supports DVS or DVFS + * + * Copyright (c) 2016 Broadcom + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * "AVS" is the name of a firmware developed at Broadcom. It derives + * its name from the technique called "Adaptive Voltage Scaling". + * Adaptive voltage scaling was the original purpose of this firmware. + * The AVS firmware still supports "AVS mode", where all it does is + * adaptive voltage scaling. However, on some newer Broadcom SoCs, the + * AVS Firmware, despite its unchanged name, also supports DFS mode and + * DVFS mode. + * + * In the context of this document and the related driver, "AVS" by + * itself always means the Broadcom firmware and never refers to the + * technique called "Adaptive Voltage Scaling". + * + * The Broadcom STB AVS CPUfreq driver provides voltage and frequency + * scaling on Broadcom SoCs using AVS firmware with support for DFS and + * DVFS. The AVS firmware is running on its own co-processor. The + * driver supports both uniprocessor (UP) and symmetric multiprocessor + * (SMP) systems which share clock and voltage across all CPUs. + * + * Actual voltage and frequency scaling is done solely by the AVS + * firmware. This driver does not change frequency or voltage itself. + * It provides a standard CPUfreq interface to the rest of the kernel + * and to userland. It interfaces with the AVS firmware to effect the + * requested changes and to report back the current system status in a + * way that is expected by existing tools. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Max number of arguments AVS calls take */ +#define AVS_MAX_CMD_ARGS 4 +/* + * This macro is used to generate AVS parameter register offsets. For + * x >= AVS_MAX_CMD_ARGS, it returns 0 to protect against accidental memory + * access outside of the parameter range. (Offset 0 is the first parameter.) + */ +#define AVS_PARAM_MULT(x) ((x) < AVS_MAX_CMD_ARGS ? (x) : 0) + +/* AVS Mailbox Register offsets */ +#define AVS_MBOX_COMMAND 0x00 +#define AVS_MBOX_STATUS 0x04 +#define AVS_MBOX_VOLTAGE0 0x08 +#define AVS_MBOX_TEMP0 0x0c +#define AVS_MBOX_PV0 0x10 +#define AVS_MBOX_MV0 0x14 +#define AVS_MBOX_PARAM(x) (0x18 + AVS_PARAM_MULT(x) * sizeof(u32)) +#define AVS_MBOX_REVISION 0x28 +#define AVS_MBOX_PSTATE 0x2c +#define AVS_MBOX_HEARTBEAT 0x30 +#define AVS_MBOX_MAGIC 0x34 +#define AVS_MBOX_SIGMA_HVT 0x38 +#define AVS_MBOX_SIGMA_SVT 0x3c +#define AVS_MBOX_VOLTAGE1 0x40 +#define AVS_MBOX_TEMP1 0x44 +#define AVS_MBOX_PV1 0x48 +#define AVS_MBOX_MV1 0x4c +#define AVS_MBOX_FREQUENCY 0x50 + +/* AVS Commands */ +#define AVS_CMD_AVAILABLE 0x00 +#define AVS_CMD_DISABLE 0x10 +#define AVS_CMD_ENABLE 0x11 +#define AVS_CMD_S2_ENTER 0x12 +#define AVS_CMD_S2_EXIT 0x13 +#define AVS_CMD_BBM_ENTER 0x14 +#define AVS_CMD_BBM_EXIT 0x15 +#define AVS_CMD_S3_ENTER 0x16 +#define AVS_CMD_S3_EXIT 0x17 +#define AVS_CMD_BALANCE 0x18 +/* PMAP and P-STATE commands */ +#define AVS_CMD_GET_PMAP 0x30 +#define AVS_CMD_SET_PMAP 0x31 +#define AVS_CMD_GET_PSTATE 0x40 +#define AVS_CMD_SET_PSTATE 0x41 + +/* Different modes AVS supports (for GET_PMAP/SET_PMAP) */ +#define AVS_MODE_AVS 0x0 +#define AVS_MODE_DFS 0x1 +#define AVS_MODE_DVS 0x2 +#define AVS_MODE_DVFS 0x3 + +/* + * PMAP parameter p1 + * unused:31-24, mdiv_p0:23-16, unused:15-14, pdiv:13-10 , ndiv_int:9-0 + */ +#define NDIV_INT_SHIFT 0 +#define NDIV_INT_MASK 0x3ff +#define PDIV_SHIFT 10 +#define PDIV_MASK 0xf +#define MDIV_P0_SHIFT 16 +#define MDIV_P0_MASK 0xff +/* + * PMAP parameter p2 + * mdiv_p4:31-24, mdiv_p3:23-16, mdiv_p2:15:8, mdiv_p1:7:0 + */ +#define MDIV_P1_SHIFT 0 +#define MDIV_P1_MASK 0xff +#define MDIV_P2_SHIFT 8 +#define MDIV_P2_MASK 0xff +#define MDIV_P3_SHIFT 16 +#define MDIV_P3_MASK 0xff +#define MDIV_P4_SHIFT 24 +#define MDIV_P4_MASK 0xff + +/* Different P-STATES AVS supports (for GET_PSTATE/SET_PSTATE) */ +#define AVS_PSTATE_P0 0x0 +#define AVS_PSTATE_P1 0x1 +#define AVS_PSTATE_P2 0x2 +#define AVS_PSTATE_P3 0x3 +#define AVS_PSTATE_P4 0x4 +#define AVS_PSTATE_MAX AVS_PSTATE_P4 + +/* CPU L2 Interrupt Controller Registers */ +#define AVS_CPU_L2_SET0 0x04 +#define AVS_CPU_L2_INT_MASK BIT(31) + +/* AVS Command Status Values */ +#define AVS_STATUS_CLEAR 0x00 +/* Command/notification accepted */ +#define AVS_STATUS_SUCCESS 0xf0 +/* Command/notification rejected */ +#define AVS_STATUS_FAILURE 0xff +/* Invalid command/notification (unknown) */ +#define AVS_STATUS_INVALID 0xf1 +/* Non-AVS modes are not supported */ +#define AVS_STATUS_NO_SUPP 0xf2 +/* Cannot set P-State until P-Map supplied */ +#define AVS_STATUS_NO_MAP 0xf3 +/* Cannot change P-Map after initial P-Map set */ +#define AVS_STATUS_MAP_SET 0xf4 +/* Max AVS status; higher numbers are used for debugging */ +#define AVS_STATUS_MAX 0xff + +/* Other AVS related constants */ +#define AVS_LOOP_LIMIT 10000 +#define AVS_TIMEOUT 300 /* in ms; expected completion is < 10ms */ +#define AVS_FIRMWARE_MAGIC 0xa11600d1 + +#define BRCM_AVS_CPUFREQ_PREFIX "brcmstb-avs" +#define BRCM_AVS_CPUFREQ_NAME BRCM_AVS_CPUFREQ_PREFIX "-cpufreq" +#define BRCM_AVS_CPU_DATA "brcm,avs-cpu-data-mem" +#define BRCM_AVS_CPU_INTR "brcm,avs-cpu-l2-intr" +#define BRCM_AVS_HOST_INTR "sw_intr" + +struct pmap { + unsigned int mode; + unsigned int p1; + unsigned int p2; + unsigned int state; +}; + +struct private_data { + void __iomem *base; + void __iomem *avs_intr_base; + struct device *dev; + struct completion done; + struct semaphore sem; + struct pmap pmap; +}; + +static void __iomem *__map_region(const char *name) +{ + struct device_node *np; + void __iomem *ptr; + + np = of_find_compatible_node(NULL, NULL, name); + if (!np) + return NULL; + + ptr = of_iomap(np, 0); + of_node_put(np); + + return ptr; +} + +static int __issue_avs_command(struct private_data *priv, int cmd, bool is_send, + u32 args[]) +{ + unsigned long time_left = msecs_to_jiffies(AVS_TIMEOUT); + void __iomem *base = priv->base; + unsigned int i; + int ret; + u32 val; + + ret = down_interruptible(&priv->sem); + if (ret) + return ret; + + /* + * Make sure no other command is currently running: cmd is 0 if AVS + * co-processor is idle. Due to the guard above, we should almost never + * have to wait here. + */ + for (i = 0, val = 1; val != 0 && i < AVS_LOOP_LIMIT; i++) + val = readl(base + AVS_MBOX_COMMAND); + + /* Give the caller a chance to retry if AVS is busy. */ + if (i == AVS_LOOP_LIMIT) { + ret = -EAGAIN; + goto out; + } + + /* Clear status before we begin. */ + writel(AVS_STATUS_CLEAR, base + AVS_MBOX_STATUS); + + /* We need to send arguments for this command. */ + if (args && is_send) { + for (i = 0; i < AVS_MAX_CMD_ARGS; i++) + writel(args[i], base + AVS_MBOX_PARAM(i)); + } + + /* Protect from spurious interrupts. */ + reinit_completion(&priv->done); + + /* Now issue the command & tell firmware to wake up to process it. */ + writel(cmd, base + AVS_MBOX_COMMAND); + writel(AVS_CPU_L2_INT_MASK, priv->avs_intr_base + AVS_CPU_L2_SET0); + + /* Wait for AVS co-processor to finish processing the command. */ + time_left = wait_for_completion_timeout(&priv->done, time_left); + + /* + * If the AVS status is not in the expected range, it means AVS didn't + * complete our command in time, and we return an error. Also, if there + * is no "time left", we timed out waiting for the interrupt. + */ + val = readl(base + AVS_MBOX_STATUS); + if (time_left == 0 || val == 0 || val > AVS_STATUS_MAX) { + dev_err(priv->dev, "AVS command %#x didn't complete in time\n", + cmd); + dev_err(priv->dev, " Time left: %u ms, AVS status: %#x\n", + jiffies_to_msecs(time_left), val); + ret = -ETIMEDOUT; + goto out; + } + + /* This command returned arguments, so we read them back. */ + if (args && !is_send) { + for (i = 0; i < AVS_MAX_CMD_ARGS; i++) + args[i] = readl(base + AVS_MBOX_PARAM(i)); + } + + /* Clear status to tell AVS co-processor we are done. */ + writel(AVS_STATUS_CLEAR, base + AVS_MBOX_STATUS); + + /* Convert firmware errors to errno's as much as possible. */ + switch (val) { + case AVS_STATUS_INVALID: + ret = -EINVAL; + break; + case AVS_STATUS_NO_SUPP: + ret = -ENOTSUPP; + break; + case AVS_STATUS_NO_MAP: + ret = -ENOENT; + break; + case AVS_STATUS_MAP_SET: + ret = -EEXIST; + break; + case AVS_STATUS_FAILURE: + ret = -EIO; + break; + } + +out: + up(&priv->sem); + + return ret; +} + +static irqreturn_t irq_handler(int irq, void *data) +{ + struct private_data *priv = data; + + /* AVS command completed execution. Wake up __issue_avs_command(). */ + complete(&priv->done); + + return IRQ_HANDLED; +} + +static char *brcm_avs_mode_to_string(unsigned int mode) +{ + switch (mode) { + case AVS_MODE_AVS: + return "AVS"; + case AVS_MODE_DFS: + return "DFS"; + case AVS_MODE_DVS: + return "DVS"; + case AVS_MODE_DVFS: + return "DVFS"; + } + return NULL; +} + +static void brcm_avs_parse_p1(u32 p1, unsigned int *mdiv_p0, unsigned int *pdiv, + unsigned int *ndiv) +{ + *mdiv_p0 = (p1 >> MDIV_P0_SHIFT) & MDIV_P0_MASK; + *pdiv = (p1 >> PDIV_SHIFT) & PDIV_MASK; + *ndiv = (p1 >> NDIV_INT_SHIFT) & NDIV_INT_MASK; +} + +static void brcm_avs_parse_p2(u32 p2, unsigned int *mdiv_p1, + unsigned int *mdiv_p2, unsigned int *mdiv_p3, + unsigned int *mdiv_p4) +{ + *mdiv_p4 = (p2 >> MDIV_P4_SHIFT) & MDIV_P4_MASK; + *mdiv_p3 = (p2 >> MDIV_P3_SHIFT) & MDIV_P3_MASK; + *mdiv_p2 = (p2 >> MDIV_P2_SHIFT) & MDIV_P2_MASK; + *mdiv_p1 = (p2 >> MDIV_P1_SHIFT) & MDIV_P1_MASK; +} + +static int brcm_avs_get_pmap(struct private_data *priv, struct pmap *pmap) +{ + u32 args[AVS_MAX_CMD_ARGS]; + int ret; + + ret = __issue_avs_command(priv, AVS_CMD_GET_PMAP, false, args); + if (ret || !pmap) + return ret; + + pmap->mode = args[0]; + pmap->p1 = args[1]; + pmap->p2 = args[2]; + pmap->state = args[3]; + + return 0; +} + +static int brcm_avs_set_pmap(struct private_data *priv, struct pmap *pmap) +{ + u32 args[AVS_MAX_CMD_ARGS]; + + args[0] = pmap->mode; + args[1] = pmap->p1; + args[2] = pmap->p2; + args[3] = pmap->state; + + return __issue_avs_command(priv, AVS_CMD_SET_PMAP, true, args); +} + +static int brcm_avs_get_pstate(struct private_data *priv, unsigned int *pstate) +{ + u32 args[AVS_MAX_CMD_ARGS]; + int ret; + + ret = __issue_avs_command(priv, AVS_CMD_GET_PSTATE, false, args); + if (ret) + return ret; + *pstate = args[0]; + + return 0; +} + +static int brcm_avs_set_pstate(struct private_data *priv, unsigned int pstate) +{ + u32 args[AVS_MAX_CMD_ARGS]; + + args[0] = pstate; + + return __issue_avs_command(priv, AVS_CMD_SET_PSTATE, true, args); +} + +static unsigned long brcm_avs_get_voltage(void __iomem *base) +{ + return readl(base + AVS_MBOX_VOLTAGE1); +} + +static unsigned long brcm_avs_get_frequency(void __iomem *base) +{ + return readl(base + AVS_MBOX_FREQUENCY) * 1000; /* in kHz */ +} + +/* + * We determine which frequencies are supported by cycling through all P-states + * and reading back what frequency we are running at for each P-state. + */ +static struct cpufreq_frequency_table * +brcm_avs_get_freq_table(struct device *dev, struct private_data *priv) +{ + struct cpufreq_frequency_table *table; + unsigned int pstate; + int i, ret; + + /* Remember P-state for later */ + ret = brcm_avs_get_pstate(priv, &pstate); + if (ret) + return ERR_PTR(ret); + + table = devm_kzalloc(dev, (AVS_PSTATE_MAX + 1) * sizeof(*table), + GFP_KERNEL); + if (!table) + return ERR_PTR(-ENOMEM); + + for (i = AVS_PSTATE_P0; i <= AVS_PSTATE_MAX; i++) { + ret = brcm_avs_set_pstate(priv, i); + if (ret) + return ERR_PTR(ret); + table[i].frequency = brcm_avs_get_frequency(priv->base); + table[i].driver_data = i; + } + table[i].frequency = CPUFREQ_TABLE_END; + + /* Restore P-state */ + ret = brcm_avs_set_pstate(priv, pstate); + if (ret) + return ERR_PTR(ret); + + return table; +} + +/* + * To ensure the right firmware is running we need to + * - check the MAGIC matches what we expect + * - brcm_avs_get_pmap() doesn't return -ENOTSUPP or -EINVAL + * We need to set up our interrupt handling before calling brcm_avs_get_pmap()! + */ +static bool brcm_avs_is_firmware_loaded(struct private_data *priv) +{ + u32 magic; + int rc; + + rc = brcm_avs_get_pmap(priv, NULL); + magic = readl(priv->base + AVS_MBOX_MAGIC); + + return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) && + (rc != -EINVAL); +} + +static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct private_data *priv = policy->driver_data; + + return brcm_avs_get_frequency(priv->base); +} + +static int brcm_avs_target_index(struct cpufreq_policy *policy, + unsigned int index) +{ + return brcm_avs_set_pstate(policy->driver_data, + policy->freq_table[index].driver_data); +} + +static int brcm_avs_suspend(struct cpufreq_policy *policy) +{ + struct private_data *priv = policy->driver_data; + + return brcm_avs_get_pmap(priv, &priv->pmap); +} + +static int brcm_avs_resume(struct cpufreq_policy *policy) +{ + struct private_data *priv = policy->driver_data; + int ret; + + ret = brcm_avs_set_pmap(priv, &priv->pmap); + if (ret == -EEXIST) { + struct platform_device *pdev = cpufreq_get_driver_data(); + struct device *dev = &pdev->dev; + + dev_warn(dev, "PMAP was already set\n"); + ret = 0; + } + + return ret; +} + +/* + * All initialization code that we only want to execute once goes here. Setup + * code that can be re-tried on every core (if it failed before) can go into + * brcm_avs_cpufreq_init(). + */ +static int brcm_avs_prepare_init(struct platform_device *pdev) +{ + struct private_data *priv; + struct device *dev; + int host_irq, ret; + + dev = &pdev->dev; + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + sema_init(&priv->sem, 1); + init_completion(&priv->done); + platform_set_drvdata(pdev, priv); + + priv->base = __map_region(BRCM_AVS_CPU_DATA); + if (!priv->base) { + dev_err(dev, "Couldn't find property %s in device tree.\n", + BRCM_AVS_CPU_DATA); + return -ENOENT; + } + + priv->avs_intr_base = __map_region(BRCM_AVS_CPU_INTR); + if (!priv->avs_intr_base) { + dev_err(dev, "Couldn't find property %s in device tree.\n", + BRCM_AVS_CPU_INTR); + ret = -ENOENT; + goto unmap_base; + } + + host_irq = platform_get_irq_byname(pdev, BRCM_AVS_HOST_INTR); + if (host_irq < 0) { + dev_err(dev, "Couldn't find interrupt %s -- %d\n", + BRCM_AVS_HOST_INTR, host_irq); + ret = host_irq; + goto unmap_intr_base; + } + + ret = devm_request_irq(dev, host_irq, irq_handler, IRQF_TRIGGER_RISING, + BRCM_AVS_HOST_INTR, priv); + if (ret) { + dev_err(dev, "IRQ request failed: %s (%d) -- %d\n", + BRCM_AVS_HOST_INTR, host_irq, ret); + goto unmap_intr_base; + } + + if (brcm_avs_is_firmware_loaded(priv)) + return 0; + + dev_err(dev, "AVS firmware is not loaded or doesn't support DVFS\n"); + ret = -ENODEV; + +unmap_intr_base: + iounmap(priv->avs_intr_base); +unmap_base: + iounmap(priv->base); + platform_set_drvdata(pdev, NULL); + + return ret; +} + +static int brcm_avs_cpufreq_init(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *freq_table; + struct platform_device *pdev; + struct private_data *priv; + struct device *dev; + int ret; + + pdev = cpufreq_get_driver_data(); + priv = platform_get_drvdata(pdev); + policy->driver_data = priv; + dev = &pdev->dev; + + freq_table = brcm_avs_get_freq_table(dev, priv); + if (IS_ERR(freq_table)) { + ret = PTR_ERR(freq_table); + dev_err(dev, "Couldn't determine frequency table (%d).\n", ret); + return ret; + } + + ret = cpufreq_table_validate_and_show(policy, freq_table); + if (ret) { + dev_err(dev, "invalid frequency table: %d\n", ret); + return ret; + } + + /* All cores share the same clock and thus the same policy. */ + cpumask_setall(policy->cpus); + + ret = __issue_avs_command(priv, AVS_CMD_ENABLE, false, NULL); + if (!ret) { + unsigned int pstate; + + ret = brcm_avs_get_pstate(priv, &pstate); + if (!ret) { + policy->cur = freq_table[pstate].frequency; + dev_info(dev, "registered\n"); + return 0; + } + } + + dev_err(dev, "couldn't initialize driver (%d)\n", ret); + + return ret; +} + +static ssize_t show_brcm_avs_pstate(struct cpufreq_policy *policy, char *buf) +{ + struct private_data *priv = policy->driver_data; + unsigned int pstate; + + if (brcm_avs_get_pstate(priv, &pstate)) + return sprintf(buf, "\n"); + + return sprintf(buf, "%u\n", pstate); +} + +static ssize_t show_brcm_avs_mode(struct cpufreq_policy *policy, char *buf) +{ + struct private_data *priv = policy->driver_data; + struct pmap pmap; + + if (brcm_avs_get_pmap(priv, &pmap)) + return sprintf(buf, "\n"); + + return sprintf(buf, "%s %u\n", brcm_avs_mode_to_string(pmap.mode), + pmap.mode); +} + +static ssize_t show_brcm_avs_pmap(struct cpufreq_policy *policy, char *buf) +{ + unsigned int mdiv_p0, mdiv_p1, mdiv_p2, mdiv_p3, mdiv_p4; + struct private_data *priv = policy->driver_data; + unsigned int ndiv, pdiv; + struct pmap pmap; + + if (brcm_avs_get_pmap(priv, &pmap)) + return sprintf(buf, "\n"); + + brcm_avs_parse_p1(pmap.p1, &mdiv_p0, &pdiv, &ndiv); + brcm_avs_parse_p2(pmap.p2, &mdiv_p1, &mdiv_p2, &mdiv_p3, &mdiv_p4); + + return sprintf(buf, "0x%08x 0x%08x %u %u %u %u %u %u %u\n", + pmap.p1, pmap.p2, ndiv, pdiv, mdiv_p0, mdiv_p1, mdiv_p2, + mdiv_p3, mdiv_p4); +} + +static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf) +{ + struct private_data *priv = policy->driver_data; + + return sprintf(buf, "0x%08lx\n", brcm_avs_get_voltage(priv->base)); +} + +static ssize_t show_brcm_avs_frequency(struct cpufreq_policy *policy, char *buf) +{ + struct private_data *priv = policy->driver_data; + + return sprintf(buf, "0x%08lx\n", brcm_avs_get_frequency(priv->base)); +} + +cpufreq_freq_attr_ro(brcm_avs_pstate); +cpufreq_freq_attr_ro(brcm_avs_mode); +cpufreq_freq_attr_ro(brcm_avs_pmap); +cpufreq_freq_attr_ro(brcm_avs_voltage); +cpufreq_freq_attr_ro(brcm_avs_frequency); + +struct freq_attr *brcm_avs_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + &brcm_avs_pstate, + &brcm_avs_mode, + &brcm_avs_pmap, + &brcm_avs_voltage, + &brcm_avs_frequency, + NULL +}; + +static struct cpufreq_driver brcm_avs_driver = { + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = brcm_avs_target_index, + .get = brcm_avs_cpufreq_get, + .suspend = brcm_avs_suspend, + .resume = brcm_avs_resume, + .init = brcm_avs_cpufreq_init, + .attr = brcm_avs_cpufreq_attr, + .name = BRCM_AVS_CPUFREQ_PREFIX, +}; + +static int brcm_avs_cpufreq_probe(struct platform_device *pdev) +{ + int ret; + + ret = brcm_avs_prepare_init(pdev); + if (ret) + return ret; + + brcm_avs_driver.driver_data = pdev; + + return cpufreq_register_driver(&brcm_avs_driver); +} + +static int brcm_avs_cpufreq_remove(struct platform_device *pdev) +{ + struct private_data *priv; + int ret; + + ret = cpufreq_unregister_driver(&brcm_avs_driver); + if (ret) + return ret; + + priv = platform_get_drvdata(pdev); + iounmap(priv->base); + iounmap(priv->avs_intr_base); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static const struct of_device_id brcm_avs_cpufreq_match[] = { + { .compatible = BRCM_AVS_CPU_DATA }, + { } +}; +MODULE_DEVICE_TABLE(of, brcm_avs_cpufreq_match); + +static struct platform_driver brcm_avs_cpufreq_platdrv = { + .driver = { + .name = BRCM_AVS_CPUFREQ_NAME, + .of_match_table = brcm_avs_cpufreq_match, + }, + .probe = brcm_avs_cpufreq_probe, + .remove = brcm_avs_cpufreq_remove, +}; +module_platform_driver(brcm_avs_cpufreq_platdrv); + +MODULE_AUTHOR("Markus Mayer "); +MODULE_DESCRIPTION("CPUfreq driver for Broadcom STB AVS"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 33de45c133b40a129a880c175af4c2bb39dafc88 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Thu, 27 Oct 2016 14:05:36 -0700 Subject: cpufreq: brcmstb-avs-cpufreq: add debugfs support In order to aid debugging, we add a debugfs interface to the driver that allows direct interaction with the AVS co-processor. The debugfs interface provides a means for reading all and writing some of the mailbox registers directly from the shell prompt and enables a user to execute the communications protocol between ARM CPU and AVS CPU step-by-step. This interface should be used for debugging purposes only. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 10 ++ drivers/cpufreq/brcmstb-avs-cpufreq.c | 323 +++++++++++++++++++++++++++++++++- 2 files changed, 332 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 04f7862eacd3..920c469f3953 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -23,6 +23,16 @@ config ARM_BRCMSTB_AVS_CPUFREQ Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS. +config ARM_BRCMSTB_AVS_CPUFREQ_DEBUG + bool "Broadcom STB AVS CPUfreq driver sysfs debug capability" + depends on ARM_BRCMSTB_AVS_CPUFREQ + help + Enabling this option turns on debug support via sysfs under + /sys/kernel/debug/brcmstb-avs-cpufreq. It is possible to read all and + write some AVS mailbox registers through sysfs entries. + + If in doubt, say N. + config ARM_DT_BL_CPUFREQ tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" depends on ARM_BIG_LITTLE_CPUFREQ && OF diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 4415fa051283..b761d54bb08d 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -49,6 +49,13 @@ #include #include +#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG +#include +#include +#include +#include +#endif + /* Max number of arguments AVS calls take */ #define AVS_MAX_CMD_ARGS 4 /* @@ -175,11 +182,88 @@ struct private_data { void __iomem *base; void __iomem *avs_intr_base; struct device *dev; +#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG + struct dentry *debugfs; +#endif struct completion done; struct semaphore sem; struct pmap pmap; }; +#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG + +enum debugfs_format { + DEBUGFS_NORMAL, + DEBUGFS_FLOAT, + DEBUGFS_REV, +}; + +struct debugfs_data { + struct debugfs_entry *entry; + struct private_data *priv; +}; + +struct debugfs_entry { + char *name; + u32 offset; + fmode_t mode; + enum debugfs_format format; +}; + +#define DEBUGFS_ENTRY(name, mode, format) { \ + #name, AVS_MBOX_##name, mode, format \ +} + +/* + * These are used for debugfs only. Otherwise we use AVS_MBOX_PARAM() directly. + */ +#define AVS_MBOX_PARAM1 AVS_MBOX_PARAM(0) +#define AVS_MBOX_PARAM2 AVS_MBOX_PARAM(1) +#define AVS_MBOX_PARAM3 AVS_MBOX_PARAM(2) +#define AVS_MBOX_PARAM4 AVS_MBOX_PARAM(3) + +/* + * This table stores the name, access permissions and offset for each hardware + * register and is used to generate debugfs entries. + */ +static struct debugfs_entry debugfs_entries[] = { + DEBUGFS_ENTRY(COMMAND, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(STATUS, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(VOLTAGE0, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(TEMP0, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(PV0, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(MV0, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(PARAM1, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(PARAM2, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(PARAM3, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(PARAM4, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(REVISION, 0, DEBUGFS_REV), + DEBUGFS_ENTRY(PSTATE, 0, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(HEARTBEAT, 0, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(MAGIC, S_IWUSR, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(SIGMA_HVT, 0, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(SIGMA_SVT, 0, DEBUGFS_NORMAL), + DEBUGFS_ENTRY(VOLTAGE1, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(TEMP1, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(PV1, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(MV1, 0, DEBUGFS_FLOAT), + DEBUGFS_ENTRY(FREQUENCY, 0, DEBUGFS_NORMAL), +}; + +static int brcm_avs_target_index(struct cpufreq_policy *, unsigned int); + +static char *__strtolower(char *s) +{ + char *p; + + for (p = s; *p; p++) + *p = tolower(*p); + + return s; +} + +#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */ + static void __iomem *__map_region(const char *name) { struct device_node *np; @@ -432,6 +516,238 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv) return table; } +#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG + +#define MANT(x) (unsigned int)(abs((x)) / 1000) +#define FRAC(x) (unsigned int)(abs((x)) - abs((x)) / 1000 * 1000) + +static int brcm_avs_debug_show(struct seq_file *s, void *data) +{ + struct debugfs_data *dbgfs = s->private; + void __iomem *base; + u32 val, offset; + + if (!dbgfs) { + seq_puts(s, "No device pointer\n"); + return 0; + } + + base = dbgfs->priv->base; + offset = dbgfs->entry->offset; + val = readl(base + offset); + switch (dbgfs->entry->format) { + case DEBUGFS_NORMAL: + seq_printf(s, "%u\n", val); + break; + case DEBUGFS_FLOAT: + seq_printf(s, "%d.%03d\n", MANT(val), FRAC(val)); + break; + case DEBUGFS_REV: + seq_printf(s, "%c.%c.%c.%c\n", (val >> 24 & 0xff), + (val >> 16 & 0xff), (val >> 8 & 0xff), + val & 0xff); + break; + } + seq_printf(s, "0x%08x\n", val); + + return 0; +} + +#undef MANT +#undef FRAC + +static ssize_t brcm_avs_seq_write(struct file *file, const char __user *buf, + size_t size, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct debugfs_data *dbgfs = s->private; + struct private_data *priv = dbgfs->priv; + void __iomem *base, *avs_intr_base; + bool use_issue_command = false; + unsigned long val, offset; + char str[128]; + int ret; + char *str_ptr = str; + + if (size >= sizeof(str)) + return -E2BIG; + + memset(str, 0, sizeof(str)); + ret = copy_from_user(str, buf, size); + if (ret) + return ret; + + base = priv->base; + avs_intr_base = priv->avs_intr_base; + offset = dbgfs->entry->offset; + /* + * Special case writing to "command" entry only: if the string starts + * with a 'c', we use the driver's __issue_avs_command() function. + * Otherwise, we perform a raw write. This should allow testing of raw + * access as well as using the higher level function. (Raw access + * doesn't clear the firmware return status after issuing the command.) + */ + if (str_ptr[0] == 'c' && offset == AVS_MBOX_COMMAND) { + use_issue_command = true; + str_ptr++; + } + if (kstrtoul(str_ptr, 0, &val) != 0) + return -EINVAL; + + /* + * Setting the P-state is a special case. We need to update the CPU + * frequency we report. + */ + if (val == AVS_CMD_SET_PSTATE) { + struct cpufreq_policy *policy; + unsigned int pstate; + + policy = cpufreq_cpu_get(smp_processor_id()); + /* Read back the P-state we are about to set */ + pstate = readl(base + AVS_MBOX_PARAM(0)); + if (use_issue_command) { + ret = brcm_avs_target_index(policy, pstate); + return ret ? ret : size; + } + policy->cur = policy->freq_table[pstate].frequency; + } + + if (use_issue_command) { + ret = __issue_avs_command(priv, val, false, NULL); + } else { + /* Locking here is not perfect, but is only for debug. */ + ret = down_interruptible(&priv->sem); + if (ret) + return ret; + + writel(val, base + offset); + /* We have to wake up the firmware to process a command. */ + if (offset == AVS_MBOX_COMMAND) + writel(AVS_CPU_L2_INT_MASK, + avs_intr_base + AVS_CPU_L2_SET0); + up(&priv->sem); + } + + return ret ? ret : size; +} + +static struct debugfs_entry *__find_debugfs_entry(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++) + if (strcasecmp(debugfs_entries[i].name, name) == 0) + return &debugfs_entries[i]; + + return NULL; +} + +static int brcm_avs_debug_open(struct inode *inode, struct file *file) +{ + struct debugfs_data *data; + fmode_t fmode; + int ret; + + /* + * seq_open(), which is called by single_open(), clears "write" access. + * We need write access to some files, so we preserve our access mode + * and restore it. + */ + fmode = file->f_mode; + /* + * Check access permissions even for root. We don't want to be writing + * to read-only registers. Access for regular users has already been + * checked by the VFS layer. + */ + if ((fmode & FMODE_WRITER) && !(inode->i_mode & S_IWUSR)) + return -EACCES; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + /* + * We use the same file system operations for all our debug files. To + * produce specific output, we look up the file name upon opening a + * debugfs entry and map it to a memory offset. This offset is then used + * in the generic "show" function to read a specific register. + */ + data->entry = __find_debugfs_entry(file->f_path.dentry->d_iname); + data->priv = inode->i_private; + + ret = single_open(file, brcm_avs_debug_show, data); + if (ret) + kfree(data); + file->f_mode = fmode; + + return ret; +} + +static int brcm_avs_debug_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq_priv = file->private_data; + struct debugfs_data *data = seq_priv->private; + + kfree(data); + return single_release(inode, file); +} + +static const struct file_operations brcm_avs_debug_ops = { + .open = brcm_avs_debug_open, + .read = seq_read, + .write = brcm_avs_seq_write, + .llseek = seq_lseek, + .release = brcm_avs_debug_release, +}; + +static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev) +{ + struct private_data *priv = platform_get_drvdata(pdev); + struct dentry *dir; + int i; + + if (!priv) + return; + + dir = debugfs_create_dir(BRCM_AVS_CPUFREQ_NAME, NULL); + if (IS_ERR_OR_NULL(dir)) + return; + priv->debugfs = dir; + + for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++) { + /* + * The DEBUGFS_ENTRY macro generates uppercase strings. We + * convert them to lowercase before creating the debugfs + * entries. + */ + char *entry = __strtolower(debugfs_entries[i].name); + fmode_t mode = debugfs_entries[i].mode; + + if (!debugfs_create_file(entry, S_IFREG | S_IRUGO | mode, + dir, priv, &brcm_avs_debug_ops)) { + priv->debugfs = NULL; + debugfs_remove_recursive(dir); + break; + } + } +} + +static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev) +{ + struct private_data *priv = platform_get_drvdata(pdev); + + if (priv && priv->debugfs) { + debugfs_remove_recursive(priv->debugfs); + priv->debugfs = NULL; + } +} + +#else + +static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev) {} +static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev) {} + +#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */ + /* * To ensure the right firmware is running we need to * - check the MAGIC matches what we expect @@ -694,8 +1010,11 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev) return ret; brcm_avs_driver.driver_data = pdev; + ret = cpufreq_register_driver(&brcm_avs_driver); + if (!ret) + brcm_avs_cpufreq_debug_init(pdev); - return cpufreq_register_driver(&brcm_avs_driver); + return ret; } static int brcm_avs_cpufreq_remove(struct platform_device *pdev) @@ -707,6 +1026,8 @@ static int brcm_avs_cpufreq_remove(struct platform_device *pdev) if (ret) return ret; + brcm_avs_cpufreq_debug_exit(pdev); + priv = platform_get_drvdata(pdev); iounmap(priv->base); iounmap(priv->avs_intr_base); -- cgit v1.2.3 From a410c03d668122923ab9b17c0c5728b520edddb4 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 28 Oct 2016 10:44:52 -0700 Subject: cpufreq: intel_pstate: protect limits variable The limits variable gets modified from intel_pstate sysfs and also gets modified from cpufreq sysfs. So protect with a mutex to keep data integrity, when they are getting modified from multiple threads. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8e7390bb2175..0837175395b9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -373,6 +373,8 @@ static struct perf_limits *limits = &performance_limits; static struct perf_limits *limits = &powersave_limits; #endif +static DEFINE_MUTEX(intel_pstate_limits_lock); + #ifdef CONFIG_ACPI static bool intel_pstate_get_ppc_enable_status(void) @@ -730,14 +732,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; + mutex_lock(&intel_pstate_limits_lock); + update_turbo_state(); if (limits->turbo_disabled) { pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); + mutex_unlock(&intel_pstate_limits_lock); return -EPERM; } limits->no_turbo = clamp_t(int, input, 0, 1); + mutex_unlock(&intel_pstate_limits_lock); + if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -754,6 +761,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; + mutex_lock(&intel_pstate_limits_lock); + limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); limits->max_perf_pct = min(limits->max_policy_pct, limits->max_sysfs_pct); @@ -763,6 +772,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf = div_fp(limits->max_perf_pct, 100); + mutex_unlock(&intel_pstate_limits_lock); + if (hwp_active) intel_pstate_hwp_set_online_cpus(); return count; @@ -778,6 +789,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; + mutex_lock(&intel_pstate_limits_lock); + limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); limits->min_perf_pct = max(limits->min_policy_pct, limits->min_sysfs_pct); @@ -787,6 +800,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf = div_fp(limits->min_perf_pct, 100); + mutex_unlock(&intel_pstate_limits_lock); + if (hwp_active) intel_pstate_hwp_set_online_cpus(); return count; @@ -1528,6 +1543,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) static void intel_pstate_set_performance_limits(struct perf_limits *limits) { + mutex_lock(&intel_pstate_limits_lock); limits->no_turbo = 0; limits->turbo_disabled = 0; limits->max_perf_pct = 100; @@ -1538,11 +1554,15 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) limits->max_sysfs_pct = 100; limits->min_policy_pct = 0; limits->min_sysfs_pct = 0; + mutex_unlock(&intel_pstate_limits_lock); } static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { + + mutex_lock(&intel_pstate_limits_lock); + limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, policy->cpuinfo.max_freq); limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100); @@ -1572,6 +1592,8 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, limits->max_perf = div_fp(limits->max_perf_pct, 100); limits->max_perf = round_up(limits->max_perf, FRAC_BITS); + mutex_unlock(&intel_pstate_limits_lock); + pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, limits->max_perf_pct, limits->min_perf_pct); } -- cgit v1.2.3 From 8812872960824681147fad051e6e1406fdfa07f9 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 27 Oct 2016 13:23:54 +0200 Subject: net: smsc911x: Synchronize the runtime PM status during system suspend The smsc911c driver puts its device into low power state when entering system suspend. Although it doesn't update the device's runtime PM status to RPM_SUSPENDED, which causes problems for a parent device. In particular, when the runtime PM status of the parent is requested to be updated to RPM_SUSPENDED, the runtime PM core prevent this, because it's forbidden to runtime suspend a device, which has an active child. Fix this by updating the runtime PM status of the smsc911x device to RPM_SUSPENDED during system suspend. In system resume, let's reverse that action by runtime resuming the device and thus also the parent. Signed-off-by: Ulf Hansson Tested-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/net/ethernet/smsc/smsc911x.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index e9b8579e6241..65fca9ca26ba 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2584,6 +2584,9 @@ static int smsc911x_suspend(struct device *dev) PMT_CTRL_PM_MODE_D1_ | PMT_CTRL_WOL_EN_ | PMT_CTRL_ED_EN_ | PMT_CTRL_PME_EN_); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + return 0; } @@ -2593,6 +2596,9 @@ static int smsc911x_resume(struct device *dev) struct smsc911x_data *pdata = netdev_priv(ndev); unsigned int to = 100; + pm_runtime_enable(dev); + pm_runtime_resume(dev); + /* Note 3.11 from the datasheet: * "When the LAN9220 is in a power saving state, a write of any * data to the BYTE_TEST register will wake-up the device." -- cgit v1.2.3 From a8636c89648ab12e59d8f3aa667ec76fc96fd643 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 20:17:01 +0200 Subject: PM / Runtime: Don't allow to suspend a device with an active child When resuming a device in __pm_runtime_set_status(), the prerequisite is that its parent must already be active, else an error code is returned and the device's status remains suspended. When suspending a device there is no similar constraints being validated. Let's change this to make the behaviour consistent, by not allowing to suspend a device with an active child, unless it has been explicitly set to ignore its children. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 117db71e84cb..60ebb04d8140 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1028,7 +1028,17 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) goto out_set; if (status == RPM_SUSPENDED) { - /* It always is possible to set the status to 'suspended'. */ + /* + * It is invalid to suspend a device with an active child, + * unless it has been set to ignore its children. + */ + if (!dev->power.ignore_children && + atomic_read(&dev->power.child_count)) { + dev_err(dev, "runtime PM trying to suspend device but active child\n"); + error = -EBUSY; + goto out; + } + if (parent) { atomic_add_unless(&parent->power.child_count, -1, 0); notify_parent = !parent->power.ignore_children; -- cgit v1.2.3 From e7d040b8a2ea36a40f0d4176fe558d4e813fb715 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 10 Nov 2016 15:19:05 +0000 Subject: cpufreq: brcmstb-avs-cpufreq: make symbol brcm_avs_cpufreq_attr static Fixes the following sparse warning: drivers/cpufreq/brcmstb-avs-cpufreq.c:982:18: warning: symbol 'brcm_avs_cpufreq_attr' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Markus Mayer Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/brcmstb-avs-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index b761d54bb08d..4fda623e55bb 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -979,7 +979,7 @@ cpufreq_freq_attr_ro(brcm_avs_pmap); cpufreq_freq_attr_ro(brcm_avs_voltage); cpufreq_freq_attr_ro(brcm_avs_frequency); -struct freq_attr *brcm_avs_cpufreq_attr[] = { +static struct freq_attr *brcm_avs_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, &brcm_avs_pstate, &brcm_avs_mode, -- cgit v1.2.3 From 44cae7d5a1b4a4e4db24eae478a9cfba0ea2b9d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Nov 2016 15:52:15 +0300 Subject: PM / Domains: Fix a warning message The first argument of WARN() is the condition, followed by the message. Signed-off-by: Dan Carpenter Acked-by: Pavel Machek Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 661737c2bae0..7b4d41ff9506 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1297,7 +1297,7 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, * powered on/off in that context. */ if (!genpd_is_irq_safe(genpd) && genpd_is_irq_safe(subdomain)) { - WARN("Parent %s of subdomain %s must be IRQ safe\n", + WARN(1, "Parent %s of subdomain %s must be IRQ safe\n", genpd->name, subdomain->name); return -EINVAL; } -- cgit v1.2.3 From 60c9efb8f783194200bdb510df8d9d4fb398016e Mon Sep 17 00:00:00 2001 From: Akshay Adiga Date: Tue, 8 Nov 2016 19:03:27 +0530 Subject: cpufreq: powernv: Adding fast_switch for schedutil Adding fast_switch which does light weight operation to set the desired pstate. Both global and local pstates are set to the same desired pstate. Signed-off-by: Akshay Adiga Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index d3ffde806629..4a4380d728ad 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -752,9 +752,12 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy) spin_lock_init(&gpstates->gpstate_lock); ret = cpufreq_table_validate_and_show(policy, powernv_freqs); - if (ret < 0) + if (ret < 0) { kfree(policy->driver_data); + return ret; + } + policy->fast_switch_possible = true; return ret; } @@ -897,6 +900,20 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) del_timer_sync(&gpstates->timer); } +static unsigned int powernv_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + int index; + struct powernv_smp_call_data freq_data; + + index = cpufreq_table_find_index_dl(policy, target_freq); + freq_data.pstate_id = powernv_freqs[index].driver_data; + freq_data.gpstate_id = powernv_freqs[index].driver_data; + set_pstate(&freq_data); + + return powernv_freqs[index].frequency; +} + static struct cpufreq_driver powernv_cpufreq_driver = { .name = "powernv-cpufreq", .flags = CPUFREQ_CONST_LOOPS, @@ -904,6 +921,7 @@ static struct cpufreq_driver powernv_cpufreq_driver = { .exit = powernv_cpufreq_cpu_exit, .verify = cpufreq_generic_frequency_table_verify, .target_index = powernv_cpufreq_target_index, + .fast_switch = powernv_fast_switch, .get = powernv_cpufreq_get, .stop_cpu = powernv_cpufreq_stop_cpu, .attr = powernv_cpu_freq_attr, -- cgit v1.2.3 From 20b15b7663549b34abcb4a18f16fedb239406c41 Mon Sep 17 00:00:00 2001 From: Akshay Adiga Date: Tue, 8 Nov 2016 19:03:28 +0530 Subject: cpufreq: powernv: Use PMCR to verify global and local pstate As fast_switch() may get called with interrupt disable mode, we cannot hold a mutex to update the global_pstate_info. So currently, fast_switch() does not update the global_pstate_info and it will end up with stale data whenever pstate is updated through fast_switch(). As the gpstate_timer can fire after fast_switch() has updated the pstates, the timer handler cannot rely on the cached values of local and global pstate and needs to read it from the PMCR. Only gpstate_timer_handler() is affected by the stale cached pstate data beacause either fast_switch() or target_index() routines will be called for a given govenor, but gpstate_timer can fire after the governor has changed to schedutil. Signed-off-by: Akshay Adiga Reviewed-by: Gautham R. Shenoy Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 4a4380d728ad..c82304b7a4c0 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -42,6 +42,10 @@ #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) #define PMSR_MAX(x) ((x >> 32) & 0xFF) +#define LPSTATE_SHIFT 48 +#define GPSTATE_SHIFT 56 +#define GET_LPSTATE(x) (((x) >> LPSTATE_SHIFT) & 0xFF) +#define GET_GPSTATE(x) (((x) >> GPSTATE_SHIFT) & 0xFF) #define MAX_RAMP_DOWN_TIME 5120 /* @@ -592,7 +596,8 @@ void gpstate_timer_handler(unsigned long data) { struct cpufreq_policy *policy = (struct cpufreq_policy *)data; struct global_pstate_info *gpstates = policy->driver_data; - int gpstate_idx; + int gpstate_idx, lpstate_idx; + unsigned long val; unsigned int time_diff = jiffies_to_msecs(jiffies) - gpstates->last_sampled_time; struct powernv_smp_call_data freq_data; @@ -600,21 +605,36 @@ void gpstate_timer_handler(unsigned long data) if (!spin_trylock(&gpstates->gpstate_lock)) return; + /* + * If PMCR was last updated was using fast_swtich then + * We may have wrong in gpstate->last_lpstate_idx + * value. Hence, read from PMCR to get correct data. + */ + val = get_pmspr(SPRN_PMCR); + freq_data.gpstate_id = (s8)GET_GPSTATE(val); + freq_data.pstate_id = (s8)GET_LPSTATE(val); + if (freq_data.gpstate_id == freq_data.pstate_id) { + reset_gpstates(policy); + spin_unlock(&gpstates->gpstate_lock); + return; + } + gpstates->last_sampled_time += time_diff; gpstates->elapsed_time += time_diff; - freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx); - if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) || - (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) { + if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) { gpstate_idx = pstate_to_idx(freq_data.pstate_id); reset_gpstates(policy); gpstates->highest_lpstate_idx = gpstate_idx; } else { + lpstate_idx = pstate_to_idx(freq_data.pstate_id); gpstate_idx = calc_global_pstate(gpstates->elapsed_time, gpstates->highest_lpstate_idx, - gpstates->last_lpstate_idx); + lpstate_idx); } - + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); + gpstates->last_gpstate_idx = gpstate_idx; + gpstates->last_lpstate_idx = lpstate_idx; /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. @@ -622,10 +642,6 @@ void gpstate_timer_handler(unsigned long data) if (gpstate_idx != gpstates->last_lpstate_idx) queue_gpstate_timer(gpstates); - freq_data.gpstate_id = idx_to_pstate(gpstate_idx); - gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id); - gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id); - spin_unlock(&gpstates->gpstate_lock); /* Timer may get migrated to a different cpu on cpu hot unplug */ -- cgit v1.2.3 From 26f0dbc9ab158afe86bac5ece2fcaf873d6bd8ad Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 8 Nov 2016 11:06:33 +0530 Subject: cpufreq: governor: Don't use 'timer' keyword The earlier implementation of governors used background timers and so functions, mutex, etc had 'timer' keyword in their names. But that's not true anymore. Replace 'timer' with 'update', as those functions, variables are based around updates to frequency. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 4 ++-- drivers/cpufreq/cpufreq_governor.c | 18 +++++++++--------- drivers/cpufreq/cpufreq_governor.h | 4 ++-- drivers/cpufreq/cpufreq_ondemand.c | 15 +++++++-------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 13475890d792..fa5ece3915a1 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -58,7 +58,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, * Any frequency increase takes it to the maximum frequency. Frequency reduction * happens at minimum steps of 5% (default) of maximum frequency */ -static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) +static unsigned int cs_dbs_update(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); @@ -305,7 +305,7 @@ static void cs_start(struct cpufreq_policy *policy) static struct dbs_governor cs_governor = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, - .gov_dbs_timer = cs_dbs_timer, + .gov_dbs_update = cs_dbs_update, .alloc = cs_alloc, .free = cs_free, .init = cs_init, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 642dd0f183a8..372947416b75 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -61,7 +61,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, * entries can't be freed concurrently. */ list_for_each_entry(policy_dbs, &attr_set->policy_list, list) { - mutex_lock(&policy_dbs->timer_mutex); + mutex_lock(&policy_dbs->update_mutex); /* * On 32-bit architectures this may race with the * sample_delay_ns read in dbs_update_util_handler(), but that @@ -76,7 +76,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, * taken, so it shouldn't be significant. */ gov_update_sample_delay(policy_dbs, 0); - mutex_unlock(&policy_dbs->timer_mutex); + mutex_unlock(&policy_dbs->update_mutex); } return count; @@ -236,9 +236,9 @@ static void dbs_work_handler(struct work_struct *work) * Make sure cpufreq_governor_limits() isn't evaluating load or the * ondemand governor isn't updating the sampling rate in parallel. */ - mutex_lock(&policy_dbs->timer_mutex); - gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy)); - mutex_unlock(&policy_dbs->timer_mutex); + mutex_lock(&policy_dbs->update_mutex); + gov_update_sample_delay(policy_dbs, gov->gov_dbs_update(policy)); + mutex_unlock(&policy_dbs->update_mutex); /* Allow the utilization update handler to queue up more work. */ atomic_set(&policy_dbs->work_count, 0); @@ -348,7 +348,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli return NULL; policy_dbs->policy = policy; - mutex_init(&policy_dbs->timer_mutex); + mutex_init(&policy_dbs->update_mutex); atomic_set(&policy_dbs->work_count, 0); init_irq_work(&policy_dbs->irq_work, dbs_irq_work); INIT_WORK(&policy_dbs->work, dbs_work_handler); @@ -367,7 +367,7 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, { int j; - mutex_destroy(&policy_dbs->timer_mutex); + mutex_destroy(&policy_dbs->update_mutex); for_each_cpu(j, policy_dbs->policy->related_cpus) { struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j); @@ -547,10 +547,10 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; - mutex_lock(&policy_dbs->timer_mutex); + mutex_lock(&policy_dbs->update_mutex); cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); - mutex_unlock(&policy_dbs->timer_mutex); + mutex_unlock(&policy_dbs->update_mutex); } EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index ef1037e9c92b..9660cc6b4b39 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -85,7 +85,7 @@ struct policy_dbs_info { * Per policy mutex that serializes load evaluation from limit-change * and work-handler. */ - struct mutex timer_mutex; + struct mutex update_mutex; u64 last_sample_time; s64 sample_delay_ns; @@ -135,7 +135,7 @@ struct dbs_governor { */ struct dbs_data *gdbs_data; - unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); + unsigned int (*gov_dbs_update)(struct cpufreq_policy *policy); struct policy_dbs_info *(*alloc)(void); void (*free)(struct policy_dbs_info *policy_dbs); int (*init)(struct dbs_data *dbs_data); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3a1f49f5f4c6..1e2bd9851fba 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -169,7 +169,7 @@ static void od_update(struct cpufreq_policy *policy) } } -static unsigned int od_dbs_timer(struct cpufreq_policy *policy) +static unsigned int od_dbs_update(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *dbs_data = policy_dbs->dbs_data; @@ -191,7 +191,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy) od_update(policy); if (dbs_info->freq_lo) { - /* Setup timer for SUB_SAMPLE */ + /* Setup SUB_SAMPLE */ dbs_info->sample_type = OD_SUB_SAMPLE; return dbs_info->freq_hi_delay_us; } @@ -255,11 +255,11 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, list_for_each_entry(policy_dbs, &attr_set->policy_list, list) { /* * Doing this without locking might lead to using different - * rate_mult values in od_update() and od_dbs_timer(). + * rate_mult values in od_update() and od_dbs_update(). */ - mutex_lock(&policy_dbs->timer_mutex); + mutex_lock(&policy_dbs->update_mutex); policy_dbs->rate_mult = 1; - mutex_unlock(&policy_dbs->timer_mutex); + mutex_unlock(&policy_dbs->update_mutex); } return count; @@ -374,8 +374,7 @@ static int od_init(struct dbs_data *dbs_data) dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; /* * In nohz/micro accounting case we set the minimum frequency - * not depending on HZ, but fixed (very low). The deferred - * timer might skip some samples if idle/sleeping as needed. + * not depending on HZ, but fixed (very low). */ dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; } else { @@ -415,7 +414,7 @@ static struct od_ops od_ops = { static struct dbs_governor od_dbs_gov = { .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), .kobj_type = { .default_attrs = od_attributes }, - .gov_dbs_timer = od_dbs_timer, + .gov_dbs_update = od_dbs_update, .alloc = od_alloc, .free = od_free, .init = od_init, -- cgit v1.2.3 From ee7930ee27fe5240398cc302fa8eb4454725f188 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Mon, 7 Nov 2016 10:02:23 -0800 Subject: cpufreq: stats: New sysfs attribute for clearing statistics Allow CPUfreq statistics to be cleared by writing anything to /sys/.../cpufreq/stats/reset. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpufreq-stats.txt | 6 ++++++ drivers/cpufreq/cpufreq_stats.c | 22 ++++++++++++++++++++++ include/linux/cpufreq.h | 4 ++++ 3 files changed, 32 insertions(+) diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt index 8d9773f23550..3c355f6ad834 100644 --- a/Documentation/cpu-freq/cpufreq-stats.txt +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -44,11 +44,17 @@ the stats driver insertion. total 0 drwxr-xr-x 2 root root 0 May 14 16:06 . drwxr-xr-x 3 root root 0 May 14 15:58 .. +--w------- 1 root root 4096 May 14 16:06 reset -r--r--r-- 1 root root 4096 May 14 16:06 time_in_state -r--r--r-- 1 root root 4096 May 14 16:06 total_trans -r--r--r-- 1 root root 4096 May 14 16:06 trans_table -------------------------------------------------------------------------------- +- reset +Write-only attribute that can be used to reset the stat counters. This can be +useful for evaluating system behaviour under different governors without the +need for a reboot. + - time_in_state This gives the amount of time spent in each of the frequencies supported by this CPU. The cat output will have "