summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 13:12:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 13:12:24 -0700
commitb018fc9800557bd14a40d69501e19c340eb2c521 (patch)
tree541109645e83725699d2b091a1c6c4816fdc6649 /drivers
parentc07b3682cd12a017f976ec63bbd4758dc4c5100e (diff)
parent7425ecd5e3e8c9d84f399a102282a23a90a19278 (diff)
downloadlwn-b018fc9800557bd14a40d69501e19c340eb2c521.tar.gz
lwn-b018fc9800557bd14a40d69501e19c340eb2c521.zip
Merge tag 'pm-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "These add a new framework for CPU idle time injection, to be used by all of the idle injection code in the kernel in the future, fix some issues and add a number of relatively small extensions in multiple places. Specifics: - Add a new framework for CPU idle time injection (Daniel Lezcano). - Add AVS support to the armada-37xx cpufreq driver (Gregory CLEMENT). - Add support for current CPU frequency reporting to the ACPI CPPC cpufreq driver (George Cherian). - Rework the cooling device registration in the imx6q/thermal driver (Bastian Stender). - Make the pcc-cpufreq driver refuse to work with dynamic scaling governors on systems with many CPUs to avoid scalability issues with it (Rafael Wysocki). - Fix the intel_pstate driver to report different maximum CPU frequencies on systems where they really are different and to ignore the turbo active ratio if hardware-managend P-states (HWP) are in use; make it use the match_string() helper (Xie Yisheng, Srinivas Pandruvada). - Fix a minor deferred probe issue in the qcom-kryo cpufreq driver (Niklas Cassel). - Add a tracepoint for the tracking of frequency limits changes (from Andriod) to the cpufreq core (Ruchi Kandoi). - Fix a circular lock dependency between CPU hotplug and sysfs locking in the cpufreq core reported by lockdep (Waiman Long). - Avoid excessive error reports on driver registration failures in the ARM cpuidle driver (Sudeep Holla). - Add a new device links flag to the driver core to make links go away automatically on supplier driver removal (Vivek Gautam). - Eliminate potential race condition between system-wide power management transitions and system shutdown (Pingfan Liu). - Add a quirk to save NVS memory on system suspend for the ASUS 1025C laptop (Willy Tarreau). - Make more systems use suspend-to-idle (instead of ACPI S3) by default (Tristian Celestin). - Get rid of stack VLA usage in the low-level hibernation code on 64-bit x86 (Kees Cook). - Fix error handling in the hibernation core and mark an expected fall-through switch in it (Chengguang Xu, Gustavo Silva). - Extend the generic power domains (genpd) framework to support attaching a device to a power domain by name (Ulf Hansson). - Fix device reference counting and user limits initialization in the devfreq core (Arvind Yadav, Matthias Kaehlcke). - Fix a few issues in the rk3399_dmc devfreq driver and improve its documentation (Enric Balletbo i Serra, Lin Huang, Nick Milner). - Drop a redundant error message from the exynos-ppmu devfreq driver (Markus Elfring)" * tag 'pm-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (35 commits) PM / reboot: Eliminate race between reboot and suspend PM / hibernate: Mark expected switch fall-through cpufreq: intel_pstate: Ignore turbo active ratio in HWP cpufreq: Fix a circular lock dependency problem cpu/hotplug: Add a cpus_read_trylock() function x86/power/hibernate_64: Remove VLA usage cpufreq: trace frequency limits change cpufreq: intel_pstate: Show different max frequency with turbo 3 and HWP cpufreq: pcc-cpufreq: Disable dynamic scaling on many-CPU systems cpufreq: qcom-kryo: Silently error out on EPROBE_DEFER cpufreq / CPPC: Add cpuinfo_cur_freq support for CPPC cpufreq: armada-37xx: Add AVS support dt-bindings: marvell: Add documentation for the Armada 3700 AVS binding PM / devfreq: rk3399_dmc: Fix duplicated opp table on reload. PM / devfreq: Init user limits from OPP limits, not viceversa PM / devfreq: rk3399_dmc: fix spelling mistakes. PM / devfreq: rk3399_dmc: do not print error when get supply and clk defer. dt-bindings: devfreq: rk3399_dmc: move interrupts to be optional. PM / devfreq: rk3399_dmc: remove wait for dcf irq event. dt-bindings: clock: add rk3399 DDR3 standard speed bins. ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/sleep.c30
-rw-r--r--drivers/base/core.c25
-rw-r--r--drivers/base/power/common.c17
-rw-r--r--drivers/base/power/domain.c24
-rw-r--r--drivers/cpufreq/armada-37xx-cpufreq.c163
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c52
-rw-r--r--drivers/cpufreq/cpufreq.c8
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c21
-rw-r--r--drivers/cpufreq/intel_pstate.c27
-rw-r--r--drivers/cpufreq/pcc-cpufreq.c9
-rw-r--r--drivers/cpufreq/qcom-cpufreq-kryo.c5
-rw-r--r--drivers/cpuidle/cpuidle-arm.c3
-rw-r--r--drivers/devfreq/devfreq.c16
-rw-r--r--drivers/devfreq/event/exynos-ppmu.c6
-rw-r--r--drivers/devfreq/rk3399_dmc.c102
-rw-r--r--drivers/gpu/drm/tegra/dc.c2
-rw-r--r--drivers/gpu/ipu-v3/ipu-pre.c3
-rw-r--r--drivers/gpu/ipu-v3/ipu-prg.c3
-rw-r--r--drivers/powercap/Kconfig10
-rw-r--r--drivers/powercap/Makefile1
-rw-r--r--drivers/powercap/idle_inject.c356
-rw-r--r--drivers/soc/imx/gpc.c2
-rw-r--r--drivers/thermal/imx_thermal.c28
23 files changed, 794 insertions, 119 deletions
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5d0486f1cfcd..754d59f95500 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -338,6 +338,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"),
},
},
+ {
+ .callback = init_nvs_save_s3,
+ .ident = "Asus 1025C",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "1025C"),
+ },
+ },
/*
* https://bugzilla.kernel.org/show_bug.cgi?id=189431
* Lenovo G50-45 is a platform later than 2012, but needs nvs memory
@@ -718,9 +726,6 @@ static const struct acpi_device_id lps0_device_ids[] = {
#define ACPI_LPS0_ENTRY 5
#define ACPI_LPS0_EXIT 6
-#define ACPI_LPS0_SCREEN_MASK ((1 << ACPI_LPS0_SCREEN_OFF) | (1 << ACPI_LPS0_SCREEN_ON))
-#define ACPI_LPS0_PLATFORM_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT))
-
static acpi_handle lps0_device_handle;
static guid_t lps0_dsm_guid;
static char lps0_dsm_func_mask;
@@ -924,17 +929,14 @@ static int lps0_device_attach(struct acpi_device *adev,
if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) {
char bitmask = *(char *)out_obj->buffer.pointer;
- if ((bitmask & ACPI_LPS0_PLATFORM_MASK) == ACPI_LPS0_PLATFORM_MASK ||
- (bitmask & ACPI_LPS0_SCREEN_MASK) == ACPI_LPS0_SCREEN_MASK) {
- lps0_dsm_func_mask = bitmask;
- lps0_device_handle = adev->handle;
- /*
- * Use suspend-to-idle by default if the default
- * suspend mode was not set from the command line.
- */
- if (mem_sleep_default > PM_SUSPEND_MEM)
- mem_sleep_current = PM_SUSPEND_TO_IDLE;
- }
+ lps0_dsm_func_mask = bitmask;
+ lps0_device_handle = adev->handle;
+ /*
+ * Use suspend-to-idle by default if the default
+ * suspend mode was not set from the command line.
+ */
+ if (mem_sleep_default > PM_SUSPEND_MEM)
+ mem_sleep_current = PM_SUSPEND_TO_IDLE;
acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
bitmask);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2ab316d85651..9d3c15d4dde8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -178,10 +178,10 @@ void device_pm_move_to_tail(struct device *dev)
* of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
* ignored.
*
- * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically
- * when the consumer device driver unbinds from it. The combination of both
- * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL
- * to be returned.
+ * If the DL_FLAG_AUTOREMOVE_CONSUMER is set, the link will be removed
+ * automatically when the consumer device driver unbinds from it.
+ * The combination of both DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_STATELESS
+ * set is invalid and will cause NULL to be returned.
*
* A side effect of the link creation is re-ordering of dpm_list and the
* devices_kset list by moving the consumer device and all devices depending
@@ -198,7 +198,8 @@ struct device_link *device_link_add(struct device *consumer,
struct device_link *link;
if (!consumer || !supplier ||
- ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE)))
+ ((flags & DL_FLAG_STATELESS) &&
+ (flags & DL_FLAG_AUTOREMOVE_CONSUMER)))
return NULL;
device_links_write_lock();
@@ -509,7 +510,7 @@ static void __device_links_no_driver(struct device *dev)
if (link->flags & DL_FLAG_STATELESS)
continue;
- if (link->flags & DL_FLAG_AUTOREMOVE)
+ if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
kref_put(&link->kref, __device_link_del);
else if (link->status != DL_STATE_SUPPLIER_UNBIND)
WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
@@ -545,8 +546,18 @@ void device_links_driver_cleanup(struct device *dev)
if (link->flags & DL_FLAG_STATELESS)
continue;
- WARN_ON(link->flags & DL_FLAG_AUTOREMOVE);
+ WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
+
+ /*
+ * autoremove the links between this @dev and its consumer
+ * devices that are not active, i.e. where the link state
+ * has moved to DL_STATE_SUPPLIER_UNBIND.
+ */
+ if (link->status == DL_STATE_SUPPLIER_UNBIND &&
+ link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
+ kref_put(&link->kref, __device_link_del);
+
WRITE_ONCE(link->status, DL_STATE_DORMANT);
}
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index df41b4780b3b..b413951c6abc 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -153,6 +153,23 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev,
EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
/**
+ * dev_pm_domain_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * For a detailed function description, see dev_pm_domain_attach_by_id().
+ */
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+ char *name)
+{
+ if (dev->pm_domain)
+ return ERR_PTR(-EEXIST);
+
+ return genpd_dev_pm_attach_by_name(dev, name);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
+
+/**
* dev_pm_domain_detach - Detach a device from its PM domain.
* @dev: Device to detach.
* @power_off: Used to indicate whether we should power off the device.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9e8484189034..79bdca70a81a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2374,6 +2374,30 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
}
EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
+/**
+ * genpd_dev_pm_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * Parse device's OF node to find a PM domain specifier using the
+ * power-domain-names DT property. For further description see
+ * genpd_dev_pm_attach_by_id().
+ */
+struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name)
+{
+ int index;
+
+ if (!dev->of_node)
+ return NULL;
+
+ index = of_property_match_string(dev->of_node, "power-domain-names",
+ name);
+ if (index < 0)
+ return NULL;
+
+ return genpd_dev_pm_attach_by_id(dev, index);
+}
+
static const struct of_device_id idle_state_match[] = {
{ .compatible = "domain-idle-state", },
{ }
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index 739da90ff3f6..75491fc841a6 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -51,6 +51,16 @@
#define ARMADA_37XX_DVFS_LOAD_2 2
#define ARMADA_37XX_DVFS_LOAD_3 3
+/* AVS register set */
+#define ARMADA_37XX_AVS_CTL0 0x0
+#define ARMADA_37XX_AVS_ENABLE BIT(30)
+#define ARMADA_37XX_AVS_HIGH_VDD_LIMIT 16
+#define ARMADA_37XX_AVS_LOW_VDD_LIMIT 22
+#define ARMADA_37XX_AVS_VDD_MASK 0x3F
+#define ARMADA_37XX_AVS_CTL2 0x8
+#define ARMADA_37XX_AVS_LOW_VDD_EN BIT(6)
+#define ARMADA_37XX_AVS_VSET(x) (0x1C + 4 * (x))
+
/*
* On Armada 37xx the Power management manages 4 level of CPU load,
* each level can be associated with a CPU clock source, a CPU
@@ -58,6 +68,17 @@
*/
#define LOAD_LEVEL_NR 4
+#define MIN_VOLT_MV 1000
+
+/* AVS value for the corresponding voltage (in mV) */
+static int avs_map[] = {
+ 747, 758, 770, 782, 793, 805, 817, 828, 840, 852, 863, 875, 887, 898,
+ 910, 922, 933, 945, 957, 968, 980, 992, 1003, 1015, 1027, 1038, 1050,
+ 1062, 1073, 1085, 1097, 1108, 1120, 1132, 1143, 1155, 1167, 1178, 1190,
+ 1202, 1213, 1225, 1237, 1248, 1260, 1272, 1283, 1295, 1307, 1318, 1330,
+ 1342
+};
+
struct armada37xx_cpufreq_state {
struct regmap *regmap;
u32 nb_l0l1;
@@ -71,6 +92,7 @@ static struct armada37xx_cpufreq_state *armada37xx_cpufreq_state;
struct armada_37xx_dvfs {
u32 cpu_freq_max;
u8 divider[LOAD_LEVEL_NR];
+ u32 avs[LOAD_LEVEL_NR];
};
static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
@@ -148,6 +170,128 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
clk_set_parent(clk, parent);
}
+/*
+ * Find out the armada 37x supported AVS value whose voltage value is
+ * the round-up closest to the target voltage value.
+ */
+static u32 armada_37xx_avs_val_match(int target_vm)
+{
+ u32 avs;
+
+ /* Find out the round-up closest supported voltage value */
+ for (avs = 0; avs < ARRAY_SIZE(avs_map); avs++)
+ if (avs_map[avs] >= target_vm)
+ break;
+
+ /*
+ * If all supported voltages are smaller than target one,
+ * choose the largest supported voltage
+ */
+ if (avs == ARRAY_SIZE(avs_map))
+ avs = ARRAY_SIZE(avs_map) - 1;
+
+ return avs;
+}
+
+/*
+ * For Armada 37xx soc, L0(VSET0) VDD AVS value is set to SVC revision
+ * value or a default value when SVC is not supported.
+ * - L0 can be read out from the register of AVS_CTRL_0 and L0 voltage
+ * can be got from the mapping table of avs_map.
+ * - L1 voltage should be about 100mv smaller than L0 voltage
+ * - L2 & L3 voltage should be about 150mv smaller than L0 voltage.
+ * This function calculates L1 & L2 & L3 AVS values dynamically based
+ * on L0 voltage and fill all AVS values to the AVS value table.
+ */
+static void __init armada37xx_cpufreq_avs_configure(struct regmap *base,
+ struct armada_37xx_dvfs *dvfs)
+{
+ unsigned int target_vm;
+ int load_level = 0;
+ u32 l0_vdd_min;
+
+ if (base == NULL)
+ return;
+
+ /* Get L0 VDD min value */
+ regmap_read(base, ARMADA_37XX_AVS_CTL0, &l0_vdd_min);
+ l0_vdd_min = (l0_vdd_min >> ARMADA_37XX_AVS_LOW_VDD_LIMIT) &
+ ARMADA_37XX_AVS_VDD_MASK;
+ if (l0_vdd_min >= ARRAY_SIZE(avs_map)) {
+ pr_err("L0 VDD MIN %d is not correct.\n", l0_vdd_min);
+ return;
+ }
+ dvfs->avs[0] = l0_vdd_min;
+
+ if (avs_map[l0_vdd_min] <= MIN_VOLT_MV) {
+ /*
+ * If L0 voltage is smaller than 1000mv, then all VDD sets
+ * use L0 voltage;
+ */
+ u32 avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV);
+
+ for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++)
+ dvfs->avs[load_level] = avs_min;
+
+ return;
+ }
+
+ /*
+ * L1 voltage is equal to L0 voltage - 100mv and it must be
+ * larger than 1000mv
+ */
+
+ target_vm = avs_map[l0_vdd_min] - 100;
+ target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
+ dvfs->avs[1] = armada_37xx_avs_val_match(target_vm);
+
+ /*
+ * L2 & L3 voltage is equal to L0 voltage - 150mv and it must
+ * be larger than 1000mv
+ */
+ target_vm = avs_map[l0_vdd_min] - 150;
+ target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
+ dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm);
+}
+
+static void __init armada37xx_cpufreq_avs_setup(struct regmap *base,
+ struct armada_37xx_dvfs *dvfs)
+{
+ unsigned int avs_val = 0, freq;
+ int load_level = 0;
+
+ if (base == NULL)
+ return;
+
+ /* Disable AVS before the configuration */
+ regmap_update_bits(base, ARMADA_37XX_AVS_CTL0,
+ ARMADA_37XX_AVS_ENABLE, 0);
+
+
+ /* Enable low voltage mode */
+ regmap_update_bits(base, ARMADA_37XX_AVS_CTL2,
+ ARMADA_37XX_AVS_LOW_VDD_EN,
+ ARMADA_37XX_AVS_LOW_VDD_EN);
+
+
+ for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) {
+ freq = dvfs->cpu_freq_max / dvfs->divider[load_level];
+
+ avs_val = dvfs->avs[load_level];
+ regmap_update_bits(base, ARMADA_37XX_AVS_VSET(load_level-1),
+ ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_HIGH_VDD_LIMIT |
+ ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_LOW_VDD_LIMIT,
+ avs_val << ARMADA_37XX_AVS_HIGH_VDD_LIMIT |
+ avs_val << ARMADA_37XX_AVS_LOW_VDD_LIMIT);
+ }
+
+ /* Enable AVS after the configuration */
+ regmap_update_bits(base, ARMADA_37XX_AVS_CTL0,
+ ARMADA_37XX_AVS_ENABLE,
+ ARMADA_37XX_AVS_ENABLE);
+
+}
+
static void armada37xx_cpufreq_disable_dvfs(struct regmap *base)
{
unsigned int reg = ARMADA_37XX_NB_DYN_MOD,
@@ -216,7 +360,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
struct platform_device *pdev;
unsigned long freq;
unsigned int cur_frequency;
- struct regmap *nb_pm_base;
+ struct regmap *nb_pm_base, *avs_base;
struct device *cpu_dev;
int load_lvl, ret;
struct clk *clk;
@@ -227,6 +371,14 @@ static int __init armada37xx_cpufreq_driver_init(void)
if (IS_ERR(nb_pm_base))
return -ENODEV;
+ avs_base =
+ syscon_regmap_lookup_by_compatible("marvell,armada-3700-avs");
+
+ /* if AVS is not present don't use it but still try to setup dvfs */
+ if (IS_ERR(avs_base)) {
+ pr_info("Syscon failed for Adapting Voltage Scaling: skip it\n");
+ avs_base = NULL;
+ }
/* Before doing any configuration on the DVFS first, disable it */
armada37xx_cpufreq_disable_dvfs(nb_pm_base);
@@ -270,16 +422,21 @@ static int __init armada37xx_cpufreq_driver_init(void)
armada37xx_cpufreq_state->regmap = nb_pm_base;
+ armada37xx_cpufreq_avs_configure(avs_base, dvfs);
+ armada37xx_cpufreq_avs_setup(avs_base, dvfs);
+
armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
clk_put(clk);
for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
load_lvl++) {
+ unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000;
freq = cur_frequency / dvfs->divider[load_lvl];
-
- ret = dev_pm_opp_add(cpu_dev, freq, 0);
+ ret = dev_pm_opp_add(cpu_dev, freq, u_volt);
if (ret)
goto remove_opp;
+
+
}
/* Now that everything is setup, enable the DVFS at hardware level */
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index a9d3eec32795..30f302149730 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -296,10 +296,62 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
return ret;
}
+static inline u64 get_delta(u64 t1, u64 t0)
+{
+ if (t1 > t0 || t0 > ~(u32)0)
+ return t1 - t0;
+
+ return (u32)t1 - (u32)t0;
+}
+
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu,
+ struct cppc_perf_fb_ctrs fb_ctrs_t0,
+ struct cppc_perf_fb_ctrs fb_ctrs_t1)
+{
+ u64 delta_reference, delta_delivered;
+ u64 reference_perf, delivered_perf;
+
+ reference_perf = fb_ctrs_t0.reference_perf;
+
+ delta_reference = get_delta(fb_ctrs_t1.reference,
+ fb_ctrs_t0.reference);
+ delta_delivered = get_delta(fb_ctrs_t1.delivered,
+ fb_ctrs_t0.delivered);
+
+ /* Check to avoid divide-by zero */
+ if (delta_reference || delta_delivered)
+ delivered_perf = (reference_perf * delta_delivered) /
+ delta_reference;
+ else
+ delivered_perf = cpu->perf_ctrls.desired_perf;
+
+ return cppc_cpufreq_perf_to_khz(cpu, delivered_perf);
+}
+
+static unsigned int cppc_cpufreq_get_rate(unsigned int cpunum)
+{
+ struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
+ struct cppc_cpudata *cpu = all_cpu_data[cpunum];
+ int ret;
+
+ ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t0);
+ if (ret)
+ return ret;
+
+ udelay(2); /* 2usec delay between sampling */
+
+ ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t1);
+ if (ret)
+ return ret;
+
+ return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1);
+}
+
static struct cpufreq_driver cppc_cpufreq_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = cppc_verify_policy,
.target = cppc_cpufreq_set_target,
+ .get = cppc_cpufreq_get_rate,
.init = cppc_cpufreq_cpu_init,
.stop_cpu = cppc_cpufreq_stop_cpu,
.name = "cppc_cpufreq",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b0dfd3222013..f53fb41efb7b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -923,7 +923,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- cpus_read_lock();
+ /*
+ * cpus_read_trylock() is used here to work around a circular lock
+ * dependency problem with respect to the cpufreq_register_driver().
+ */
+ if (!cpus_read_trylock())
+ return -EBUSY;
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
@@ -2236,6 +2241,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
policy->min = new_policy->min;
policy->max = new_policy->max;
+ trace_cpu_frequency_limits(policy);
policy->cached_target_freq = UINT_MAX;
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 8b3c2a79ad6c..b2ff423ad7f8 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -9,6 +9,7 @@
#include <linux/clk.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -50,6 +51,7 @@ static struct clk_bulk_data clks[] = {
};
static struct device *cpu_dev;
+static struct thermal_cooling_device *cdev;
static bool free_opp;
static struct cpufreq_frequency_table *freq_table;
static unsigned int max_freq;
@@ -191,6 +193,16 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
return 0;
}
+static void imx6q_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ cdev = of_cpufreq_cooling_register(policy);
+
+ if (!cdev)
+ dev_err(cpu_dev,
+ "running cpufreq without cooling device: %ld\n",
+ PTR_ERR(cdev));
+}
+
static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
{
int ret;
@@ -202,13 +214,22 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
return ret;
}
+static int imx6q_cpufreq_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_cooling_unregister(cdev);
+
+ return 0;
+}
+
static struct cpufreq_driver imx6q_cpufreq_driver = {
.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = imx6q_set_target,
.get = cpufreq_generic_get,
.init = imx6q_cpufreq_init,
+ .exit = imx6q_cpufreq_exit,
.name = "imx6q-cpufreq",
+ .ready = imx6q_cpufreq_ready,
.attr = cpufreq_generic_attr,
.suspend = cpufreq_generic_suspend,
};
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d4ed0022b0dd..b6a1aadaff9f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -670,21 +670,18 @@ static ssize_t store_energy_performance_preference(
{
struct cpudata *cpu_data = all_cpu_data[policy->cpu];
char str_preference[21];
- int ret, i = 0;
+ int ret;
ret = sscanf(buf, "%20s", str_preference);
if (ret != 1)
return -EINVAL;
- while (energy_perf_strings[i] != NULL) {
- if (!strcmp(str_preference, energy_perf_strings[i])) {
- intel_pstate_set_energy_pref_index(cpu_data, i);
- return count;
- }
- ++i;
- }
+ ret = match_string(energy_perf_strings, -1, str_preference);
+ if (ret < 0)
+ return ret;
- return -EINVAL;
+ intel_pstate_set_energy_pref_index(cpu_data, ret);
+ return count;
}
static ssize_t show_energy_performance_preference(
@@ -2011,7 +2008,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
struct cpudata *cpu)
{
- if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
+ if (!hwp_active &&
+ cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
policy->max < policy->cpuinfo.max_freq &&
policy->max > cpu->pstate.max_freq) {
pr_debug("policy->max > max non turbo frequency\n");
@@ -2085,6 +2083,15 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
+ if (hwp_active) {
+ unsigned int max_freq;
+
+ max_freq = global.turbo_disabled ?
+ cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+ if (max_freq < policy->cpuinfo.max_freq)
+ policy->cpuinfo.max_freq = max_freq;
+ }
+
intel_pstate_init_acpi_perf_limits(policy);
policy->fast_switch_possible = true;
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 0c56c9759672..099a849396f6 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -593,6 +593,15 @@ static int __init pcc_cpufreq_init(void)
return ret;
}
+ if (num_present_cpus() > 4) {
+ pcc_cpufreq_driver.flags |= CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING;
+ pr_err("%s: Too many CPUs, dynamic performance scaling disabled\n",
+ __func__);
+ pr_err("%s: Try to enable another scaling driver through BIOS settings\n",
+ __func__);
+ pr_err("%s: and complain to the system vendor\n", __func__);
+ }
+
ret = cpufreq_register_driver(&pcc_cpufreq_driver);
return ret;
diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c
index efc9a7ae4857..a1830fa25fc5 100644
--- a/drivers/cpufreq/qcom-cpufreq-kryo.c
+++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
@@ -109,8 +109,9 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
speedbin_nvmem = of_nvmem_cell_get(np, NULL);
of_node_put(np);
if (IS_ERR(speedbin_nvmem)) {
- dev_err(cpu_dev, "Could not get nvmem cell: %ld\n",
- PTR_ERR(speedbin_nvmem));
+ if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER)
+ dev_err(cpu_dev, "Could not get nvmem cell: %ld\n",
+ PTR_ERR(speedbin_nvmem));
return PTR_ERR(speedbin_nvmem);
}
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index e07bc7ace774..073557f433eb 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -105,7 +105,8 @@ static int __init arm_idle_init_cpu(int cpu)
ret = cpuidle_register_driver(drv);
if (ret) {
- pr_err("Failed to register cpuidle driver\n");
+ if (ret != -EBUSY)
+ pr_err("Failed to register cpuidle driver\n");
goto out_kfree_drv;
}
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 0b5b3abe054e..4c49bb1330b5 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -604,28 +604,29 @@ struct devfreq *devfreq_add_device(struct device *dev,
mutex_lock(&devfreq->lock);
}
- devfreq->min_freq = find_available_min_freq(devfreq);
- if (!devfreq->min_freq) {
+ devfreq->scaling_min_freq = find_available_min_freq(devfreq);
+ if (!devfreq->scaling_min_freq) {
mutex_unlock(&devfreq->lock);
err = -EINVAL;
goto err_dev;
}
- devfreq->scaling_min_freq = devfreq->min_freq;
+ devfreq->min_freq = devfreq->scaling_min_freq;
- devfreq->max_freq = find_available_max_freq(devfreq);
- if (!devfreq->max_freq) {
+ devfreq->scaling_max_freq = find_available_max_freq(devfreq);
+ if (!devfreq->scaling_max_freq) {
mutex_unlock(&devfreq->lock);
err = -EINVAL;
goto err_dev;
}
- devfreq->scaling_max_freq = devfreq->max_freq;
+ devfreq->max_freq = devfreq->scaling_max_freq;
dev_set_name(&devfreq->dev, "devfreq%d",
atomic_inc_return(&devfreq_no));
err = device_register(&devfreq->dev);
if (err) {
mutex_unlock(&devfreq->lock);
- goto err_dev;
+ put_device(&devfreq->dev);
+ goto err_out;
}
devfreq->trans_table =
@@ -672,6 +673,7 @@ err_init:
mutex_unlock(&devfreq_list_lock);
device_unregister(&devfreq->dev);
+ devfreq = NULL;
err_dev:
if (devfreq)
kfree(devfreq);
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 3cd6a184fe7c..a9c64f0d3284 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -627,11 +627,9 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
size = sizeof(struct devfreq_event_dev *) * info->num_events;
info->edev = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
- if (!info->edev) {
- dev_err(&pdev->dev,
- "failed to allocate memory devfreq-event devices\n");
+ if (!info->edev)
return -ENOMEM;
- }
+
edev = info->edev;
platform_set_drvdata(pdev, info);
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 5dfbfa3cc878..e795ad2b3f6b 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -68,15 +68,6 @@ struct rk3399_dmcfreq {
struct devfreq_event_dev *edev;
struct mutex lock;
struct dram_timing timing;
-
- /*
- * DDR Converser of Frequency (DCF) is used to implement DDR frequency
- * conversion without the participation of CPU, we will implement and
- * control it in arm trust firmware.
- */
- wait_queue_head_t wait_dcf_queue;
- int irq;
- int wait_dcf_flag;
struct regulator *vdd_center;
unsigned long rate, target_rate;
unsigned long volt, target_volt;
@@ -112,31 +103,22 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
target_volt);
if (err) {
- dev_err(dev, "Cannot to set voltage %lu uV\n",
+ dev_err(dev, "Cannot set voltage %lu uV\n",
target_volt);
goto out;
}
}
- dmcfreq->wait_dcf_flag = 1;
err = clk_set_rate(dmcfreq->dmc_clk, target_rate);
if (err) {
- dev_err(dev, "Cannot to set frequency %lu (%d)\n",
- target_rate, err);
+ dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate,
+ err);
regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
dmcfreq->volt);
goto out;
}
/*
- * Wait until bcf irq happen, it means freq scaling finish in
- * arm trust firmware, use 100ms as timeout time.
- */
- if (!wait_event_timeout(dmcfreq->wait_dcf_queue,
- !dmcfreq->wait_dcf_flag, HZ / 10))
- dev_warn(dev, "Timeout waiting for dcf interrupt\n");
-
- /*
* Check the dpll rate,
* There only two result we will get,
* 1. Ddr frequency scaling fail, we still get the old rate.
@@ -146,8 +128,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
/* If get the incorrect rate, set voltage to old value. */
if (dmcfreq->rate != target_rate) {
- dev_err(dev, "Get wrong ddr frequency, Request frequency %lu,\
- Current frequency %lu\n", target_rate, dmcfreq->rate);
+ dev_err(dev, "Got wrong frequency, Request %lu, Current %lu\n",
+ target_rate, dmcfreq->rate);
regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
dmcfreq->volt);
goto out;
@@ -155,7 +137,7 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
target_volt);
if (err)
- dev_err(dev, "Cannot to set vol %lu uV\n", target_volt);
+ dev_err(dev, "Cannot set voltage %lu uV\n", target_volt);
dmcfreq->rate = target_rate;
dmcfreq->volt = target_volt;
@@ -241,22 +223,6 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
rk3399_dmcfreq_resume);
-static irqreturn_t rk3399_dmc_irq(int irq, void *dev_id)
-{
- struct rk3399_dmcfreq *dmcfreq = dev_id;
- struct arm_smccc_res res;
-
- dmcfreq->wait_dcf_flag = 0;
- wake_up(&dmcfreq->wait_dcf_queue);
-
- /* Clear the DCF interrupt */
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
- ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ,
- 0, 0, 0, 0, &res);
-
- return IRQ_HANDLED;
-}
-
static int of_get_ddr_timings(struct dram_timing *timing,
struct device_node *np)
{
@@ -330,16 +296,10 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = pdev->dev.of_node;
struct rk3399_dmcfreq *data;
- int ret, irq, index, size;
+ int ret, index, size;
uint32_t *timing;
struct dev_pm_opp *opp;
- irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(&pdev->dev,
- "Cannot get the dmc interrupt resource: %d\n", irq);
- return irq;
- }
data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -348,27 +308,22 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
data->vdd_center = devm_regulator_get(dev, "center");
if (IS_ERR(data->vdd_center)) {
+ if (PTR_ERR(data->vdd_center) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
dev_err(dev, "Cannot get the regulator \"center\"\n");
return PTR_ERR(data->vdd_center);
}
data->dmc_clk = devm_clk_get(dev, "dmc_clk");
if (IS_ERR(data->dmc_clk)) {
+ if (PTR_ERR(data->dmc_clk) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
dev_err(dev, "Cannot get the clk dmc_clk\n");
return PTR_ERR(data->dmc_clk);
};
- data->irq = irq;
- ret = devm_request_irq(dev, irq, rk3399_dmc_irq, 0,
- dev_name(dev), data);
- if (ret) {
- dev_err(dev, "Failed to request dmc irq: %d\n", ret);
- return ret;
- }
-
- init_waitqueue_head(&data->wait_dcf_queue);
- data->wait_dcf_flag = 0;
-
data->edev = devfreq_event_get_edev_by_phandle(dev, 0);
if (IS_ERR(data->edev))
return -EPROBE_DEFER;
@@ -420,8 +375,10 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
data->rate = clk_get_rate(data->dmc_clk);
opp = devfreq_recommended_opp(dev, &data->rate, 0);
- if (IS_ERR(opp))
- return PTR_ERR(opp);
+ if (IS_ERR(opp)) {
+ ret = PTR_ERR(opp);
+ goto err_free_opp;
+ }
data->rate = dev_pm_opp_get_freq(opp);
data->volt = dev_pm_opp_get_voltage(opp);
@@ -433,14 +390,34 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
&rk3399_devfreq_dmc_profile,
DEVFREQ_GOV_SIMPLE_ONDEMAND,
&data->ondemand_data);
- if (IS_ERR(data->devfreq))
- return PTR_ERR(data->devfreq);
+ if (IS_ERR(data->devfreq)) {
+ ret = PTR_ERR(data->devfreq);
+ goto err_free_opp;
+ }
+
devm_devfreq_register_opp_notifier(dev, data->devfreq);
data->dev = dev;
platform_set_drvdata(pdev, data);
return 0;
+
+err_free_opp:
+ dev_pm_opp_of_remove_table(&pdev->dev);
+ return ret;
+}
+
+static int rk3399_dmcfreq_remove(struct platform_device *pdev)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev);
+
+ /*
+ * Before remove the opp table we need to unregister the opp notifier.
+ */
+ devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq);
+ dev_pm_opp_of_remove_table(dmcfreq->dev);
+
+ return 0;
}
static const struct of_device_id rk3399dmc_devfreq_of_match[] = {
@@ -451,6 +428,7 @@ MODULE_DEVICE_TABLE(of, rk3399dmc_devfreq_of_match);
static struct platform_driver rk3399_dmcfreq_driver = {
.probe = rk3399_dmcfreq_probe,
+ .remove = rk3399_dmcfreq_remove,
.driver = {
.name = "rk3399-dmc-freq",
.pm = &rk3399_dmcfreq_pm,
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index c3afe7b2237e..965088afcfad 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2312,7 +2312,7 @@ static int tegra_dc_couple(struct tegra_dc *dc)
* POWER_CONTROL registers during CRTC enabling.
*/
if (dc->soc->coupled_pm && dc->pipe == 1) {
- u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE;
+ u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER;
struct device_link *link;
struct device *partner;
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index 0f70e8847540..2f8db9d62551 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -128,7 +128,8 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index)
list_for_each_entry(pre, &ipu_pre_list, list) {
if (pre_node == pre->dev->of_node) {
mutex_unlock(&ipu_pre_list_mutex);
- device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+ device_link_add(dev, pre->dev,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
of_node_put(pre_node);
return pre;
}
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 83f9dd934a5d..38a3a9764e49 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -100,7 +100,8 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id)
list_for_each_entry(prg, &ipu_prg_list, list) {
if (prg_node == prg->dev->of_node) {
mutex_unlock(&ipu_prg_list_mutex);
- device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
+ device_link_add(dev, prg->dev,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
prg->id = ipu_id;
of_node_put(prg_node);
return prg;
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 85727ef6ce8e..6ac27e5908f5 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -29,4 +29,14 @@ config INTEL_RAPL
controller, CPU core (Power Plance 0), graphics uncore (Power Plane
1), etc.
+config IDLE_INJECT
+ bool "Idle injection framework"
+ depends on CPU_IDLE
+ default n
+ help
+ This enables support for the idle injection framework. It
+ provides a way to force idle periods on a set of specified
+ CPUs for power capping. Idle period can be injected
+ synchronously on a set of specified CPUs or alternatively
+ on a per CPU basis.
endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 0a21ef31372b..1b328854b36e 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o
+obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
new file mode 100644
index 000000000000..24ff2a068978
--- /dev/null
+++ b/drivers/powercap/idle_inject.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The idle injection framework provides a way to force CPUs to enter idle
+ * states for a specified fraction of time over a specified period.
+ *
+ * It relies on the smpboot kthreads feature providing common code for CPU
+ * hotplug and thread [un]parking.
+ *
+ * All of the kthreads used for idle injection are created at init time.
+ *
+ * Next, the users of the the idle injection framework provide a cpumask via
+ * its register function. The kthreads will be synchronized with respect to
+ * this cpumask.
+ *
+ * The idle + run duration is specified via separate helpers and that allows
+ * idle injection to be started.
+ *
+ * The idle injection kthreads will call play_idle() with the idle duration
+ * specified as per the above.
+ *
+ * After all of them have been woken up, a timer is set to start the next idle
+ * injection cycle.
+ *
+ * The timer interrupt handler will wake up the idle injection kthreads for
+ * all of the CPUs in the cpumask provided by the user.
+ *
+ * Idle injection is stopped synchronously and no leftover idle injection
+ * kthread activity after its completion is guaranteed.
+ *
+ * It is up to the user of this framework to provide a lock for higher-level
+ * synchronization to prevent race conditions like starting idle injection
+ * while unregistering from the framework.
+ */
+#define pr_fmt(fmt) "ii_dev: " fmt
+
+#include <linux/cpu.h>
+#include <linux/hrtimer.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
+#include <uapi/linux/sched/types.h>
+
+/**
+ * struct idle_inject_thread - task on/off switch structure
+ * @tsk: task injecting the idle cycles
+ * @should_run: whether or not to run the task (for the smpboot kthread API)
+ */
+struct idle_inject_thread {
+ struct task_struct *tsk;
+ int should_run;
+};
+
+/**
+ * struct idle_inject_device - idle injection data
+ * @timer: idle injection period timer
+ * @idle_duration_ms: duration of CPU idle time to inject
+ * @run_duration_ms: duration of CPU run time to allow
+ * @cpumask: mask of CPUs affected by idle injection
+ */
+struct idle_inject_device {
+ struct hrtimer timer;
+ unsigned int idle_duration_ms;
+ unsigned int run_duration_ms;
+ unsigned long int cpumask[0];
+};
+
+static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread);
+static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device);
+
+/**
+ * idle_inject_wakeup - Wake up idle injection threads
+ * @ii_dev: target idle injection device
+ *
+ * Every idle injection task associated with the given idle injection device
+ * and running on an online CPU will be woken up.
+ */
+static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
+{
+ struct idle_inject_thread *iit;
+ unsigned int cpu;
+
+ for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) {
+ iit = per_cpu_ptr(&idle_inject_thread, cpu);
+ iit->should_run = 1;
+ wake_up_process(iit->tsk);
+ }
+}
+
+/**
+ * idle_inject_timer_fn - idle injection timer function
+ * @timer: idle injection hrtimer
+ *
+ * This function is called when the idle injection timer expires. It wakes up
+ * idle injection tasks associated with the timer and they, in turn, invoke
+ * play_idle() to inject a specified amount of CPU idle time.
+ *
+ * Return: HRTIMER_RESTART.
+ */
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+ unsigned int duration_ms;
+ struct idle_inject_device *ii_dev =
+ container_of(timer, struct idle_inject_device, timer);
+
+ duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+ duration_ms += READ_ONCE(ii_dev->idle_duration_ms);
+
+ idle_inject_wakeup(ii_dev);
+
+ hrtimer_forward_now(timer, ms_to_ktime(duration_ms));
+
+ return HRTIMER_RESTART;
+}
+
+/**
+ * idle_inject_fn - idle injection work function
+ * @cpu: the CPU owning the task
+ *
+ * This function calls play_idle() to inject a specified amount of CPU idle
+ * time.
+ */
+static void idle_inject_fn(unsigned int cpu)
+{
+ struct idle_inject_device *ii_dev;
+ struct idle_inject_thread *iit;
+
+ ii_dev = per_cpu(idle_inject_device, cpu);
+ iit = per_cpu_ptr(&idle_inject_thread, cpu);
+
+ /*
+ * Let the smpboot main loop know that the task should not run again.
+ */
+ iit->should_run = 0;
+
+ play_idle(READ_ONCE(ii_dev->idle_duration_ms));
+}
+
+/**
+ * idle_inject_set_duration - idle and run duration update helper
+ * @run_duration_ms: CPU run time to allow in milliseconds
+ * @idle_duration_ms: CPU idle time to inject in milliseconds
+ */
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+ unsigned int run_duration_ms,
+ unsigned int idle_duration_ms)
+{
+ if (run_duration_ms && idle_duration_ms) {
+ WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
+ WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+ }
+}
+
+/**
+ * idle_inject_get_duration - idle and run duration retrieval helper
+ * @run_duration_ms: memory location to store the current CPU run time
+ * @idle_duration_ms: memory location to store the current CPU idle time
+ */
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+ unsigned int *run_duration_ms,
+ unsigned int *idle_duration_ms)
+{
+ *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+ *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+}
+
+/**
+ * idle_inject_start - start idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function starts idle injection by first waking up all of the idle
+ * injection kthreads associated with @ii_dev to let them inject CPU idle time
+ * sets up a timer to start the next idle injection period.
+ *
+ * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success.
+ */
+int idle_inject_start(struct idle_inject_device *ii_dev)
+{
+ unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+ unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+
+ if (!idle_duration_ms || !run_duration_ms)
+ return -EINVAL;
+
+ pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
+ cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+ idle_inject_wakeup(ii_dev);
+
+ hrtimer_start(&ii_dev->timer,
+ ms_to_ktime(idle_duration_ms + run_duration_ms),
+ HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+/**
+ * idle_inject_stop - stops idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function stops idle injection and waits for the threads to finish work.
+ * If CPU idle time is being injected when this function runs, then it will
+ * wait until the end of the cycle.
+ *
+ * When it returns, there is no more idle injection kthread activity. The
+ * kthreads are scheduled out and the periodic timer is off.
+ */
+void idle_inject_stop(struct idle_inject_device *ii_dev)
+{
+ struct idle_inject_thread *iit;
+ unsigned int cpu;
+
+ pr_debug("Stopping idle injection on CPUs '%*pbl'\n",
+ cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+ hrtimer_cancel(&ii_dev->timer);
+
+ /*
+ * Stopping idle injection requires all of the idle injection kthreads
+ * associated with the given cpumask to be parked and stay that way, so
+ * prevent CPUs from going online at this point. Any CPUs going online
+ * after the loop below will be covered by clearing the should_run flag
+ * that will cause the smpboot main loop to schedule them out.
+ */
+ cpu_hotplug_disable();
+
+ /*
+ * Iterate over all (online + offline) CPUs here in case one of them
+ * goes offline with the should_run flag set so as to prevent its idle
+ * injection kthread from running when the CPU goes online again after
+ * the ii_dev has been freed.
+ */
+ for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+ iit = per_cpu_ptr(&idle_inject_thread, cpu);
+ iit->should_run = 0;
+
+ wait_task_inactive(iit->tsk, 0);
+ }
+
+ cpu_hotplug_enable();
+}
+
+/**
+ * idle_inject_setup - prepare the current task for idle injection
+ * @cpu: not used
+ *
+ * Called once, this function is in charge of setting the current task's
+ * scheduler parameters to make it an RT task.
+ */
+static void idle_inject_setup(unsigned int cpu)
+{
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+}
+
+/**
+ * idle_inject_should_run - function helper for the smpboot API
+ * @cpu: CPU the kthread is running on
+ *
+ * Return: whether or not the thread can run.
+ */
+static int idle_inject_should_run(unsigned int cpu)
+{
+ struct idle_inject_thread *iit =
+ per_cpu_ptr(&idle_inject_thread, cpu);
+
+ return iit->should_run;
+}
+
+/**
+ * idle_inject_register - initialize idle injection on a set of CPUs
+ * @cpumask: CPUs to be affected by idle injection
+ *
+ * This function creates an idle injection control device structure for the
+ * given set of CPUs and initializes the timer associated with it. It does not
+ * start any injection cycles.
+ *
+ * Return: NULL if memory allocation fails, idle injection control device
+ * pointer on success.
+ */
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
+{
+ struct idle_inject_device *ii_dev;
+ int cpu, cpu_rb;
+
+ ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL);
+ if (!ii_dev)
+ return NULL;
+
+ cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
+ hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ ii_dev->timer.function = idle_inject_timer_fn;
+
+ for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+
+ if (per_cpu(idle_inject_device, cpu)) {
+ pr_err("cpu%d is already registered\n", cpu);
+ goto out_rollback;
+ }
+
+ per_cpu(idle_inject_device, cpu) = ii_dev;
+ }
+
+ return ii_dev;
+
+out_rollback:
+ for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) {
+ if (cpu == cpu_rb)
+ break;
+ per_cpu(idle_inject_device, cpu_rb) = NULL;
+ }
+
+ kfree(ii_dev);
+
+ return NULL;
+}
+
+/**
+ * idle_inject_unregister - unregister idle injection control device
+ * @ii_dev: idle injection control device to unregister
+ *
+ * The function stops idle injection for the given control device,
+ * unregisters its kthreads and frees memory allocated when that device was
+ * created.
+ */
+void idle_inject_unregister(struct idle_inject_device *ii_dev)
+{
+ unsigned int cpu;
+
+ idle_inject_stop(ii_dev);
+
+ for_each_cpu(cpu, to_cpumask(ii_dev->cpumask))
+ per_cpu(idle_inject_device, cpu) = NULL;
+
+ kfree(ii_dev);
+}
+
+static struct smp_hotplug_thread idle_inject_threads = {
+ .store = &idle_inject_thread.tsk,
+ .setup = idle_inject_setup,
+ .thread_fn = idle_inject_fn,
+ .thread_comm = "idle_inject/%u",
+ .thread_should_run = idle_inject_should_run,
+};
+
+static int __init idle_inject_init(void)
+{
+ return smpboot_register_percpu_thread(&idle_inject_threads);
+}
+early_initcall(idle_inject_init);
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index 0097a939487f..546960a18d60 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -209,7 +209,7 @@ static int imx_pgc_power_domain_probe(struct platform_device *pdev)
goto genpd_err;
}
- device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE);
+ device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE_CONSUMER);
return 0;
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 334d98be03b9..cbfcca828cd7 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -3,6 +3,7 @@
// Copyright 2013 Freescale Semiconductor, Inc.
#include <linux/clk.h>
+#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpu_cooling.h>
#include <linux/delay.h>
@@ -644,6 +645,27 @@ static const struct of_device_id of_imx_thermal_match[] = {
};
MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
+/*
+ * Create cooling device in case no #cooling-cells property is available in
+ * CPU node
+ */
+static int imx_thermal_register_legacy_cooling(struct imx_thermal_data *data)
+{
+ struct device_node *np = of_get_cpu_node(data->policy->cpu, NULL);
+ int ret;
+
+ if (!np || !of_find_property(np, "#cooling-cells", NULL)) {
+ data->cdev = cpufreq_cooling_register(data->policy);
+ if (IS_ERR(data->cdev)) {
+ ret = PTR_ERR(data->cdev);
+ cpufreq_cpu_put(data->policy);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static int imx_thermal_probe(struct platform_device *pdev)
{
struct imx_thermal_data *data;
@@ -724,12 +746,10 @@ static int imx_thermal_probe(struct platform_device *pdev)
return -EPROBE_DEFER;
}
- data->cdev = cpufreq_cooling_register(data->policy);
- if (IS_ERR(data->cdev)) {
- ret = PTR_ERR(data->cdev);
+ ret = imx_thermal_register_legacy_cooling(data);
+ if (ret) {
dev_err(&pdev->dev,
"failed to register cpufreq cooling device: %d\n", ret);
- cpufreq_cpu_put(data->policy);
return ret;
}