diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 13:12:24 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-14 13:12:24 -0700 |
commit | b018fc9800557bd14a40d69501e19c340eb2c521 (patch) | |
tree | 541109645e83725699d2b091a1c6c4816fdc6649 /drivers | |
parent | c07b3682cd12a017f976ec63bbd4758dc4c5100e (diff) | |
parent | 7425ecd5e3e8c9d84f399a102282a23a90a19278 (diff) | |
download | lwn-b018fc9800557bd14a40d69501e19c340eb2c521.tar.gz lwn-b018fc9800557bd14a40d69501e19c340eb2c521.zip |
Merge tag 'pm-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki:
"These add a new framework for CPU idle time injection, to be used by
all of the idle injection code in the kernel in the future, fix some
issues and add a number of relatively small extensions in multiple
places.
Specifics:
- Add a new framework for CPU idle time injection (Daniel Lezcano).
- Add AVS support to the armada-37xx cpufreq driver (Gregory
CLEMENT).
- Add support for current CPU frequency reporting to the ACPI CPPC
cpufreq driver (George Cherian).
- Rework the cooling device registration in the imx6q/thermal driver
(Bastian Stender).
- Make the pcc-cpufreq driver refuse to work with dynamic scaling
governors on systems with many CPUs to avoid scalability issues
with it (Rafael Wysocki).
- Fix the intel_pstate driver to report different maximum CPU
frequencies on systems where they really are different and to
ignore the turbo active ratio if hardware-managend P-states (HWP)
are in use; make it use the match_string() helper (Xie Yisheng,
Srinivas Pandruvada).
- Fix a minor deferred probe issue in the qcom-kryo cpufreq driver
(Niklas Cassel).
- Add a tracepoint for the tracking of frequency limits changes (from
Andriod) to the cpufreq core (Ruchi Kandoi).
- Fix a circular lock dependency between CPU hotplug and sysfs
locking in the cpufreq core reported by lockdep (Waiman Long).
- Avoid excessive error reports on driver registration failures in
the ARM cpuidle driver (Sudeep Holla).
- Add a new device links flag to the driver core to make links go
away automatically on supplier driver removal (Vivek Gautam).
- Eliminate potential race condition between system-wide power
management transitions and system shutdown (Pingfan Liu).
- Add a quirk to save NVS memory on system suspend for the ASUS 1025C
laptop (Willy Tarreau).
- Make more systems use suspend-to-idle (instead of ACPI S3) by
default (Tristian Celestin).
- Get rid of stack VLA usage in the low-level hibernation code on
64-bit x86 (Kees Cook).
- Fix error handling in the hibernation core and mark an expected
fall-through switch in it (Chengguang Xu, Gustavo Silva).
- Extend the generic power domains (genpd) framework to support
attaching a device to a power domain by name (Ulf Hansson).
- Fix device reference counting and user limits initialization in the
devfreq core (Arvind Yadav, Matthias Kaehlcke).
- Fix a few issues in the rk3399_dmc devfreq driver and improve its
documentation (Enric Balletbo i Serra, Lin Huang, Nick Milner).
- Drop a redundant error message from the exynos-ppmu devfreq driver
(Markus Elfring)"
* tag 'pm-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (35 commits)
PM / reboot: Eliminate race between reboot and suspend
PM / hibernate: Mark expected switch fall-through
cpufreq: intel_pstate: Ignore turbo active ratio in HWP
cpufreq: Fix a circular lock dependency problem
cpu/hotplug: Add a cpus_read_trylock() function
x86/power/hibernate_64: Remove VLA usage
cpufreq: trace frequency limits change
cpufreq: intel_pstate: Show different max frequency with turbo 3 and HWP
cpufreq: pcc-cpufreq: Disable dynamic scaling on many-CPU systems
cpufreq: qcom-kryo: Silently error out on EPROBE_DEFER
cpufreq / CPPC: Add cpuinfo_cur_freq support for CPPC
cpufreq: armada-37xx: Add AVS support
dt-bindings: marvell: Add documentation for the Armada 3700 AVS binding
PM / devfreq: rk3399_dmc: Fix duplicated opp table on reload.
PM / devfreq: Init user limits from OPP limits, not viceversa
PM / devfreq: rk3399_dmc: fix spelling mistakes.
PM / devfreq: rk3399_dmc: do not print error when get supply and clk defer.
dt-bindings: devfreq: rk3399_dmc: move interrupts to be optional.
PM / devfreq: rk3399_dmc: remove wait for dcf irq event.
dt-bindings: clock: add rk3399 DDR3 standard speed bins.
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/sleep.c | 30 | ||||
-rw-r--r-- | drivers/base/core.c | 25 | ||||
-rw-r--r-- | drivers/base/power/common.c | 17 | ||||
-rw-r--r-- | drivers/base/power/domain.c | 24 | ||||
-rw-r--r-- | drivers/cpufreq/armada-37xx-cpufreq.c | 163 | ||||
-rw-r--r-- | drivers/cpufreq/cppc_cpufreq.c | 52 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 8 | ||||
-rw-r--r-- | drivers/cpufreq/imx6q-cpufreq.c | 21 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 27 | ||||
-rw-r--r-- | drivers/cpufreq/pcc-cpufreq.c | 9 | ||||
-rw-r--r-- | drivers/cpufreq/qcom-cpufreq-kryo.c | 5 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-arm.c | 3 | ||||
-rw-r--r-- | drivers/devfreq/devfreq.c | 16 | ||||
-rw-r--r-- | drivers/devfreq/event/exynos-ppmu.c | 6 | ||||
-rw-r--r-- | drivers/devfreq/rk3399_dmc.c | 102 | ||||
-rw-r--r-- | drivers/gpu/drm/tegra/dc.c | 2 | ||||
-rw-r--r-- | drivers/gpu/ipu-v3/ipu-pre.c | 3 | ||||
-rw-r--r-- | drivers/gpu/ipu-v3/ipu-prg.c | 3 | ||||
-rw-r--r-- | drivers/powercap/Kconfig | 10 | ||||
-rw-r--r-- | drivers/powercap/Makefile | 1 | ||||
-rw-r--r-- | drivers/powercap/idle_inject.c | 356 | ||||
-rw-r--r-- | drivers/soc/imx/gpc.c | 2 | ||||
-rw-r--r-- | drivers/thermal/imx_thermal.c | 28 |
23 files changed, 794 insertions, 119 deletions
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5d0486f1cfcd..754d59f95500 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -338,6 +338,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Asus 1025C", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "1025C"), + }, + }, /* * https://bugzilla.kernel.org/show_bug.cgi?id=189431 * Lenovo G50-45 is a platform later than 2012, but needs nvs memory @@ -718,9 +726,6 @@ static const struct acpi_device_id lps0_device_ids[] = { #define ACPI_LPS0_ENTRY 5 #define ACPI_LPS0_EXIT 6 -#define ACPI_LPS0_SCREEN_MASK ((1 << ACPI_LPS0_SCREEN_OFF) | (1 << ACPI_LPS0_SCREEN_ON)) -#define ACPI_LPS0_PLATFORM_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT)) - static acpi_handle lps0_device_handle; static guid_t lps0_dsm_guid; static char lps0_dsm_func_mask; @@ -924,17 +929,14 @@ static int lps0_device_attach(struct acpi_device *adev, if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) { char bitmask = *(char *)out_obj->buffer.pointer; - if ((bitmask & ACPI_LPS0_PLATFORM_MASK) == ACPI_LPS0_PLATFORM_MASK || - (bitmask & ACPI_LPS0_SCREEN_MASK) == ACPI_LPS0_SCREEN_MASK) { - lps0_dsm_func_mask = bitmask; - lps0_device_handle = adev->handle; - /* - * Use suspend-to-idle by default if the default - * suspend mode was not set from the command line. - */ - if (mem_sleep_default > PM_SUSPEND_MEM) - mem_sleep_current = PM_SUSPEND_TO_IDLE; - } + lps0_dsm_func_mask = bitmask; + lps0_device_handle = adev->handle; + /* + * Use suspend-to-idle by default if the default + * suspend mode was not set from the command line. + */ + if (mem_sleep_default > PM_SUSPEND_MEM) + mem_sleep_current = PM_SUSPEND_TO_IDLE; acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n", bitmask); diff --git a/drivers/base/core.c b/drivers/base/core.c index 2ab316d85651..9d3c15d4dde8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -178,10 +178,10 @@ void device_pm_move_to_tail(struct device *dev) * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * - * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically - * when the consumer device driver unbinds from it. The combination of both - * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL - * to be returned. + * If the DL_FLAG_AUTOREMOVE_CONSUMER is set, the link will be removed + * automatically when the consumer device driver unbinds from it. + * The combination of both DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_STATELESS + * set is invalid and will cause NULL to be returned. * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending @@ -198,7 +198,8 @@ struct device_link *device_link_add(struct device *consumer, struct device_link *link; if (!consumer || !supplier || - ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE))) + ((flags & DL_FLAG_STATELESS) && + (flags & DL_FLAG_AUTOREMOVE_CONSUMER))) return NULL; device_links_write_lock(); @@ -509,7 +510,7 @@ static void __device_links_no_driver(struct device *dev) if (link->flags & DL_FLAG_STATELESS) continue; - if (link->flags & DL_FLAG_AUTOREMOVE) + if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) kref_put(&link->kref, __device_link_del); else if (link->status != DL_STATE_SUPPLIER_UNBIND) WRITE_ONCE(link->status, DL_STATE_AVAILABLE); @@ -545,8 +546,18 @@ void device_links_driver_cleanup(struct device *dev) if (link->flags & DL_FLAG_STATELESS) continue; - WARN_ON(link->flags & DL_FLAG_AUTOREMOVE); + WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); + + /* + * autoremove the links between this @dev and its consumer + * devices that are not active, i.e. where the link state + * has moved to DL_STATE_SUPPLIER_UNBIND. + */ + if (link->status == DL_STATE_SUPPLIER_UNBIND && + link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) + kref_put(&link->kref, __device_link_del); + WRITE_ONCE(link->status, DL_STATE_DORMANT); } diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index df41b4780b3b..b413951c6abc 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -153,6 +153,23 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev, EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id); /** + * dev_pm_domain_attach_by_name - Associate a device with one of its PM domains. + * @dev: The device used to lookup the PM domain. + * @name: The name of the PM domain. + * + * For a detailed function description, see dev_pm_domain_attach_by_id(). + */ +struct device *dev_pm_domain_attach_by_name(struct device *dev, + char *name) +{ + if (dev->pm_domain) + return ERR_PTR(-EEXIST); + + return genpd_dev_pm_attach_by_name(dev, name); +} +EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name); + +/** * dev_pm_domain_detach - Detach a device from its PM domain. * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9e8484189034..79bdca70a81a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2374,6 +2374,30 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id); +/** + * genpd_dev_pm_attach_by_name - Associate a device with one of its PM domains. + * @dev: The device used to lookup the PM domain. + * @name: The name of the PM domain. + * + * Parse device's OF node to find a PM domain specifier using the + * power-domain-names DT property. For further description see + * genpd_dev_pm_attach_by_id(). + */ +struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name) +{ + int index; + + if (!dev->of_node) + return NULL; + + index = of_property_match_string(dev->of_node, "power-domain-names", + name); + if (index < 0) + return NULL; + + return genpd_dev_pm_attach_by_id(dev, index); +} + static const struct of_device_id idle_state_match[] = { { .compatible = "domain-idle-state", }, { } diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 739da90ff3f6..75491fc841a6 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -51,6 +51,16 @@ #define ARMADA_37XX_DVFS_LOAD_2 2 #define ARMADA_37XX_DVFS_LOAD_3 3 +/* AVS register set */ +#define ARMADA_37XX_AVS_CTL0 0x0 +#define ARMADA_37XX_AVS_ENABLE BIT(30) +#define ARMADA_37XX_AVS_HIGH_VDD_LIMIT 16 +#define ARMADA_37XX_AVS_LOW_VDD_LIMIT 22 +#define ARMADA_37XX_AVS_VDD_MASK 0x3F +#define ARMADA_37XX_AVS_CTL2 0x8 +#define ARMADA_37XX_AVS_LOW_VDD_EN BIT(6) +#define ARMADA_37XX_AVS_VSET(x) (0x1C + 4 * (x)) + /* * On Armada 37xx the Power management manages 4 level of CPU load, * each level can be associated with a CPU clock source, a CPU @@ -58,6 +68,17 @@ */ #define LOAD_LEVEL_NR 4 +#define MIN_VOLT_MV 1000 + +/* AVS value for the corresponding voltage (in mV) */ +static int avs_map[] = { + 747, 758, 770, 782, 793, 805, 817, 828, 840, 852, 863, 875, 887, 898, + 910, 922, 933, 945, 957, 968, 980, 992, 1003, 1015, 1027, 1038, 1050, + 1062, 1073, 1085, 1097, 1108, 1120, 1132, 1143, 1155, 1167, 1178, 1190, + 1202, 1213, 1225, 1237, 1248, 1260, 1272, 1283, 1295, 1307, 1318, 1330, + 1342 +}; + struct armada37xx_cpufreq_state { struct regmap *regmap; u32 nb_l0l1; @@ -71,6 +92,7 @@ static struct armada37xx_cpufreq_state *armada37xx_cpufreq_state; struct armada_37xx_dvfs { u32 cpu_freq_max; u8 divider[LOAD_LEVEL_NR]; + u32 avs[LOAD_LEVEL_NR]; }; static struct armada_37xx_dvfs armada_37xx_dvfs[] = { @@ -148,6 +170,128 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base, clk_set_parent(clk, parent); } +/* + * Find out the armada 37x supported AVS value whose voltage value is + * the round-up closest to the target voltage value. + */ +static u32 armada_37xx_avs_val_match(int target_vm) +{ + u32 avs; + + /* Find out the round-up closest supported voltage value */ + for (avs = 0; avs < ARRAY_SIZE(avs_map); avs++) + if (avs_map[avs] >= target_vm) + break; + + /* + * If all supported voltages are smaller than target one, + * choose the largest supported voltage + */ + if (avs == ARRAY_SIZE(avs_map)) + avs = ARRAY_SIZE(avs_map) - 1; + + return avs; +} + +/* + * For Armada 37xx soc, L0(VSET0) VDD AVS value is set to SVC revision + * value or a default value when SVC is not supported. + * - L0 can be read out from the register of AVS_CTRL_0 and L0 voltage + * can be got from the mapping table of avs_map. + * - L1 voltage should be about 100mv smaller than L0 voltage + * - L2 & L3 voltage should be about 150mv smaller than L0 voltage. + * This function calculates L1 & L2 & L3 AVS values dynamically based + * on L0 voltage and fill all AVS values to the AVS value table. + */ +static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, + struct armada_37xx_dvfs *dvfs) +{ + unsigned int target_vm; + int load_level = 0; + u32 l0_vdd_min; + + if (base == NULL) + return; + + /* Get L0 VDD min value */ + regmap_read(base, ARMADA_37XX_AVS_CTL0, &l0_vdd_min); + l0_vdd_min = (l0_vdd_min >> ARMADA_37XX_AVS_LOW_VDD_LIMIT) & + ARMADA_37XX_AVS_VDD_MASK; + if (l0_vdd_min >= ARRAY_SIZE(avs_map)) { + pr_err("L0 VDD MIN %d is not correct.\n", l0_vdd_min); + return; + } + dvfs->avs[0] = l0_vdd_min; + + if (avs_map[l0_vdd_min] <= MIN_VOLT_MV) { + /* + * If L0 voltage is smaller than 1000mv, then all VDD sets + * use L0 voltage; + */ + u32 avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV); + + for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) + dvfs->avs[load_level] = avs_min; + + return; + } + + /* + * L1 voltage is equal to L0 voltage - 100mv and it must be + * larger than 1000mv + */ + + target_vm = avs_map[l0_vdd_min] - 100; + target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV; + dvfs->avs[1] = armada_37xx_avs_val_match(target_vm); + + /* + * L2 & L3 voltage is equal to L0 voltage - 150mv and it must + * be larger than 1000mv + */ + target_vm = avs_map[l0_vdd_min] - 150; + target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV; + dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm); +} + +static void __init armada37xx_cpufreq_avs_setup(struct regmap *base, + struct armada_37xx_dvfs *dvfs) +{ + unsigned int avs_val = 0, freq; + int load_level = 0; + + if (base == NULL) + return; + + /* Disable AVS before the configuration */ + regmap_update_bits(base, ARMADA_37XX_AVS_CTL0, + ARMADA_37XX_AVS_ENABLE, 0); + + + /* Enable low voltage mode */ + regmap_update_bits(base, ARMADA_37XX_AVS_CTL2, + ARMADA_37XX_AVS_LOW_VDD_EN, + ARMADA_37XX_AVS_LOW_VDD_EN); + + + for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) { + freq = dvfs->cpu_freq_max / dvfs->divider[load_level]; + + avs_val = dvfs->avs[load_level]; + regmap_update_bits(base, ARMADA_37XX_AVS_VSET(load_level-1), + ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_HIGH_VDD_LIMIT | + ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_LOW_VDD_LIMIT, + avs_val << ARMADA_37XX_AVS_HIGH_VDD_LIMIT | + avs_val << ARMADA_37XX_AVS_LOW_VDD_LIMIT); + } + + /* Enable AVS after the configuration */ + regmap_update_bits(base, ARMADA_37XX_AVS_CTL0, + ARMADA_37XX_AVS_ENABLE, + ARMADA_37XX_AVS_ENABLE); + +} + static void armada37xx_cpufreq_disable_dvfs(struct regmap *base) { unsigned int reg = ARMADA_37XX_NB_DYN_MOD, @@ -216,7 +360,7 @@ static int __init armada37xx_cpufreq_driver_init(void) struct platform_device *pdev; unsigned long freq; unsigned int cur_frequency; - struct regmap *nb_pm_base; + struct regmap *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; struct clk *clk; @@ -227,6 +371,14 @@ static int __init armada37xx_cpufreq_driver_init(void) if (IS_ERR(nb_pm_base)) return -ENODEV; + avs_base = + syscon_regmap_lookup_by_compatible("marvell,armada-3700-avs"); + + /* if AVS is not present don't use it but still try to setup dvfs */ + if (IS_ERR(avs_base)) { + pr_info("Syscon failed for Adapting Voltage Scaling: skip it\n"); + avs_base = NULL; + } /* Before doing any configuration on the DVFS first, disable it */ armada37xx_cpufreq_disable_dvfs(nb_pm_base); @@ -270,16 +422,21 @@ static int __init armada37xx_cpufreq_driver_init(void) armada37xx_cpufreq_state->regmap = nb_pm_base; + armada37xx_cpufreq_avs_configure(avs_base, dvfs); + armada37xx_cpufreq_avs_setup(avs_base, dvfs); + armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider); clk_put(clk); for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { + unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000; freq = cur_frequency / dvfs->divider[load_lvl]; - - ret = dev_pm_opp_add(cpu_dev, freq, 0); + ret = dev_pm_opp_add(cpu_dev, freq, u_volt); if (ret) goto remove_opp; + + } /* Now that everything is setup, enable the DVFS at hardware level */ diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index a9d3eec32795..30f302149730 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -296,10 +296,62 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) return ret; } +static inline u64 get_delta(u64 t1, u64 t0) +{ + if (t1 > t0 || t0 > ~(u32)0) + return t1 - t0; + + return (u32)t1 - (u32)t0; +} + +static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu, + struct cppc_perf_fb_ctrs fb_ctrs_t0, + struct cppc_perf_fb_ctrs fb_ctrs_t1) +{ + u64 delta_reference, delta_delivered; + u64 reference_perf, delivered_perf; + + reference_perf = fb_ctrs_t0.reference_perf; + + delta_reference = get_delta(fb_ctrs_t1.reference, + fb_ctrs_t0.reference); + delta_delivered = get_delta(fb_ctrs_t1.delivered, + fb_ctrs_t0.delivered); + + /* Check to avoid divide-by zero */ + if (delta_reference || delta_delivered) + delivered_perf = (reference_perf * delta_delivered) / + delta_reference; + else + delivered_perf = cpu->perf_ctrls.desired_perf; + + return cppc_cpufreq_perf_to_khz(cpu, delivered_perf); +} + +static unsigned int cppc_cpufreq_get_rate(unsigned int cpunum) +{ + struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; + struct cppc_cpudata *cpu = all_cpu_data[cpunum]; + int ret; + + ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t0); + if (ret) + return ret; + + udelay(2); /* 2usec delay between sampling */ + + ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t1); + if (ret) + return ret; + + return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1); +} + static struct cpufreq_driver cppc_cpufreq_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = cppc_verify_policy, .target = cppc_cpufreq_set_target, + .get = cppc_cpufreq_get_rate, .init = cppc_cpufreq_cpu_init, .stop_cpu = cppc_cpufreq_stop_cpu, .name = "cppc_cpufreq", diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b0dfd3222013..f53fb41efb7b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -923,7 +923,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; - cpus_read_lock(); + /* + * cpus_read_trylock() is used here to work around a circular lock + * dependency problem with respect to the cpufreq_register_driver(). + */ + if (!cpus_read_trylock()) + return -EBUSY; if (cpu_online(policy->cpu)) { down_write(&policy->rwsem); @@ -2236,6 +2241,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min = new_policy->min; policy->max = new_policy->max; + trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 8b3c2a79ad6c..b2ff423ad7f8 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -9,6 +9,7 @@ #include <linux/clk.h> #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpu_cooling.h> #include <linux/err.h> #include <linux/module.h> #include <linux/of.h> @@ -50,6 +51,7 @@ static struct clk_bulk_data clks[] = { }; static struct device *cpu_dev; +static struct thermal_cooling_device *cdev; static bool free_opp; static struct cpufreq_frequency_table *freq_table; static unsigned int max_freq; @@ -191,6 +193,16 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) return 0; } +static void imx6q_cpufreq_ready(struct cpufreq_policy *policy) +{ + cdev = of_cpufreq_cooling_register(policy); + + if (!cdev) + dev_err(cpu_dev, + "running cpufreq without cooling device: %ld\n", + PTR_ERR(cdev)); +} + static int imx6q_cpufreq_init(struct cpufreq_policy *policy) { int ret; @@ -202,13 +214,22 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy) return ret; } +static int imx6q_cpufreq_exit(struct cpufreq_policy *policy) +{ + cpufreq_cooling_unregister(cdev); + + return 0; +} + static struct cpufreq_driver imx6q_cpufreq_driver = { .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = imx6q_set_target, .get = cpufreq_generic_get, .init = imx6q_cpufreq_init, + .exit = imx6q_cpufreq_exit, .name = "imx6q-cpufreq", + .ready = imx6q_cpufreq_ready, .attr = cpufreq_generic_attr, .suspend = cpufreq_generic_suspend, }; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d4ed0022b0dd..b6a1aadaff9f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -670,21 +670,18 @@ static ssize_t store_energy_performance_preference( { struct cpudata *cpu_data = all_cpu_data[policy->cpu]; char str_preference[21]; - int ret, i = 0; + int ret; ret = sscanf(buf, "%20s", str_preference); if (ret != 1) return -EINVAL; - while (energy_perf_strings[i] != NULL) { - if (!strcmp(str_preference, energy_perf_strings[i])) { - intel_pstate_set_energy_pref_index(cpu_data, i); - return count; - } - ++i; - } + ret = match_string(energy_perf_strings, -1, str_preference); + if (ret < 0) + return ret; - return -EINVAL; + intel_pstate_set_energy_pref_index(cpu_data, ret); + return count; } static ssize_t show_energy_performance_preference( @@ -2011,7 +2008,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, struct cpudata *cpu) { - if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && + if (!hwp_active && + cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && policy->max > cpu->pstate.max_freq) { pr_debug("policy->max > max non turbo frequency\n"); @@ -2085,6 +2083,15 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; + if (hwp_active) { + unsigned int max_freq; + + max_freq = global.turbo_disabled ? + cpu->pstate.max_freq : cpu->pstate.turbo_freq; + if (max_freq < policy->cpuinfo.max_freq) + policy->cpuinfo.max_freq = max_freq; + } + intel_pstate_init_acpi_perf_limits(policy); policy->fast_switch_possible = true; diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 0c56c9759672..099a849396f6 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -593,6 +593,15 @@ static int __init pcc_cpufreq_init(void) return ret; } + if (num_present_cpus() > 4) { + pcc_cpufreq_driver.flags |= CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING; + pr_err("%s: Too many CPUs, dynamic performance scaling disabled\n", + __func__); + pr_err("%s: Try to enable another scaling driver through BIOS settings\n", + __func__); + pr_err("%s: and complain to the system vendor\n", __func__); + } + ret = cpufreq_register_driver(&pcc_cpufreq_driver); return ret; diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c index efc9a7ae4857..a1830fa25fc5 100644 --- a/drivers/cpufreq/qcom-cpufreq-kryo.c +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c @@ -109,8 +109,9 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) speedbin_nvmem = of_nvmem_cell_get(np, NULL); of_node_put(np); if (IS_ERR(speedbin_nvmem)) { - dev_err(cpu_dev, "Could not get nvmem cell: %ld\n", - PTR_ERR(speedbin_nvmem)); + if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER) + dev_err(cpu_dev, "Could not get nvmem cell: %ld\n", + PTR_ERR(speedbin_nvmem)); return PTR_ERR(speedbin_nvmem); } diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index e07bc7ace774..073557f433eb 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -105,7 +105,8 @@ static int __init arm_idle_init_cpu(int cpu) ret = cpuidle_register_driver(drv); if (ret) { - pr_err("Failed to register cpuidle driver\n"); + if (ret != -EBUSY) + pr_err("Failed to register cpuidle driver\n"); goto out_kfree_drv; } diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0b5b3abe054e..4c49bb1330b5 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -604,28 +604,29 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); } - devfreq->min_freq = find_available_min_freq(devfreq); - if (!devfreq->min_freq) { + devfreq->scaling_min_freq = find_available_min_freq(devfreq); + if (!devfreq->scaling_min_freq) { mutex_unlock(&devfreq->lock); err = -EINVAL; goto err_dev; } - devfreq->scaling_min_freq = devfreq->min_freq; + devfreq->min_freq = devfreq->scaling_min_freq; - devfreq->max_freq = find_available_max_freq(devfreq); - if (!devfreq->max_freq) { + devfreq->scaling_max_freq = find_available_max_freq(devfreq); + if (!devfreq->scaling_max_freq) { mutex_unlock(&devfreq->lock); err = -EINVAL; goto err_dev; } - devfreq->scaling_max_freq = devfreq->max_freq; + devfreq->max_freq = devfreq->scaling_max_freq; dev_set_name(&devfreq->dev, "devfreq%d", atomic_inc_return(&devfreq_no)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); - goto err_dev; + put_device(&devfreq->dev); + goto err_out; } devfreq->trans_table = @@ -672,6 +673,7 @@ err_init: mutex_unlock(&devfreq_list_lock); device_unregister(&devfreq->dev); + devfreq = NULL; err_dev: if (devfreq) kfree(devfreq); diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 3cd6a184fe7c..a9c64f0d3284 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -627,11 +627,9 @@ static int exynos_ppmu_probe(struct platform_device *pdev) size = sizeof(struct devfreq_event_dev *) * info->num_events; info->edev = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); - if (!info->edev) { - dev_err(&pdev->dev, - "failed to allocate memory devfreq-event devices\n"); + if (!info->edev) return -ENOMEM; - } + edev = info->edev; platform_set_drvdata(pdev, info); diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 5dfbfa3cc878..e795ad2b3f6b 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -68,15 +68,6 @@ struct rk3399_dmcfreq { struct devfreq_event_dev *edev; struct mutex lock; struct dram_timing timing; - - /* - * DDR Converser of Frequency (DCF) is used to implement DDR frequency - * conversion without the participation of CPU, we will implement and - * control it in arm trust firmware. - */ - wait_queue_head_t wait_dcf_queue; - int irq; - int wait_dcf_flag; struct regulator *vdd_center; unsigned long rate, target_rate; unsigned long volt, target_volt; @@ -112,31 +103,22 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, err = regulator_set_voltage(dmcfreq->vdd_center, target_volt, target_volt); if (err) { - dev_err(dev, "Cannot to set voltage %lu uV\n", + dev_err(dev, "Cannot set voltage %lu uV\n", target_volt); goto out; } } - dmcfreq->wait_dcf_flag = 1; err = clk_set_rate(dmcfreq->dmc_clk, target_rate); if (err) { - dev_err(dev, "Cannot to set frequency %lu (%d)\n", - target_rate, err); + dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate, + err); regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt, dmcfreq->volt); goto out; } /* - * Wait until bcf irq happen, it means freq scaling finish in - * arm trust firmware, use 100ms as timeout time. - */ - if (!wait_event_timeout(dmcfreq->wait_dcf_queue, - !dmcfreq->wait_dcf_flag, HZ / 10)) - dev_warn(dev, "Timeout waiting for dcf interrupt\n"); - - /* * Check the dpll rate, * There only two result we will get, * 1. Ddr frequency scaling fail, we still get the old rate. @@ -146,8 +128,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, /* If get the incorrect rate, set voltage to old value. */ if (dmcfreq->rate != target_rate) { - dev_err(dev, "Get wrong ddr frequency, Request frequency %lu,\ - Current frequency %lu\n", target_rate, dmcfreq->rate); + dev_err(dev, "Got wrong frequency, Request %lu, Current %lu\n", + target_rate, dmcfreq->rate); regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt, dmcfreq->volt); goto out; @@ -155,7 +137,7 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, err = regulator_set_voltage(dmcfreq->vdd_center, target_volt, target_volt); if (err) - dev_err(dev, "Cannot to set vol %lu uV\n", target_volt); + dev_err(dev, "Cannot set voltage %lu uV\n", target_volt); dmcfreq->rate = target_rate; dmcfreq->volt = target_volt; @@ -241,22 +223,6 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend, rk3399_dmcfreq_resume); -static irqreturn_t rk3399_dmc_irq(int irq, void *dev_id) -{ - struct rk3399_dmcfreq *dmcfreq = dev_id; - struct arm_smccc_res res; - - dmcfreq->wait_dcf_flag = 0; - wake_up(&dmcfreq->wait_dcf_queue); - - /* Clear the DCF interrupt */ - arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, - ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ, - 0, 0, 0, 0, &res); - - return IRQ_HANDLED; -} - static int of_get_ddr_timings(struct dram_timing *timing, struct device_node *np) { @@ -330,16 +296,10 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = pdev->dev.of_node; struct rk3399_dmcfreq *data; - int ret, irq, index, size; + int ret, index, size; uint32_t *timing; struct dev_pm_opp *opp; - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, - "Cannot get the dmc interrupt resource: %d\n", irq); - return irq; - } data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL); if (!data) return -ENOMEM; @@ -348,27 +308,22 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) data->vdd_center = devm_regulator_get(dev, "center"); if (IS_ERR(data->vdd_center)) { + if (PTR_ERR(data->vdd_center) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_err(dev, "Cannot get the regulator \"center\"\n"); return PTR_ERR(data->vdd_center); } data->dmc_clk = devm_clk_get(dev, "dmc_clk"); if (IS_ERR(data->dmc_clk)) { + if (PTR_ERR(data->dmc_clk) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_err(dev, "Cannot get the clk dmc_clk\n"); return PTR_ERR(data->dmc_clk); }; - data->irq = irq; - ret = devm_request_irq(dev, irq, rk3399_dmc_irq, 0, - dev_name(dev), data); - if (ret) { - dev_err(dev, "Failed to request dmc irq: %d\n", ret); - return ret; - } - - init_waitqueue_head(&data->wait_dcf_queue); - data->wait_dcf_flag = 0; - data->edev = devfreq_event_get_edev_by_phandle(dev, 0); if (IS_ERR(data->edev)) return -EPROBE_DEFER; @@ -420,8 +375,10 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) data->rate = clk_get_rate(data->dmc_clk); opp = devfreq_recommended_opp(dev, &data->rate, 0); - if (IS_ERR(opp)) - return PTR_ERR(opp); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + goto err_free_opp; + } data->rate = dev_pm_opp_get_freq(opp); data->volt = dev_pm_opp_get_voltage(opp); @@ -433,14 +390,34 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) &rk3399_devfreq_dmc_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, &data->ondemand_data); - if (IS_ERR(data->devfreq)) - return PTR_ERR(data->devfreq); + if (IS_ERR(data->devfreq)) { + ret = PTR_ERR(data->devfreq); + goto err_free_opp; + } + devm_devfreq_register_opp_notifier(dev, data->devfreq); data->dev = dev; platform_set_drvdata(pdev, data); return 0; + +err_free_opp: + dev_pm_opp_of_remove_table(&pdev->dev); + return ret; +} + +static int rk3399_dmcfreq_remove(struct platform_device *pdev) +{ + struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev); + + /* + * Before remove the opp table we need to unregister the opp notifier. + */ + devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq); + dev_pm_opp_of_remove_table(dmcfreq->dev); + + return 0; } static const struct of_device_id rk3399dmc_devfreq_of_match[] = { @@ -451,6 +428,7 @@ MODULE_DEVICE_TABLE(of, rk3399dmc_devfreq_of_match); static struct platform_driver rk3399_dmcfreq_driver = { .probe = rk3399_dmcfreq_probe, + .remove = rk3399_dmcfreq_remove, .driver = { .name = "rk3399-dmc-freq", .pm = &rk3399_dmcfreq_pm, diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index c3afe7b2237e..965088afcfad 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2312,7 +2312,7 @@ static int tegra_dc_couple(struct tegra_dc *dc) * POWER_CONTROL registers during CRTC enabling. */ if (dc->soc->coupled_pm && dc->pipe == 1) { - u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE; + u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER; struct device_link *link; struct device *partner; diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index 0f70e8847540..2f8db9d62551 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -128,7 +128,8 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index) list_for_each_entry(pre, &ipu_pre_list, list) { if (pre_node == pre->dev->of_node) { mutex_unlock(&ipu_pre_list_mutex); - device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE); + device_link_add(dev, pre->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); of_node_put(pre_node); return pre; } diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index 83f9dd934a5d..38a3a9764e49 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -100,7 +100,8 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id) list_for_each_entry(prg, &ipu_prg_list, list) { if (prg_node == prg->dev->of_node) { mutex_unlock(&ipu_prg_list_mutex); - device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE); + device_link_add(dev, prg->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); prg->id = ipu_id; of_node_put(prg_node); return prg; diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 85727ef6ce8e..6ac27e5908f5 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -29,4 +29,14 @@ config INTEL_RAPL controller, CPU core (Power Plance 0), graphics uncore (Power Plane 1), etc. +config IDLE_INJECT + bool "Idle injection framework" + depends on CPU_IDLE + default n + help + This enables support for the idle injection framework. It + provides a way to force idle periods on a set of specified + CPUs for power capping. Idle period can be injected + synchronously on a set of specified CPUs or alternatively + on a per CPU basis. endif diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index 0a21ef31372b..1b328854b36e 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o +obj-$(CONFIG_IDLE_INJECT) += idle_inject.o diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c new file mode 100644 index 000000000000..24ff2a068978 --- /dev/null +++ b/drivers/powercap/idle_inject.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Linaro Limited + * + * Author: Daniel Lezcano <daniel.lezcano@linaro.org> + * + * The idle injection framework provides a way to force CPUs to enter idle + * states for a specified fraction of time over a specified period. + * + * It relies on the smpboot kthreads feature providing common code for CPU + * hotplug and thread [un]parking. + * + * All of the kthreads used for idle injection are created at init time. + * + * Next, the users of the the idle injection framework provide a cpumask via + * its register function. The kthreads will be synchronized with respect to + * this cpumask. + * + * The idle + run duration is specified via separate helpers and that allows + * idle injection to be started. + * + * The idle injection kthreads will call play_idle() with the idle duration + * specified as per the above. + * + * After all of them have been woken up, a timer is set to start the next idle + * injection cycle. + * + * The timer interrupt handler will wake up the idle injection kthreads for + * all of the CPUs in the cpumask provided by the user. + * + * Idle injection is stopped synchronously and no leftover idle injection + * kthread activity after its completion is guaranteed. + * + * It is up to the user of this framework to provide a lock for higher-level + * synchronization to prevent race conditions like starting idle injection + * while unregistering from the framework. + */ +#define pr_fmt(fmt) "ii_dev: " fmt + +#include <linux/cpu.h> +#include <linux/hrtimer.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/smpboot.h> + +#include <uapi/linux/sched/types.h> + +/** + * struct idle_inject_thread - task on/off switch structure + * @tsk: task injecting the idle cycles + * @should_run: whether or not to run the task (for the smpboot kthread API) + */ +struct idle_inject_thread { + struct task_struct *tsk; + int should_run; +}; + +/** + * struct idle_inject_device - idle injection data + * @timer: idle injection period timer + * @idle_duration_ms: duration of CPU idle time to inject + * @run_duration_ms: duration of CPU run time to allow + * @cpumask: mask of CPUs affected by idle injection + */ +struct idle_inject_device { + struct hrtimer timer; + unsigned int idle_duration_ms; + unsigned int run_duration_ms; + unsigned long int cpumask[0]; +}; + +static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); +static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); + +/** + * idle_inject_wakeup - Wake up idle injection threads + * @ii_dev: target idle injection device + * + * Every idle injection task associated with the given idle injection device + * and running on an online CPU will be woken up. + */ +static void idle_inject_wakeup(struct idle_inject_device *ii_dev) +{ + struct idle_inject_thread *iit; + unsigned int cpu; + + for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { + iit = per_cpu_ptr(&idle_inject_thread, cpu); + iit->should_run = 1; + wake_up_process(iit->tsk); + } +} + +/** + * idle_inject_timer_fn - idle injection timer function + * @timer: idle injection hrtimer + * + * This function is called when the idle injection timer expires. It wakes up + * idle injection tasks associated with the timer and they, in turn, invoke + * play_idle() to inject a specified amount of CPU idle time. + * + * Return: HRTIMER_RESTART. + */ +static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) +{ + unsigned int duration_ms; + struct idle_inject_device *ii_dev = + container_of(timer, struct idle_inject_device, timer); + + duration_ms = READ_ONCE(ii_dev->run_duration_ms); + duration_ms += READ_ONCE(ii_dev->idle_duration_ms); + + idle_inject_wakeup(ii_dev); + + hrtimer_forward_now(timer, ms_to_ktime(duration_ms)); + + return HRTIMER_RESTART; +} + +/** + * idle_inject_fn - idle injection work function + * @cpu: the CPU owning the task + * + * This function calls play_idle() to inject a specified amount of CPU idle + * time. + */ +static void idle_inject_fn(unsigned int cpu) +{ + struct idle_inject_device *ii_dev; + struct idle_inject_thread *iit; + + ii_dev = per_cpu(idle_inject_device, cpu); + iit = per_cpu_ptr(&idle_inject_thread, cpu); + + /* + * Let the smpboot main loop know that the task should not run again. + */ + iit->should_run = 0; + + play_idle(READ_ONCE(ii_dev->idle_duration_ms)); +} + +/** + * idle_inject_set_duration - idle and run duration update helper + * @run_duration_ms: CPU run time to allow in milliseconds + * @idle_duration_ms: CPU idle time to inject in milliseconds + */ +void idle_inject_set_duration(struct idle_inject_device *ii_dev, + unsigned int run_duration_ms, + unsigned int idle_duration_ms) +{ + if (run_duration_ms && idle_duration_ms) { + WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms); + WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms); + } +} + +/** + * idle_inject_get_duration - idle and run duration retrieval helper + * @run_duration_ms: memory location to store the current CPU run time + * @idle_duration_ms: memory location to store the current CPU idle time + */ +void idle_inject_get_duration(struct idle_inject_device *ii_dev, + unsigned int *run_duration_ms, + unsigned int *idle_duration_ms) +{ + *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); + *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); +} + +/** + * idle_inject_start - start idle injections + * @ii_dev: idle injection control device structure + * + * The function starts idle injection by first waking up all of the idle + * injection kthreads associated with @ii_dev to let them inject CPU idle time + * sets up a timer to start the next idle injection period. + * + * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. + */ +int idle_inject_start(struct idle_inject_device *ii_dev) +{ + unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); + unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); + + if (!idle_duration_ms || !run_duration_ms) + return -EINVAL; + + pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", + cpumask_pr_args(to_cpumask(ii_dev->cpumask))); + + idle_inject_wakeup(ii_dev); + + hrtimer_start(&ii_dev->timer, + ms_to_ktime(idle_duration_ms + run_duration_ms), + HRTIMER_MODE_REL); + + return 0; +} + +/** + * idle_inject_stop - stops idle injections + * @ii_dev: idle injection control device structure + * + * The function stops idle injection and waits for the threads to finish work. + * If CPU idle time is being injected when this function runs, then it will + * wait until the end of the cycle. + * + * When it returns, there is no more idle injection kthread activity. The + * kthreads are scheduled out and the periodic timer is off. + */ +void idle_inject_stop(struct idle_inject_device *ii_dev) +{ + struct idle_inject_thread *iit; + unsigned int cpu; + + pr_debug("Stopping idle injection on CPUs '%*pbl'\n", + cpumask_pr_args(to_cpumask(ii_dev->cpumask))); + + hrtimer_cancel(&ii_dev->timer); + + /* + * Stopping idle injection requires all of the idle injection kthreads + * associated with the given cpumask to be parked and stay that way, so + * prevent CPUs from going online at this point. Any CPUs going online + * after the loop below will be covered by clearing the should_run flag + * that will cause the smpboot main loop to schedule them out. + */ + cpu_hotplug_disable(); + + /* + * Iterate over all (online + offline) CPUs here in case one of them + * goes offline with the should_run flag set so as to prevent its idle + * injection kthread from running when the CPU goes online again after + * the ii_dev has been freed. + */ + for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { + iit = per_cpu_ptr(&idle_inject_thread, cpu); + iit->should_run = 0; + + wait_task_inactive(iit->tsk, 0); + } + + cpu_hotplug_enable(); +} + +/** + * idle_inject_setup - prepare the current task for idle injection + * @cpu: not used + * + * Called once, this function is in charge of setting the current task's + * scheduler parameters to make it an RT task. + */ +static void idle_inject_setup(unsigned int cpu) +{ + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; + + sched_setscheduler(current, SCHED_FIFO, ¶m); +} + +/** + * idle_inject_should_run - function helper for the smpboot API + * @cpu: CPU the kthread is running on + * + * Return: whether or not the thread can run. + */ +static int idle_inject_should_run(unsigned int cpu) +{ + struct idle_inject_thread *iit = + per_cpu_ptr(&idle_inject_thread, cpu); + + return iit->should_run; +} + +/** + * idle_inject_register - initialize idle injection on a set of CPUs + * @cpumask: CPUs to be affected by idle injection + * + * This function creates an idle injection control device structure for the + * given set of CPUs and initializes the timer associated with it. It does not + * start any injection cycles. + * + * Return: NULL if memory allocation fails, idle injection control device + * pointer on success. + */ +struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) +{ + struct idle_inject_device *ii_dev; + int cpu, cpu_rb; + + ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); + if (!ii_dev) + return NULL; + + cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); + hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + ii_dev->timer.function = idle_inject_timer_fn; + + for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { + + if (per_cpu(idle_inject_device, cpu)) { + pr_err("cpu%d is already registered\n", cpu); + goto out_rollback; + } + + per_cpu(idle_inject_device, cpu) = ii_dev; + } + + return ii_dev; + +out_rollback: + for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { + if (cpu == cpu_rb) + break; + per_cpu(idle_inject_device, cpu_rb) = NULL; + } + + kfree(ii_dev); + + return NULL; +} + +/** + * idle_inject_unregister - unregister idle injection control device + * @ii_dev: idle injection control device to unregister + * + * The function stops idle injection for the given control device, + * unregisters its kthreads and frees memory allocated when that device was + * created. + */ +void idle_inject_unregister(struct idle_inject_device *ii_dev) +{ + unsigned int cpu; + + idle_inject_stop(ii_dev); + + for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) + per_cpu(idle_inject_device, cpu) = NULL; + + kfree(ii_dev); +} + +static struct smp_hotplug_thread idle_inject_threads = { + .store = &idle_inject_thread.tsk, + .setup = idle_inject_setup, + .thread_fn = idle_inject_fn, + .thread_comm = "idle_inject/%u", + .thread_should_run = idle_inject_should_run, +}; + +static int __init idle_inject_init(void) +{ + return smpboot_register_percpu_thread(&idle_inject_threads); +} +early_initcall(idle_inject_init); diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index 0097a939487f..546960a18d60 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -209,7 +209,7 @@ static int imx_pgc_power_domain_probe(struct platform_device *pdev) goto genpd_err; } - device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE); + device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE_CONSUMER); return 0; diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index 334d98be03b9..cbfcca828cd7 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -3,6 +3,7 @@ // Copyright 2013 Freescale Semiconductor, Inc. #include <linux/clk.h> +#include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/cpu_cooling.h> #include <linux/delay.h> @@ -644,6 +645,27 @@ static const struct of_device_id of_imx_thermal_match[] = { }; MODULE_DEVICE_TABLE(of, of_imx_thermal_match); +/* + * Create cooling device in case no #cooling-cells property is available in + * CPU node + */ +static int imx_thermal_register_legacy_cooling(struct imx_thermal_data *data) +{ + struct device_node *np = of_get_cpu_node(data->policy->cpu, NULL); + int ret; + + if (!np || !of_find_property(np, "#cooling-cells", NULL)) { + data->cdev = cpufreq_cooling_register(data->policy); + if (IS_ERR(data->cdev)) { + ret = PTR_ERR(data->cdev); + cpufreq_cpu_put(data->policy); + return ret; + } + } + + return 0; +} + static int imx_thermal_probe(struct platform_device *pdev) { struct imx_thermal_data *data; @@ -724,12 +746,10 @@ static int imx_thermal_probe(struct platform_device *pdev) return -EPROBE_DEFER; } - data->cdev = cpufreq_cooling_register(data->policy); - if (IS_ERR(data->cdev)) { - ret = PTR_ERR(data->cdev); + ret = imx_thermal_register_legacy_cooling(data); + if (ret) { dev_err(&pdev->dev, "failed to register cpufreq cooling device: %d\n", ret); - cpufreq_cpu_put(data->policy); return ret; } |