From 5a25e3f7cc538fb49e11267c1e41c54ccf83835e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:15:13 +0100 Subject: cpufreq: intel_pstate: Driver-specific handling of _PPC updates In some cases, the platform firmware disables or enables turbo frequencies for all CPUs globally before triggering a _PPC change notification for one of them. Obviously, that global change affects all CPUs, not just the notified one, and it needs to be acted upon by cpufreq. The intel_pstate driver is able to detect such global changes of the settings, but it also needs to update policy limits for all CPUs if that happens, in particular if turbo frequencies are enabled globally - to allow them to be used. For this reason, introduce a new cpufreq driver callback to be invoked on _PPC notifications, if present, instead of simply calling cpufreq_update_policy() for the notified CPU and make intel_pstate use it to trigger policy updates for all CPUs in the system if global settings change. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200759 Reported-by: Gabriele Mazzotta Tested-by: Gabriele Mazzotta Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b160e98076e3..5005ea40364f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -195,6 +195,7 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void cpufreq_update_policy(unsigned int cpu); +void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); @@ -322,6 +323,9 @@ struct cpufreq_driver { /* should be defined, if possible */ unsigned int (*get)(unsigned int cpu); + /* Called to update policy limits on firmware notifications. */ + void (*update_limits)(unsigned int cpu); + /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); -- cgit v1.2.3 From b5dee3130bb4014511f5d0dd46855ed843e3fdc8 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Mon, 25 Feb 2019 20:36:41 +0800 Subject: PM / sleep: Refactor filesystems sync to reduce duplication Create a common helper to sync filesystems for system suspend and hibernation. Signed-off-by: Harry Pan Acked-by: Pavel Machek [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 3 +++ kernel/power/hibernate.c | 5 +---- kernel/power/main.c | 9 +++++++++ kernel/power/suspend.c | 13 +++++-------- kernel/power/user.c | 5 +---- 5 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3f529ad9a9d2..6b3ea9ea6a9e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -425,6 +425,7 @@ void restore_processor_state(void); /* kernel/power/main.c */ extern int register_pm_notifier(struct notifier_block *nb); extern int unregister_pm_notifier(struct notifier_block *nb); +extern void ksys_sync_helper(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) return 0; } +static inline void ksys_sync_helper(void) {} + #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index abef759de7c8..cc105ecd9c07 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -709,9 +708,7 @@ int hibernate(void) goto Exit; } - pr_info("Syncing filesystems ... \n"); - ksys_sync(); - pr_info("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 98e76cad128b..40472a7c5536 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "power.h" @@ -51,6 +52,14 @@ void unlock_system_sleep(void) } EXPORT_SYMBOL_GPL(unlock_system_sleep); +void ksys_sync_helper(void) +{ + pr_info("Syncing filesystems ... "); + ksys_sync(); + pr_cont("done.\n"); +} +EXPORT_SYMBOL_GPL(ksys_sync_helper); + /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b610..e39059dea38b 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -568,13 +567,11 @@ static int enter_state(suspend_state_t state) if (state == PM_SUSPEND_TO_IDLE) s2idle_begin(); -#ifndef CONFIG_SUSPEND_SKIP_SYNC - trace_suspend_resume(TPS("sync_filesystems"), 0, true); - pr_info("Syncing filesystems ... "); - ksys_sync(); - pr_cont("done.\n"); - trace_suspend_resume(TPS("sync_filesystems"), 0, false); -#endif + if (!IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC)) { + trace_suspend_resume(TPS("sync_filesystems"), 0, true); + ksys_sync_helper(); + trace_suspend_resume(TPS("sync_filesystems"), 0, false); + } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); pm_suspend_clear_flags(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 2d8b60a3c86b..cb24e840a3e6 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -228,9 +227,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (data->frozen) break; - printk("Syncing filesystems ... "); - ksys_sync(); - printk("done.\n"); + ksys_sync_helper(); error = freeze_processes(); if (error) -- cgit v1.2.3 From 5861381d486601430cccf64849bd0a226154bc0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Mar 2019 23:18:01 +0100 Subject: PM / arch: x86: Rework the MSR_IA32_ENERGY_PERF_BIAS handling The current handling of MSR_IA32_ENERGY_PERF_BIAS in the kernel is problematic, because it may cause changes made by user space to that MSR (with the help of the x86_energy_perf_policy tool, for example) to be lost every time a CPU goes offline and then back online as well as during system-wide power management transitions into sleep states and back into the working state. The first problem is that if the current EPB value for a CPU going online is 0 ('performance'), the kernel will change it to 6 ('normal') regardless of whether or not this is the first bring-up of that CPU. That also happens during system-wide resume from sleep states (including, but not limited to, hibernation). However, the EPB may have been adjusted by user space this way and the kernel should not blindly override that setting. The second problem is that if the platform firmware resets the EPB values for any CPUs during system-wide resume from a sleep state, the kernel will not restore their previous EPB values that may have been set by user space before the preceding system-wide suspend transition. Again, that behavior may at least be confusing from the user space perspective. In order to address these issues, rework the handling of MSR_IA32_ENERGY_PERF_BIAS so that the EPB value is saved on CPU offline and restored on CPU online as well as (for the boot CPU) during the syscore stages of system-wide suspend and resume transitions, respectively. However, retain the policy by which the EPB is set to 6 ('normal') on the first bring-up of each CPU if its initial value is 0, based on the observation that 0 may mean 'not initialized' just as well as 'performance' in that case. While at it, move the MSR_IA32_ENERGY_PERF_BIAS handling code into a separate file and document it in Documentation/admin-guide. Fixes: abe48b108247 (x86, intel, power: Initialize MSR_IA32_ENERGY_PERF_BIAS) Fixes: b51ef52df71c (x86/cpu: Restore MSR_IA32_ENERGY_PERF_BIAS after resume) Reported-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki Reviewed-by: Hannes Reinecke Acked-by: Borislav Petkov Acked-by: Thomas Gleixner --- Documentation/admin-guide/pm/intel_epb.rst | 6 ++ Documentation/admin-guide/pm/working-state.rst | 1 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 17 ---- arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/intel.c | 34 ------- arch/x86/kernel/cpu/intel_epb.c | 131 +++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 8 files changed, 140 insertions(+), 53 deletions(-) create mode 100644 Documentation/admin-guide/pm/intel_epb.rst create mode 100644 arch/x86/kernel/cpu/intel_epb.c (limited to 'include') diff --git a/Documentation/admin-guide/pm/intel_epb.rst b/Documentation/admin-guide/pm/intel_epb.rst new file mode 100644 index 000000000000..e9cfa7ec5420 --- /dev/null +++ b/Documentation/admin-guide/pm/intel_epb.rst @@ -0,0 +1,6 @@ +====================================== +Intel Performance and Energy Bias Hint +====================================== + +.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c + :doc: overview diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index b6cef9b5e961..beb004d3632b 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -8,3 +8,4 @@ Working-State Power Management cpuidle cpufreq intel_pstate + intel_epb diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfd24f9f7614..1796d2bdcaaa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..5e37dfa4d9df 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1864,23 +1864,6 @@ void cpu_init(void) } #endif -static void bsp_resume(void) -{ - if (this_cpu->c_bsp_resume) - this_cpu->c_bsp_resume(&boot_cpu_data); -} - -static struct syscore_ops cpu_syscore_ops = { - .resume = bsp_resume, -}; - -static int __init init_cpu_syscore(void) -{ - register_syscore_ops(&cpu_syscore_ops); - return 0; -} -core_initcall(init_cpu_syscore); - /* * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5eb946b9a9f3..c0e2407abdd6 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -14,7 +14,6 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); - void (*c_bsp_resume)(struct cpuinfo_x86 *); int c_x86_vendor; #ifdef CONFIG_X86_32 /* Optional vendor specific routine to obtain the cache size. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fc3c07fe7df5..f17c1a714779 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -596,36 +596,6 @@ detect_keyid_bits: c->x86_phys_bits -= keyid_bits; } -static void init_intel_energy_perf(struct cpuinfo_x86 *c) -{ - u64 epb; - - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized. - * (x86_energy_perf_policy(8) is available to change it at run-time.) - */ - if (!cpu_has(c, X86_FEATURE_EPB)) - return; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) - return; - - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); -} - -static void intel_bsp_resume(struct cpuinfo_x86 *c) -{ - /* - * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, - * so reinitialize it properly like during bootup: - */ - init_intel_energy_perf(c); -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); - init_intel_energy_perf(c); - init_intel_misc_features(c); } @@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = { .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, - .c_bsp_resume = intel_bsp_resume, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); - diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c new file mode 100644 index 000000000000..8d53cc88bd22 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Performance and Energy Bias Hint support. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Rafael J. Wysocki + */ + +#include +#include +#include + +#include +#include + +/** + * DOC: overview + * + * The Performance and Energy Bias Hint (EPB) allows software to specify its + * preference with respect to the power-performance tradeoffs present in the + * processor. Generally, the EPB is expected to be set by user space through + * the generic MSR interface (with the help of the x86_energy_perf_policy tool), + * but there are two reasons for the kernel to touch it. + * + * First, there are systems where the platform firmware resets the EPB during + * system-wide transitions from sleep states back into the working state + * effectively causing the previous EPB updates by user space to be lost. + * Thus the kernel needs to save the current EPB values for all CPUs during + * system-wide transitions to sleep states and restore them on the way back to + * the working state. That can be achieved by saving EPB for secondary CPUs + * when they are taken offline during transitions into system sleep states and + * for the boot CPU in a syscore suspend operation, so that it can be restored + * for the boot CPU in a syscore resume operation and for the other CPUs when + * they are brought back online. However, CPUs that are already offline when + * a system-wide PM transition is started are not taken offline again, but their + * EPB values may still be reset by the platform firmware during the transition, + * so in fact it is necessary to save the EPB of any CPU taken offline and to + * restore it when the given CPU goes back online at all times. + * + * Second, on many systems the initial EPB value coming from the platform + * firmware is 0 ('performance') and at least on some of them that is because + * the platform firmware does not initialize EPB at all with the assumption that + * the OS will do that anyway. That sometimes is problematic, as it may cause + * the system battery to drain too fast, for example, so it is better to adjust + * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the + * kernel changes it to 6 ('normal'). + */ + +static DEFINE_PER_CPU(u8, saved_epb); + +#define EPB_MASK 0x0fULL +#define EPB_SAVED 0x10ULL + +static int intel_epb_save(void) +{ + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + /* + * Ensure that saved_epb will always be nonzero after this write even if + * the EPB value read from the MSR is 0. + */ + this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); + + return 0; +} + +static void intel_epb_restore(void) +{ + u64 val = this_cpu_read(saved_epb); + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + if (val) { + val &= EPB_MASK; + } else { + /* + * Because intel_epb_save() has not run for the current CPU yet, + * it is going online for the first time, so if its EPB value is + * 0 ('performance') at this point, assume that it has not been + * initialized by the platform firmware and set it to 6 + * ('normal'). + */ + val = epb & EPB_MASK; + if (val == ENERGY_PERF_BIAS_PERFORMANCE) { + val = ENERGY_PERF_BIAS_NORMAL; + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + } + } + wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); +} + +static struct syscore_ops intel_epb_syscore_ops = { + .suspend = intel_epb_save, + .resume = intel_epb_restore, +}; + +static int intel_epb_online(unsigned int cpu) +{ + intel_epb_restore(); + return 0; +} + +static int intel_epb_offline(unsigned int cpu) +{ + return intel_epb_save(); +} + +static __init int intel_epb_init(void) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_EPB)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, + "x86/intel/epb:online", intel_epb_online, + intel_epb_offline); + if (ret < 0) + goto err_out_online; + + register_syscore_ops(&intel_epb_syscore_ops); + return 0; + +err_out_online: + cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); + return ret; +} +subsys_initcall(intel_epb_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e78281d07b70..dbfdd0fadbef 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, + CPUHP_AP_X86_INTEL_EPB_ONLINE, CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE, -- cgit v1.2.3 From 9083e4986124389e2a7c0ffca95630a4983887f0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Mar 2019 12:19:52 +0100 Subject: cpufreq: intel_pstate: Update max frequency on global turbo changes While the cpuinfo.max_freq value doesn't really matter for intel_pstate in the active mode, in the passive mode it is used by governors as the maximum physical frequency of the CPU and the results of governor computations generally depend on it. Also it is made available to user space via sysfs and it should match the current HW configuration. For this reason, make intel_pstate update cpuinfo.max_freq for all CPUs if it detects a global change of turbo frequency settings from "disable" to "enable" or the other way associated with a _PPC change notification from the platform firmware. Note that policy_is_inactive(), cpufreq_cpu_acquire(), cpufreq_cpu_release(), and cpufreq_set_policy() need to be made available to it for this purpose. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200759 Reported-by: Gabriele Mazzotta Tested-by: Gabriele Mazzotta Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 16 ++++------------ drivers/cpufreq/intel_pstate.c | 35 +++++++++++++++++++++++++++++------ include/linux/cpufreq.h | 10 ++++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d8fc395af773..f3f79266ab48 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -34,11 +34,6 @@ static LIST_HEAD(cpufreq_policy_list); -static inline bool policy_is_inactive(struct cpufreq_policy *policy) -{ - return cpumask_empty(policy->cpus); -} - /* Macros to iterate over CPU policies */ #define for_each_suitable_policy(__policy, __active) \ list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) \ @@ -254,7 +249,7 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_put); * cpufreq_cpu_release - Unlock a policy and decrement its usage counter. * @policy: cpufreq policy returned by cpufreq_cpu_acquire(). */ -static void cpufreq_cpu_release(struct cpufreq_policy *policy) +void cpufreq_cpu_release(struct cpufreq_policy *policy) { if (WARN_ON(!policy)) return; @@ -278,7 +273,7 @@ static void cpufreq_cpu_release(struct cpufreq_policy *policy) * cpufreq_cpu_release() in order to release its rwsem and balance its usage * counter properly. */ -static struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); @@ -714,9 +709,6 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) return ret; } -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy); - /** * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access */ @@ -2274,8 +2266,8 @@ EXPORT_SYMBOL(cpufreq_get_policy); * * The cpuinfo part of @policy is not updated by this function. */ -static int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy) +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy) { struct cpufreq_governor *old_gov; int ret; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e2191a570ade..08d1a1e845aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,7 +179,7 @@ struct vid_data { * based on the MSR_IA32_MISC_ENABLE value and whether or * not the maximum reported turbo P-state is different from * the maximum reported non-turbo one. - * @turbo_disabled_s: Saved @turbo_disabled value. + * @turbo_disabled_mf: The @turbo_disabled value reflected by cpuinfo.max_freq. * @min_perf_pct: Minimum capacity limit in percent of the maximum turbo * P-state capacity. * @max_perf_pct: Maximum capacity limit in percent of the maximum turbo @@ -188,7 +188,7 @@ struct vid_data { struct global_params { bool no_turbo; bool turbo_disabled; - bool turbo_disabled_s; + bool turbo_disabled_mf; int max_perf_pct; int min_perf_pct; }; @@ -899,6 +899,28 @@ static void intel_pstate_update_policies(void) cpufreq_update_policy(cpu); } +static void intel_pstate_update_max_freq(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); + struct cpufreq_policy new_policy; + struct cpudata *cpudata; + + if (!policy) + return; + + cpudata = all_cpu_data[cpu]; + policy->cpuinfo.max_freq = global.turbo_disabled_mf ? + cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; + + memcpy(&new_policy, policy, sizeof(*policy)); + new_policy.max = min(policy->user_policy.max, policy->cpuinfo.max_freq); + new_policy.min = min(policy->user_policy.min, new_policy.max); + + cpufreq_set_policy(policy, &new_policy); + + cpufreq_cpu_release(policy); +} + static void intel_pstate_update_limits(unsigned int cpu) { mutex_lock(&intel_pstate_driver_lock); @@ -908,9 +930,10 @@ static void intel_pstate_update_limits(unsigned int cpu) * If turbo has been turned on or off globally, policy limits for * all CPUs need to be updated to reflect that. */ - if (global.turbo_disabled_s != global.turbo_disabled) { - global.turbo_disabled_s = global.turbo_disabled; - intel_pstate_update_policies(); + if (global.turbo_disabled_mf != global.turbo_disabled) { + global.turbo_disabled_mf = global.turbo_disabled; + for_each_possible_cpu(cpu) + intel_pstate_update_max_freq(cpu); } else { cpufreq_update_policy(cpu); } @@ -2159,7 +2182,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - global.turbo_disabled_s = global.turbo_disabled; + global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5005ea40364f..684caf067003 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -178,6 +178,11 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +static inline bool policy_is_inactive(struct cpufreq_policy *policy) +{ + return cpumask_empty(policy->cpus); +} + static inline bool policy_is_shared(struct cpufreq_policy *policy) { return cpumask_weight(policy->cpus) > 1; @@ -193,7 +198,12 @@ unsigned int cpufreq_quick_get_max(unsigned int cpu); void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); + +struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); +void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); +int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_policy *new_policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); -- cgit v1.2.3 From 49a27e279052cf71ba931a26b00194ff46510480 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:45 +0100 Subject: PM / Domains: Add generic data pointer to struct genpd_power_state Add a data pointer to the genpd_power_state struct, to allow a genpd backend driver to store per-state specific data. To introduce the pointer, change the way genpd deals with freeing of the corresponding allocated data. More precisely, clarify the responsibility of whom that shall free the data, by adding a ->free_states() callback to the generic_pm_domain structure. The one allocating the data will be expected to set the callback, to allow genpd to invoke it from genpd_remove(). Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 12 ++++++++++-- include/linux/pm_domain.h | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 96a6dc9d305c..ff6f992f7a1d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1686,6 +1686,12 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static void genpd_free_default_power_state(struct genpd_power_state *states, + unsigned int state_count) +{ + kfree(states); +} + static int genpd_set_default_power_state(struct generic_pm_domain *genpd) { struct genpd_power_state *state; @@ -1696,7 +1702,7 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) genpd->states = state; genpd->state_count = 1; - genpd->free = state; + genpd->free_states = genpd_free_default_power_state; return 0; } @@ -1812,7 +1818,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); - kfree(genpd->free); + if (genpd->free_states) + genpd->free_states(genpd->states, genpd->state_count); + pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1ed5874bcee0..8e1399231753 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -69,6 +69,7 @@ struct genpd_power_state { s64 residency_ns; struct fwnode_handle *fwnode; ktime_t idle_time; + void *data; }; struct genpd_lock_ops; @@ -110,9 +111,10 @@ struct generic_pm_domain { struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ struct genpd_power_state *states; + void (*free_states)(struct genpd_power_state *states, + unsigned int state_count); unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ - void *free; /* Free the state that was allocated for default */ ktime_t on_time; ktime_t accounting_time; const struct genpd_lock_ops *lock_ops; -- cgit v1.2.3 From eb594b7325f61835555140922a4cb715264a325c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:46 +0100 Subject: PM / Domains: Add support for CPU devices to genpd To enable a CPU device to be attached to a PM domain managed by genpd, make a few changes to it for convenience. To be able to quickly find out what CPUs are attached to a genpd, which typically becomes useful from a genpd governor as subsequent changes are about to show, add a cpumask to struct generic_pm_domain to be updated when a CPU device gets attached to the genpd containing that cpumask. Also, propagate the cpumask changes upwards in the domain hierarchy to the master PM domains. This way, the cpumask for a genpd hierarchically reflects all CPUs attached to the topology below it. Finally, make this an opt-in feature, to avoid having to manage CPUs and the cpumask for a genpd that don't need it. To that end, add a new genpd configuration bit, GENPD_FLAG_CPU_DOMAIN. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 65 ++++++++++++++++++++++++++++++++++++++++++++- include/linux/pm_domain.h | 13 +++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ff6f992f7a1d..ecac03dcc9b2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "power.h" @@ -128,6 +129,7 @@ static const struct genpd_lock_ops genpd_spin_ops = { #define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) #define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON) #define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP) +#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN) static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, const struct generic_pm_domain *genpd) @@ -1454,6 +1456,56 @@ static void genpd_free_dev_data(struct device *dev, dev_pm_put_subsys_data(dev); } +static void __genpd_update_cpumask(struct generic_pm_domain *genpd, + int cpu, bool set, unsigned int depth) +{ + struct gpd_link *link; + + if (!genpd_is_cpu_domain(genpd)) + return; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + struct generic_pm_domain *master = link->master; + + genpd_lock_nested(master, depth + 1); + __genpd_update_cpumask(master, cpu, set, depth + 1); + genpd_unlock(master); + } + + if (set) + cpumask_set_cpu(cpu, genpd->cpus); + else + cpumask_clear_cpu(cpu, genpd->cpus); +} + +static void genpd_update_cpumask(struct generic_pm_domain *genpd, + struct device *dev, bool set) +{ + int cpu; + + if (!genpd_is_cpu_domain(genpd)) + return; + + for_each_possible_cpu(cpu) { + if (get_cpu_device(cpu) == dev) { + __genpd_update_cpumask(genpd, cpu, set, 0); + return; + } + } +} + +static void genpd_set_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, true); +} + +static void genpd_clear_cpumask(struct generic_pm_domain *genpd, + struct device *dev) +{ + genpd_update_cpumask(genpd, dev, false); +} + static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td) { @@ -1475,6 +1527,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, genpd_lock(genpd); + genpd_set_cpumask(genpd, dev); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1532,6 +1585,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; + genpd_clear_cpumask(genpd, dev); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -1768,11 +1822,18 @@ int pm_genpd_init(struct generic_pm_domain *genpd, if (genpd_is_always_on(genpd) && !genpd_status_on(genpd)) return -EINVAL; + if (genpd_is_cpu_domain(genpd) && + !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL)) + return -ENOMEM; + /* Use only one "off" state if there were no states declared */ if (genpd->state_count == 0) { ret = genpd_set_default_power_state(genpd); - if (ret) + if (ret) { + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); return ret; + } } else if (!gov && genpd->state_count > 1) { pr_warn("%s: no governor for states\n", genpd->name); } @@ -1818,6 +1879,8 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); + if (genpd_is_cpu_domain(genpd)) + free_cpumask_var(genpd->cpus); if (genpd->free_states) genpd->free_states(genpd->states, genpd->state_count); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8e1399231753..a6e251fe9deb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * Flags to control the behaviour of a genpd. @@ -42,11 +43,22 @@ * GENPD_FLAG_ACTIVE_WAKEUP: Instructs genpd to keep the PM domain powered * on, in case any of its attached devices is used * in the wakeup path to serve system wakeups. + * + * GENPD_FLAG_CPU_DOMAIN: Instructs genpd that it should expect to get + * devices attached, which may belong to CPUs or + * possibly have subdomains with CPUs attached. + * This flag enables the genpd backend driver to + * deploy idle power management support for CPUs + * and groups of CPUs. Note that, the backend + * driver must then comply with the so called, + * last-man-standing algorithm, for the CPUs in the + * PM domain. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) #define GENPD_FLAG_ALWAYS_ON (1U << 2) #define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) +#define GENPD_FLAG_CPU_DOMAIN (1U << 4) enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -94,6 +106,7 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ + cpumask_var_t cpus; /* A cpumask of the attached CPUs */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct opp_table *opp_table; /* OPP table of the genpd */ -- cgit v1.2.3 From 6f9b83ac877fb5558d76b9f78590f3afd1bdf421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 27 Mar 2019 15:35:47 +0100 Subject: cpuidle: Export the next timer expiration for CPUs To be able to predict the sleep duration for a CPU entering idle, it is essential to know the expiration time of the next timer. Both the teo and the menu cpuidle governors already use this information for CPU idle state selection. Moving forward, a similar prediction needs to be made for a group of idle CPUs rather than for a single one and the following changes implement a new genpd governor for that purpose. In order to support that feature, add a new function called tick_nohz_get_next_hrtimer() that will return the next hrtimer expiration time of a given CPU to be invoked after deciding whether or not to stop the scheduler tick on that CPU. Make the cpuidle core call tick_nohz_get_next_hrtimer() right before invoking the ->enter() callback provided by the cpuidle driver for the given state and store its return value in the per-CPU struct cpuidle_device, so as to make it available to code outside of cpuidle. Note that at the point when cpuidle calls tick_nohz_get_next_hrtimer(), the governor's ->select() callback has already returned and indicated whether or not the tick should be stopped, so in fact the value returned by tick_nohz_get_next_hrtimer() always is the next hrtimer expiration time for the given CPU, possibly including the tick (if it hasn't been stopped). Co-developed-by: Lina Iyer Co-developed-by: Daniel Lezcano Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 19 +++++++++++++++++-- include/linux/cpuidle.h | 1 + include/linux/tick.h | 7 ++++++- kernel/time/tick-sched.c | 12 ++++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7f108309e871..0f4b7c45df3e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -328,9 +328,23 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { + int ret = 0; + + /* + * Store the next hrtimer, which becomes either next tick or the next + * timer event, whatever expires first. Additionally, to make this data + * useful for consumers outside cpuidle, we rely on that the governor's + * ->select() callback have decided, whether to stop the tick or not. + */ + WRITE_ONCE(dev->next_hrtimer, tick_nohz_get_next_hrtimer()); + if (cpuidle_state_is_coupled(drv, index)) - return cpuidle_enter_state_coupled(dev, drv, index); - return cpuidle_enter_state(dev, drv, index); + ret = cpuidle_enter_state_coupled(dev, drv, index); + else + ret = cpuidle_enter_state(dev, drv, index); + + WRITE_ONCE(dev->next_hrtimer, 0); + return ret; } /** @@ -511,6 +525,7 @@ static void __cpuidle_device_init(struct cpuidle_device *dev) { memset(dev->states_usage, 0, sizeof(dev->states_usage)); dev->last_residency = 0; + dev->next_hrtimer = 0; } /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3b39472324a3..bb9a0db89f1a 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -83,6 +83,7 @@ struct cpuidle_device { unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; + ktime_t next_hrtimer; int last_residency; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; diff --git a/include/linux/tick.h b/include/linux/tick.h index 55388ab45fd4..8891b5ac3e40 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,6 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); +extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -145,7 +146,11 @@ static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } - +static inline ktime_t tick_nohz_get_next_hrtimer(void) +{ + /* Next wake up is the tick period, assume it starts now */ + return ktime_add(ktime_get(), TICK_NSEC); +} static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { *delta_next = TICK_NSEC; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6fa52cd6df0b..8d18e03124ff 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1022,6 +1022,18 @@ bool tick_nohz_idle_got_tick(void) return false; } +/** + * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer + * or the tick, whatever that expires first. Note that, if the tick has been + * stopped, it returns the next hrtimer. + * + * Called from power state control code with interrupts disabled + */ +ktime_t tick_nohz_get_next_hrtimer(void) +{ + return __this_cpu_read(tick_cpu_device.evtdev)->next_event; +} + /** * tick_nohz_get_sleep_length - return the expected length of the current sleep * @delta_next: duration until the next event if the tick cannot be stopped -- cgit v1.2.3 From 2f36bde0fc8f1ab79d54bd2caa7c1cf874fd2206 Mon Sep 17 00:00:00 2001 From: "Andrew-sh.Cheng" Date: Fri, 29 Mar 2019 14:46:10 +0800 Subject: OPP: Introduce dev_pm_opp_find_freq_ceil_by_volt() This patch introduces a new helper routine in the OPP core, which returns the OPP with the highest frequency which has voltage less than or equal to the target voltage passed to the helper. Signed-off-by: Andrew-sh.Cheng [ Viresh: Massaged the commit log and renamed the helper with some cleanups. ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 8 ++++++++ 2 files changed, 62 insertions(+) (limited to 'include') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 0420f7e8ad5b..0e7703fe733f 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -526,6 +526,60 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor); +/** + * dev_pm_opp_find_freq_ceil_by_volt() - Find OPP with highest frequency for + * target voltage. + * @dev: Device for which we do this operation. + * @u_volt: Target voltage. + * + * Search for OPP with highest (ceil) frequency and has voltage <= u_volt. + * + * Return: matching *opp, else returns ERR_PTR in case of error which should be + * handled using IS_ERR. + * + * Error return values can be: + * EINVAL: bad parameters + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + */ +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + if (!dev || !u_volt) { + dev_err(dev, "%s: Invalid argument volt=%lu\n", __func__, + u_volt); + return ERR_PTR(-EINVAL); + } + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return ERR_CAST(opp_table); + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available) { + if (temp_opp->supplies[0].u_volt > u_volt) + break; + opp = temp_opp; + } + } + + /* Increment the reference count of OPP */ + if (!IS_ERR(opp)) + dev_pm_opp_get(opp); + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_by_volt); + static int _set_opp_voltage(struct device *dev, struct regulator *reg, struct dev_pm_opp_supply *supply) { diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 24c757a32a7b..b150fe97ce5a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -102,6 +102,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, unsigned long *freq); +struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt); struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); @@ -207,6 +209,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, return ERR_PTR(-ENOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil_by_volt(struct device *dev, + unsigned long u_volt) +{ + return ERR_PTR(-ENOTSUPP); +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq) { -- cgit v1.2.3 From e94999688e3aa3c0a8ad5a60352cdc3ca3030434 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 11 Apr 2019 20:17:33 +0200 Subject: PM / Domains: Add genpd governor for CPUs After some preceding changes, PM domains managed by genpd may contain CPU devices, so idle state residency values should be taken into account during the state selection process. [The residency value is the minimum amount of time to be spent by a CPU (or a group of CPUs) in an idle state in order to save more energy than could be saved by picking up a shallower idle state.] For this purpose, add a new genpd governor, pm_domain_cpu_gov, to be used for selecting idle states of PM domains with CPU devices attached either directly or through subdomains. The new governor computes the minimum expected idle duration for all online CPUs attached to a PM domain and its subdomains. Next, it finds the deepest idle state whose target residency is within the expected idle duration and selects it as the target idle state of the domain. It should be noted that the minimum expected idle duration computation is based on the closest timer event information stored in the per-CPU variables cpuidle_devices for all of the CPUs in the domain. That needs to be revisited in future, as obviously there are other reasons why a CPU may be woken up from idle. Co-developed-by: Lina Iyer Acked-by: Daniel Lezcano Signed-off-by: Ulf Hansson [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 67 +++++++++++++++++++++++++++++++++++- include/linux/pm_domain.h | 4 +++ 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 4d07e38a8247..7912bc957244 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include static int dev_update_qos_constraint(struct device *dev, void *data) { @@ -210,8 +213,10 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; - if (!genpd->max_off_time_changed) + if (!genpd->max_off_time_changed) { + genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; + } /* * We have to invalidate the cached results for the masters, so @@ -236,6 +241,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->state_idx--; } + genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } @@ -244,6 +250,65 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) return false; } +#ifdef CONFIG_CPU_IDLE +static bool cpu_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct cpuidle_device *dev; + ktime_t domain_wakeup, next_hrtimer; + s64 idle_duration_ns; + int cpu, i; + + /* Validate dev PM QoS constraints. */ + if (!default_power_down_ok(pd)) + return false; + + if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) + return true; + + /* + * Find the next wakeup for any of the online CPUs within the PM domain + * and its subdomains. Note, we only need the genpd->cpus, as it already + * contains a mask of all CPUs from subdomains. + */ + domain_wakeup = ktime_set(KTIME_SEC_MAX, 0); + for_each_cpu_and(cpu, genpd->cpus, cpu_online_mask) { + dev = per_cpu(cpuidle_devices, cpu); + if (dev) { + next_hrtimer = READ_ONCE(dev->next_hrtimer); + if (ktime_before(next_hrtimer, domain_wakeup)) + domain_wakeup = next_hrtimer; + } + } + + /* The minimum idle duration is from now - until the next wakeup. */ + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + if (idle_duration_ns <= 0) + return false; + + /* + * Find the deepest idle state that has its residency value satisfied + * and by also taking into account the power off latency for the state. + * Start at the state picked by the dev PM QoS constraint validation. + */ + i = genpd->state_idx; + do { + if (idle_duration_ns >= (genpd->states[i].residency_ns + + genpd->states[i].power_off_latency_ns)) { + genpd->state_idx = i; + return true; + } + } while (--i >= 0); + + return false; +} + +struct dev_power_governor pm_domain_cpu_gov = { + .suspend_ok = default_suspend_ok, + .power_down_ok = cpu_power_down_ok, +}; +#endif + struct dev_power_governor simple_qos_governor = { .suspend_ok = default_suspend_ok, .power_down_ok = default_power_down_ok, diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a6e251fe9deb..bc82e74560ee 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -118,6 +118,7 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; bool cached_power_down_ok; + bool cached_power_down_state_idx; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -202,6 +203,9 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; +#ifdef CONFIG_CPU_IDLE +extern struct dev_power_governor pm_domain_cpu_gov; +#endif #else static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) -- cgit v1.2.3 From 60dd1ead65e83268af9ccd19b97c7011cb50186b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Apr 2019 10:20:25 +0200 Subject: drivers: firmware: psci: Announce support for OS initiated suspend mode PSCI firmware v1.0+, supports two different modes for CPU_SUSPEND. The Platform Coordinated mode, which is the default and mandatory mode, while support for the OS initiated (OSI) mode is optional. In some cases it's interesting for the user/developer to know if the OSI mode is supported by the PSCI FW, so print a message to the log if that is the case. Co-developed-by: Lina Iyer Signed-off-by: Ulf Hansson Reviewed-by: Daniel Lezcano Acked-by: Mark Rutland Acked-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 21 ++++++++++++++++++++- include/uapi/linux/psci.h | 5 +++++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e480e0af632c..eabd01383cd6 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -95,6 +95,11 @@ static inline bool psci_has_ext_power_state(void) PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK; } +static inline bool psci_has_osi_support(void) +{ + return psci_cpu_suspend_feature & PSCI_1_0_OS_INITIATED; +} + static inline bool psci_power_state_loses_context(u32 state) { const u32 mask = psci_has_ext_power_state() ? @@ -659,10 +664,24 @@ static int __init psci_0_1_init(struct device_node *np) return 0; } +static int __init psci_1_0_init(struct device_node *np) +{ + int err; + + err = psci_0_2_init(np); + if (err) + return err; + + if (psci_has_osi_support()) + pr_info("OSI mode supported.\n"); + + return 0; +} + static const struct of_device_id psci_of_match[] __initconst = { { .compatible = "arm,psci", .data = psci_0_1_init}, { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - { .compatible = "arm,psci-1.0", .data = psci_0_2_init}, + { .compatible = "arm,psci-1.0", .data = psci_1_0_init}, {}, }; diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index b3bcabe380da..581f72085c33 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -49,6 +49,7 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) +#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) @@ -97,6 +98,10 @@ #define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \ (0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT) +#define PSCI_1_0_OS_INITIATED BIT(0) +#define PSCI_1_0_SUSPEND_MODE_PC 0 +#define PSCI_1_0_SUSPEND_MODE_OSI 1 + /* PSCI return values (inclusive of all PSCI versions) */ #define PSCI_RET_SUCCESS 0 #define PSCI_RET_NOT_SUPPORTED -1 -- cgit v1.2.3 From adfe3b76608ffe547af5a74415f15499b798f32a Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 21 Mar 2019 19:14:36 -0400 Subject: PM / devfreq: rockchip-dfi: Move GRF definitions to a common place. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some rk3399 GRF (Generic Register Files) definitions can be used for different drivers. Move these definitions to a common include so we don't need to duplicate these definitions. Signed-off-by: Enric Balletbo i Serra Acked-by: Chanwoo Choi Signed-off-by: Gaël PORTAY Signed-off-by: MyungJoo Ham --- drivers/devfreq/event/rockchip-dfi.c | 23 +++++++---------------- include/soc/rockchip/rk3399_grf.h | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 16 deletions(-) create mode 100644 include/soc/rockchip/rk3399_grf.h (limited to 'include') diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index fcbf76ebf55d..a436ec4901bb 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -26,6 +26,8 @@ #include #include +#include + #define RK3399_DMC_NUM_CH 2 /* DDRMON_CTRL */ @@ -43,18 +45,6 @@ #define DDRMON_CH1_COUNT_NUM 0x3c #define DDRMON_CH1_DFI_ACCESS_NUM 0x40 -/* pmu grf */ -#define PMUGRF_OS_REG2 0x308 -#define DDRTYPE_SHIFT 13 -#define DDRTYPE_MASK 7 - -enum { - DDR3 = 3, - LPDDR3 = 6, - LPDDR4 = 7, - UNUSED = 0xFF -}; - struct dmc_usage { u32 access; u32 total; @@ -83,16 +73,17 @@ static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev) u32 ddr_type; /* get ddr type */ - regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val); - ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK; + regmap_read(info->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; /* clear DDRMON_CTRL setting */ writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL); /* set ddr type to dfi */ - if (ddr_type == LPDDR3) + if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR3) writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL); - else if (ddr_type == LPDDR4) + else if (ddr_type == RK3399_PMUGRF_DDRTYPE_LPDDR4) writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL); /* enable count, use software mode */ diff --git a/include/soc/rockchip/rk3399_grf.h b/include/soc/rockchip/rk3399_grf.h new file mode 100644 index 000000000000..3eebabcb2812 --- /dev/null +++ b/include/soc/rockchip/rk3399_grf.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Rockchip General Register Files definitions + * + * Copyright (c) 2018, Collabora Ltd. + * Author: Enric Balletbo i Serra + */ + +#ifndef __SOC_RK3399_GRF_H +#define __SOC_RK3399_GRF_H + +/* PMU GRF Registers */ +#define RK3399_PMUGRF_OS_REG2 0x308 +#define RK3399_PMUGRF_DDRTYPE_SHIFT 13 +#define RK3399_PMUGRF_DDRTYPE_MASK 7 +#define RK3399_PMUGRF_DDRTYPE_DDR3 3 +#define RK3399_PMUGRF_DDRTYPE_LPDDR2 5 +#define RK3399_PMUGRF_DDRTYPE_LPDDR3 6 +#define RK3399_PMUGRF_DDRTYPE_LPDDR4 7 + +#endif -- cgit v1.2.3 From 9173c5ceb035aab28171cd74dfddf27f47213b99 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Thu, 21 Mar 2019 19:14:38 -0400 Subject: PM / devfreq: rk3399_dmc: Pass ODT and auto power down parameters to TF-A. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trusted Firmware-A (TF-A) for rk3399 implements a SiP call to get the on-die termination (ODT) and auto power down parameters from kernel, this patch adds the functionality to do this. Also, if DDR clock frequency is lower than the on-die termination (ODT) disable frequency this driver should disable the DDR ODT. Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chanwoo Choi Signed-off-by: Gaël PORTAY Signed-off-by: MyungJoo Ham --- drivers/devfreq/rk3399_dmc.c | 71 ++++++++++++++++++++++++++++++++++++- include/soc/rockchip/rockchip_sip.h | 1 + 2 files changed, 71 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index a228dad2bee4..567c034d0301 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -18,14 +18,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include struct dram_timing { @@ -69,8 +72,11 @@ struct rk3399_dmcfreq { struct mutex lock; struct dram_timing timing; struct regulator *vdd_center; + struct regmap *regmap_pmu; unsigned long rate, target_rate; unsigned long volt, target_volt; + unsigned int odt_dis_freq; + int odt_pd_arg0, odt_pd_arg1; }; static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, @@ -80,6 +86,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, struct dev_pm_opp *opp; unsigned long old_clk_rate = dmcfreq->rate; unsigned long target_volt, target_rate; + struct arm_smccc_res res; + bool odt_enable = false; int err; opp = devfreq_recommended_opp(dev, freq, flags); @@ -95,6 +103,19 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq, mutex_lock(&dmcfreq->lock); + if (target_rate >= dmcfreq->odt_dis_freq) + odt_enable = true; + + /* + * This makes a SMC call to the TF-A to set the DDR PD (power-down) + * timings and to enable or disable the ODT (on-die termination) + * resistors. + */ + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0, + dmcfreq->odt_pd_arg1, + ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, + odt_enable, 0, 0, 0, &res); + /* * If frequency scaling from low to high, adjust voltage first. * If frequency scaling from high to low, adjust frequency first. @@ -294,11 +315,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) { struct arm_smccc_res res; struct device *dev = &pdev->dev; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node, *node; struct rk3399_dmcfreq *data; int ret, index, size; uint32_t *timing; struct dev_pm_opp *opp; + u32 ddr_type; + u32 val; data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL); if (!data) @@ -354,10 +377,56 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) } } + node = of_parse_phandle(np, "rockchip,pmu", 0); + if (node) { + data->regmap_pmu = syscon_node_to_regmap(node); + if (IS_ERR(data->regmap_pmu)) + return PTR_ERR(data->regmap_pmu); + } + + regmap_read(data->regmap_pmu, RK3399_PMUGRF_OS_REG2, &val); + ddr_type = (val >> RK3399_PMUGRF_DDRTYPE_SHIFT) & + RK3399_PMUGRF_DDRTYPE_MASK; + + switch (ddr_type) { + case RK3399_PMUGRF_DDRTYPE_DDR3: + data->odt_dis_freq = data->timing.ddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR3: + data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq; + break; + case RK3399_PMUGRF_DDRTYPE_LPDDR4: + data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq; + break; + default: + return -EINVAL; + }; + arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, ROCKCHIP_SIP_CONFIG_DRAM_INIT, 0, 0, 0, 0, &res); + /* + * In TF-A there is a platform SIP call to set the PD (power-down) + * timings and to enable or disable the ODT (on-die termination). + * This call needs three arguments as follows: + * + * arg0: + * bit[0-7] : sr_idle + * bit[8-15] : sr_mc_gate_idle + * bit[16-31] : standby idle + * arg1: + * bit[0-11] : pd_idle + * bit[16-27] : srpd_lite_idle + * arg2: + * bit[0] : odt enable + */ + data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) | + ((data->timing.sr_mc_gate_idle & 0xff) << 8) | + ((data->timing.standby_idle & 0xffff) << 16); + data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) | + ((data->timing.srpd_lite_idle & 0xfff) << 16); + /* * We add a devfreq driver to our parent since it has a device tree node * with operating points. diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h index 7e28092c4d3d..ad9482c56797 100644 --- a/include/soc/rockchip/rockchip_sip.h +++ b/include/soc/rockchip/rockchip_sip.h @@ -23,5 +23,6 @@ #define ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE 0x05 #define ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ 0x06 #define ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM 0x07 +#define ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD 0x08 #endif -- cgit v1.2.3 From 1be0730f1dcd2971db4d2fe5497a20f438b837a7 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 18 Feb 2019 19:21:08 +0100 Subject: trace: events: add devfreq trace event file The patch adds a new file for with trace events for devfreq framework. They are used for performance analysis of the framework. It also contains updates in MAINTAINERS file adding new entry for devfreq maintainers. Signed-off-by: Lukasz Luba Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- MAINTAINERS | 1 + include/trace/events/devfreq.h | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 include/trace/events/devfreq.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 3671fdea5010..27ed10966c81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4552,6 +4552,7 @@ S: Maintained F: drivers/devfreq/ F: include/linux/devfreq.h F: Documentation/devicetree/bindings/devfreq/ +F: include/trace/events/devfreq.h DEVICE FREQUENCY EVENT (DEVFREQ-EVENT) M: Chanwoo Choi diff --git a/include/trace/events/devfreq.h b/include/trace/events/devfreq.h new file mode 100644 index 000000000000..cf5b8772175d --- /dev/null +++ b/include/trace/events/devfreq.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM devfreq + +#if !defined(_TRACE_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DEVFREQ_H + +#include +#include + +TRACE_EVENT(devfreq_monitor, + TP_PROTO(struct devfreq *devfreq), + + TP_ARGS(devfreq), + + TP_STRUCT__entry( + __field(unsigned long, freq) + __field(unsigned long, busy_time) + __field(unsigned long, total_time) + __field(unsigned int, polling_ms) + __string(dev_name, dev_name(&devfreq->dev)) + ), + + TP_fast_assign( + __entry->freq = devfreq->previous_freq; + __entry->busy_time = devfreq->last_status.busy_time; + __entry->total_time = devfreq->last_status.total_time; + __entry->polling_ms = devfreq->profile->polling_ms; + __assign_str(dev_name, dev_name(&devfreq->dev)); + ), + + TP_printk("dev_name=%s freq=%lu polling_ms=%u load=%lu", + __get_str(dev_name), __entry->freq, __entry->polling_ms, + __entry->total_time == 0 ? 0 : + (100 * __entry->busy_time) / __entry->total_time) +); +#endif /* _TRACE_DEVFREQ_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 4302e381a870aafb547e6139830e5a4ee2cb8261 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 15 Apr 2019 12:47:46 +0100 Subject: firmware/psci: add support for SYSTEM_RESET2 PSCI v1.1 introduced SYSTEM_RESET2 to allow both architectural resets where the semantics are described by the PSCI specification itself as well as vendor-specific resets. Currently only system warm reset semantics is defined as part of architectural resets by the specification. This patch implements support for SYSTEM_RESET2 by making using of reboot_mode passed by the reboot infrastructure in the kernel. Acked-by: Mark Rutland Tested-by: Aaro Koskinen Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci/psci.c | 24 +++++++++++++++++++++++- include/uapi/linux/psci.h | 2 ++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index eabd01383cd6..fe090ef43d28 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -88,6 +88,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; PSCI_1_0_EXT_POWER_STATE_TYPE_MASK) static u32 psci_cpu_suspend_feature; +static bool psci_system_reset2_supported; static inline bool psci_has_ext_power_state(void) { @@ -258,7 +259,17 @@ static int get_set_conduit_method(struct device_node *np) static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) { - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + if ((reboot_mode == REBOOT_WARM || reboot_mode == REBOOT_SOFT) && + psci_system_reset2_supported) { + /* + * reset_type[31] = 0 (architectural) + * reset_type[30:0] = 0 (SYSTEM_WARM_RESET) + * cookie = 0 (ignored by the implementation) + */ + invoke_psci_fn(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2), 0, 0, 0); + } else { + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); + } } static void psci_sys_poweroff(void) @@ -460,6 +471,16 @@ static const struct platform_suspend_ops psci_suspend_ops = { .enter = psci_system_suspend_enter, }; +static void __init psci_init_system_reset2(void) +{ + int ret; + + ret = psci_features(PSCI_FN_NATIVE(1_1, SYSTEM_RESET2)); + + if (ret != PSCI_RET_NOT_SUPPORTED) + psci_system_reset2_supported = true; +} + static void __init psci_init_system_suspend(void) { int ret; @@ -597,6 +618,7 @@ static int __init psci_probe(void) psci_init_smccc(); psci_init_cpu_suspend(); psci_init_system_suspend(); + psci_init_system_reset2(); } return 0; diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 581f72085c33..2fcad1dd0b0e 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -50,8 +50,10 @@ #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) +#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -- cgit v1.2.3 From f9ccd7c3a1d87cea3a6f9ed6c946dee9e7456b2e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 25 Apr 2019 11:04:13 +0200 Subject: PM / Domains: Allow to attach a CPU via genpd_dev_pm_attach_by_id|name() Attaching a device via genpd_dev_pm_attach_by_id|name() makes genpd allocate a virtual device that it attaches instead. This leads to a problem in case when the base device belongs to a CPU. More precisely, it means genpd_get_cpu() compares against the virtual device, thus it fails to find a matching CPU device. Address this limitation by passing the base device to genpd_get_cpu() rather than the virtual device. Moreover, to deal with detach correctly from genpd_remove_device(), store the CPU number in struct generic_pm_domain_data, so as to be able to clear the corresponding bit in the cpumask for the genpd. Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 20 ++++++++++---------- include/linux/pm_domain.h | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index da1c99178943..3d899e8abd58 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1499,10 +1499,11 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev) return -1; } -static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct device *base_dev) { struct generic_pm_domain_data *gpd_data; - int ret, cpu; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -1513,7 +1514,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - cpu = genpd_get_cpu(genpd, dev); + gpd_data->cpu = genpd_get_cpu(genpd, base_dev); ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0; if (ret) @@ -1521,7 +1522,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) genpd_lock(genpd); - genpd_set_cpumask(genpd, cpu); + genpd_set_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1549,7 +1550,7 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) int ret; mutex_lock(&gpd_list_lock); - ret = genpd_add_device(genpd, dev); + ret = genpd_add_device(genpd, dev, dev); mutex_unlock(&gpd_list_lock); return ret; @@ -1561,14 +1562,13 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int cpu, ret = 0; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); - cpu = genpd_get_cpu(genpd, dev); genpd_lock(genpd); @@ -1580,7 +1580,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - genpd_clear_cpumask(genpd, cpu); + genpd_clear_cpumask(genpd, gpd_data->cpu); dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); @@ -2256,7 +2256,7 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev) goto out; } - ret = genpd_add_device(genpd, dev); + ret = genpd_add_device(genpd, dev, dev); out: mutex_unlock(&gpd_list_lock); @@ -2426,7 +2426,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev, dev_dbg(dev, "adding to PM domain %s\n", pd->name); - ret = genpd_add_device(pd, dev); + ret = genpd_add_device(pd, dev, base_dev); mutex_unlock(&gpd_list_lock); if (ret < 0) { diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index bc82e74560ee..0e8e356bed6a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -175,6 +175,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + int cpu; unsigned int performance_state; void *data; }; -- cgit v1.2.3