author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-10 13:20:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-10 13:20:47 -0700 |
commit | d6498af58f5c7fb7b252f4791620fe4dd7213ca3 (patch) | |
tree | bf5d04b798050834a64680a1bbc276eb9369c747 /drivers | |
parent | e99f23c5bf59219d0cd9b6e0d7d4c1b641a98704 (diff) | |
parent | be2d24336f8876d60d8a4634f1a1e4753c4be124 (diff) | |
download | lwn-d6498af58f5c7fb7b252f4791620fe4dd7213ca3.tar.gz lwn-d6498af58f5c7fb7b252f4791620fe4dd7213ca3.zip |
Merge tag 'pm-5.15-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki:
"These improve hybrid processors support in intel_pstate, fix an issue
in the core devices PM code, clean up the handling of dedicated wake
IRQs, update the Energy Model documentation and update MAINTAINERS.
Specifics:
- Make the HWP performance levels calibration on hybrid processors in
intel_pstate more straightforward (Rafael Wysocki).
- Prevent the PM core from leaving devices in suspend after a failing
system-wide suspend transition in some cases when driver PM flags
are used (Prasad Sodagudi).
- Drop unused function argument from the dedicated wake IRQs handling
code (Sergey Shtylyov).
- Fix up Energy Model kerneldoc comments and include them in the
Energy Model documentation (Lukasz Luba).
- Use my kernel.org address in MAINTAINERS instead of the personal one
(Rafael Wysocki)"
* tag 'pm-5.15-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
MAINTAINERS: Change Rafael's e-mail address
PM: sleep: core: Avoid setting power.must_resume to false
Documentation: power: include kernel-doc in Energy Model doc
PM: EM: fix kernel-doc comments
cpufreq: intel_pstate: hybrid: Rework HWP calibration
ACPI: CPPC: Introduce cppc_get_nominal_perf()
PM: sleep: wakeirq: drop useless parameter from dev_pm_attach_wake_irq()
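
The new CPPC accessor listed above ("ACPI: CPPC: Introduce cppc_get_nominal_perf()") follows the same convention as the existing cppc_get_desired_perf(): it fills a caller-supplied u64 and returns 0 on success or -EIO on a failed register read. A minimal sketch of a caller, modeled on the intel_pstate_cppc_nominal() wrapper in the diff below — the function name here is made up for illustration, and the helper's declaration lives in include/acpi/cppc_acpi.h, outside the drivers-only diffstat shown on this page:

```c
#include <linux/types.h>
#include <acpi/cppc_acpi.h>	/* cppc_get_nominal_perf() */

/* Sketch only: read a CPU's CPPC nominal performance level. */
static u32 example_cppc_nominal(int cpu)
{
	u64 nominal_perf;

	/* Returns 0 on success, -EIO if the register could not be read. */
	if (cppc_get_nominal_perf(cpu, &nominal_perf))
		return 0;	/* treat "unknown" as 0, as intel_pstate does */

	return nominal_perf;
}
```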
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/cppc_acpi.c | 47 |
-rw-r--r-- | drivers/base/power/main.c | 2 |
-rw-r--r-- | drivers/base/power/wakeirq.c | 11 |
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 185 |
4 files changed, 107 insertions, 138 deletions
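
A note before the diff: the one-line drivers/base/power/main.c change is the subtlest of the four. An annotated excerpt of the new initialization in __device_suspend() follows — the comments are editorial, not the kernel's:

```c
dev->power.may_skip_resume = true;

/*
 * Was: dev->power.must_resume = false; for every device, which in some
 * cases let a failing system-wide suspend transition leave a device in
 * suspend.  Now a device starts out with must_resume set unless its
 * driver explicitly opted in to skipping resume via the
 * DPM_FLAG_MAY_SKIP_RESUME driver flag.
 */
dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME);
```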
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index a4d4eebba1da..bd482108310c 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 	return ret_val;
 }
 
-/**
- * cppc_get_desired_perf - Get the value of desired performance register.
- * @cpunum: CPU from which to get desired performance.
- * @desired_perf: address of a variable to store the returned desired performance
- *
- * Return: 0 for success, -EIO otherwise.
- */
-int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-	struct cpc_register_resource *desired_reg;
-	struct cppc_pcc_data *pcc_ss_data = NULL;
-
-	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+	struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
 
-	if (CPC_IN_PCC(desired_reg)) {
+	if (CPC_IN_PCC(reg)) {
+		int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
+		struct cppc_pcc_data *pcc_ss_data = NULL;
 		int ret = 0;
 
 		if (pcc_ss_id < 0)
@@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
 		down_write(&pcc_ss_data->pcc_lock);
 
 		if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
-			cpc_read(cpunum, desired_reg, desired_perf);
+			cpc_read(cpunum, reg, perf);
 		else
 			ret = -EIO;
 
@@ -1044,13 +1035,37 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
 		return ret;
 	}
 
-	cpc_read(cpunum, desired_reg, desired_perf);
+	cpc_read(cpunum, reg, perf);
 
 	return 0;
 }
+
+/**
+ * cppc_get_desired_perf - Get the desired performance register value.
+ * @cpunum: CPU from which to get desired performance.
+ * @desired_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+{
+	return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf);
+}
 EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
 
 /**
+ * cppc_get_nominal_perf - Get the nominal performance register value.
+ * @cpunum: CPU from which to get nominal performance.
+ * @nominal_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+	return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
+}
+
+/**
  * cppc_get_perf_caps - Get a CPU's performance capabilities.
  * @cpunum: CPU from which to get capabilities info.
  * @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index d568772152c2..cbea78e79f3d 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	}
 
 	dev->power.may_skip_resume = true;
-	dev->power.must_resume = false;
+	dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME);
 
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 3bad3266a2ad..b91a3a9bf9f6 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -12,14 +12,11 @@
 /**
  * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ
  * @dev: Device entry
- * @irq: Device wake-up capable interrupt
  * @wirq: Wake irq specific data
  *
- * Internal function to attach either a device IO interrupt or a
- * dedicated wake-up interrupt as a wake IRQ.
+ * Internal function to attach a dedicated wake-up interrupt as a wake IRQ.
  */
-static int dev_pm_attach_wake_irq(struct device *dev, int irq,
-				  struct wake_irq *wirq)
+static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq)
 {
 	unsigned long flags;
 
@@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq)
 	wirq->dev = dev;
 	wirq->irq = irq;
 
-	err = dev_pm_attach_wake_irq(dev, irq, wirq);
+	err = dev_pm_attach_wake_irq(dev, wirq);
 	if (err)
 		kfree(wirq);
 
@@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 	if (err)
 		goto err_free_name;
 
-	err = dev_pm_attach_wake_irq(dev, irq, wirq);
+	err = dev_pm_attach_wake_irq(dev, wirq);
 	if (err)
 		goto err_free_irq;
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2d83a9f9651b..1097f826ad70 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -268,6 +268,7 @@ static struct cpudata **all_cpu_data;
 * @get_min:		Callback to get minimum P state
 * @get_turbo:		Callback to get turbo P state
 * @get_scaling:	Callback to get frequency scaling factor
+ * @get_cpu_scaling:	Get frequency scaling factor for a given cpu
 * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference
 * @get_val:		Callback to convert P state to actual MSR write value
 * @get_vid:		Callback to get VID data for Atom platforms
@@ -281,6 +282,7 @@ struct pstate_funcs {
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
+	int (*get_cpu_scaling)(int cpu);
 	int (*get_aperf_mperf_shift)(void);
 	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
@@ -384,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
 
 	return cppc_perf.nominal_perf;
 }
+
+static u32 intel_pstate_cppc_nominal(int cpu)
+{
+	u64 nominal_perf;
+
+	if (cppc_get_nominal_perf(cpu, &nominal_perf))
+		return 0;
+
+	return nominal_perf;
+}
 #else /* CONFIG_ACPI_CPPC_LIB */
 static inline void intel_pstate_set_itmt_prio(int cpu)
 {
@@ -470,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
 	acpi_processor_unregister_performance(policy->cpu);
 }
-
-static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
-{
-	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
-}
-
-static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
-					struct cppc_perf_caps *caps)
-{
-	if (cppc_get_perf_caps(cpu->cpu, caps))
-		return false;
-
-	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
-}
 #else /* CONFIG_ACPI */
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -506,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu)
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
-static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
-{
-	pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
-
-	cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
-}
-
 /**
- * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
+ * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
 * @cpu: Target CPU.
 *
 * On hybrid processors, HWP may expose more performance levels than there are
@@ -522,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
 * scaling factor between HWP performance levels and CPU frequency will be less
 * than the scaling factor between P-state values and CPU frequency.
 *
- * In that case, the scaling factor between HWP performance levels and CPU
- * frequency needs to be determined which can be done with the help of the
- * observation that certain HWP performance levels should correspond to certain
- * P-states, like for example the HWP highest performance should correspond
- * to the maximum turbo P-state of the CPU.
+ * In that case, adjust the CPU parameters used in computations accordingly.
 */
-static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
+static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
 {
 	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
 	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
 	int perf_ctl_turbo = pstate_funcs.get_turbo();
 	int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
-	int perf_ctl_max = pstate_funcs.get_max();
-	int max_freq = perf_ctl_max * perf_ctl_scaling;
-	int scaling = INT_MAX;
-	int freq;
+	int scaling = cpu->pstate.scaling;
 
 	pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
-	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
+	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
 	pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
 	pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
-	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
 	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
-
-#ifdef CONFIG_ACPI
-	if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
-		struct cppc_perf_caps caps;
-
-		if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
-			if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
-				pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
-
-				/*
-				 * If the CPPC nominal performance is valid, it
-				 * can be assumed to correspond to cpu_khz.
-				 */
-				if (caps.nominal_perf == perf_ctl_max_phys) {
-					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-					return;
-				}
-				scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
-			} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
-				pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
-
-				/*
-				 * If the CPPC guaranteed performance is valid,
-				 * it can be assumed to correspond to max_freq.
-				 */
-				if (caps.guaranteed_perf == perf_ctl_max) {
-					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-					return;
-				}
-				scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
-			}
-		}
-	}
-#endif
-	/*
-	 * If using the CPPC data to compute the HWP-to-frequency scaling factor
-	 * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
-	 * the assumption that it corresponds to max_freq.
-	 */
-	if (scaling > perf_ctl_scaling) {
-		pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
-
-		if (cpu->pstate.max_pstate == perf_ctl_max) {
-			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-			return;
-		}
-
-		scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
-		if (scaling > perf_ctl_scaling) {
-			/*
-			 * This should not happen, because it would mean that
-			 * the number of HWP perf levels was less than the
-			 * number of P-states, so use the PERF_CTL scaling in
-			 * that case.
-			 */
-			pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
-				 scaling);
-
-			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-			return;
-		}
-	}
+	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
 
 	/*
-	 * If the product of the HWP performance scaling factor obtained above
-	 * and the HWP_CAP highest performance is greater than the maximum turbo
-	 * frequency corresponding to the pstate_funcs.get_turbo() return value,
-	 * the scaling factor is too high, so recompute it so that the HWP_CAP
-	 * highest performance corresponds to the maximum turbo frequency.
+	 * If the product of the HWP performance scaling factor and the HWP_CAP
+	 * highest performance is greater than the maximum turbo frequency
+	 * corresponding to the pstate_funcs.get_turbo() return value, the
+	 * scaling factor is too high, so recompute it to make the HWP_CAP
+	 * highest performance correspond to the maximum turbo frequency.
	 */
 	if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
-		pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
-
 		cpu->pstate.turbo_freq = turbo_freq;
 		scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
-	}
+		cpu->pstate.scaling = scaling;
 
-	cpu->pstate.scaling = scaling;
-
-	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+		pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
+			 cpu->cpu, scaling);
+	}
 
 	cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
 					 perf_ctl_scaling);
 
-	freq = perf_ctl_max_phys * perf_ctl_scaling;
-	cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
+	cpu->pstate.max_pstate_physical =
+			DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
				     scaling);
 
 	cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
 
 	/*
@@ -1861,6 +1782,38 @@ static int knl_get_turbo_pstate(void)
 	return ret;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+static u32 hybrid_ref_perf;
+
+static int hybrid_get_cpu_scaling(int cpu)
+{
+	return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
+			    intel_pstate_cppc_nominal(cpu));
+}
+
+static void intel_pstate_cppc_set_cpu_scaling(void)
+{
+	u32 min_nominal_perf = U32_MAX;
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
+
+		if (nominal_perf && nominal_perf < min_nominal_perf)
+			min_nominal_perf = nominal_perf;
+	}
+
+	if (min_nominal_perf < U32_MAX) {
+		hybrid_ref_perf = min_nominal_perf;
+		pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
+	}
+}
+#else
+static inline void intel_pstate_cppc_set_cpu_scaling(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
+
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
 {
 	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
@@ -1889,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
-	bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
 	int perf_ctl_max_phys = pstate_funcs.get_max_physical();
-	int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
-				pstate_funcs.get_scaling();
+	int perf_ctl_scaling = pstate_funcs.get_scaling();
 
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
 	cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
@@ -1901,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	if (hwp_active && !hwp_mode_bdw) {
 		__intel_pstate_get_hwp_cap(cpu);
 
-		if (hybrid_cpu)
-			intel_pstate_hybrid_hwp_calibrate(cpu);
-		else
+		if (pstate_funcs.get_cpu_scaling) {
+			cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
+			if (cpu->pstate.scaling != perf_ctl_scaling)
+				intel_pstate_hybrid_hwp_adjust(cpu);
+		} else {
 			cpu->pstate.scaling = perf_ctl_scaling;
+		}
 	} else {
 		cpu->pstate.scaling = perf_ctl_scaling;
 		cpu->pstate.max_pstate = pstate_funcs.get_max();
@@ -3276,6 +3230,9 @@ static int __init intel_pstate_init(void)
 		if (!default_driver)
 			default_driver = &intel_pstate;
 
+		if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
+			intel_pstate_cppc_set_cpu_scaling();
+
 		goto hwp_cpu_matched;
 	}
 } else {
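
As an editorial aside, the arithmetic behind the reworked calibration is easy to check by hand. Below is a standalone userspace sketch of the two formulas from the intel_pstate hunks above: hybrid_get_cpu_scaling(), which derives a per-CPU kHz-per-HWP-unit factor referenced to the smallest CPPC nominal_perf in the system, and the turbo clamp in intel_pstate_hybrid_hwp_adjust(). All input numbers are invented for the example, including the assumption that core_get_scaling() yields 100000 kHz per P-state unit:

```c
/*
 * Standalone illustration of the hybrid HWP scaling math.  The CPPC
 * nominal_perf and HWP_CAP values are made up; only the formulas mirror
 * the kernel code in the diff above.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* Assumption: core_get_scaling() == 100000 kHz per P-state unit. */
	const unsigned int core_scaling = 100000;

	/* Hypothetical CPPC nominal_perf: two E-cores, two P-cores. */
	const unsigned int nominal_perf[] = { 17, 17, 28, 28 };
	const unsigned int ref_perf = 17;	/* minimum across all CPUs */

	for (int cpu = 0; cpu < 4; cpu++) {
		/* hybrid_get_cpu_scaling(): kHz per HWP performance unit. */
		unsigned int scaling = DIV_ROUND_UP(core_scaling * ref_perf,
						    nominal_perf[cpu]);
		printf("cpu%d: HWP-to-frequency scaling = %u kHz/unit\n",
		       cpu, scaling);	/* 100000 (E-cores), 60715 (P-cores) */
	}

	/*
	 * Turbo clamp from intel_pstate_hybrid_hwp_adjust(): if HWP_CAP
	 * highest times the scaling factor overshoots the PERF_CTL turbo
	 * frequency, rescale so the two line up exactly.
	 */
	const unsigned int perf_ctl_turbo = 49;	/* hypothetical P-state */
	const unsigned int turbo_freq = perf_ctl_turbo * core_scaling;
	const unsigned int hwp_highest = 61;	/* hypothetical HWP_CAP */
	unsigned int scaling = 100000;

	if (turbo_freq < hwp_highest * scaling)
		scaling = DIV_ROUND_UP(turbo_freq, hwp_highest);

	printf("refined scaling = %u kHz/unit\n", scaling);	/* 80328 */
	return 0;
}
```

With these inputs the E-cores keep the 100000 kHz/unit PERF_CTL scaling while the P-cores end up at 60715 kHz per HWP unit, which is the point of the rework: each CPU gets its own HWP-to-frequency scaling factor derived from CPPC data instead of a single system-wide guess.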