diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 13:14:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 13:14:19 -0700 |
commit | 19c1940feab777bb037c665a09f495d08a6c4e6c (patch) | |
tree | 68d4de31f30c9d69a1f1ea080b8a69ce61e1ead5 /drivers/cpufreq | |
parent | 7c574cf6aeb75920ba4d3af937bb1b3c42785ac4 (diff) | |
parent | d715a226b0b3dae48865d05e8c36175a8f75a809 (diff) | |
download | lwn-19c1940feab777bb037c665a09f495d08a6c4e6c.tar.gz lwn-19c1940feab777bb037c665a09f495d08a6c4e6c.zip |
Merge tag 'pm+acpi-3.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more ACPI and power management updates from Rafael Wysocki:
"These are fixups on top of the previous PM+ACPI pull request,
regression fixes (ACPI hotplug, cpufreq ppc-corenet), other bug fixes
(ACPI reset, cpufreq), new PM trace points for system suspend
profiling and a copyright notice update.
Specifics:
- I misremembered: Hans de Goede's ACPI video patches did not
actually flip the video.use_native_backlight default, although
we had discussed that and decided to do it.
Since I said we would do that in the previous PM+ACPI pull request,
make that change for real now.
- ACPI bus check notifications for PCI host bridges don't cause the
bus below the host bridge to be checked for changes as they should
because of a mistake in the ACPI-based PCI hotplug (ACPIPHP)
subsystem that forgets to add hotplug contexts to PCI host bridge
ACPI device objects. Create hotplug contexts for PCI host bridges
too as appropriate.
- Revert recent cpufreq commit related to the big.LITTLE cpufreq
driver that breaks arm64 builds.
- Fix for a regression in the ppc-corenet cpufreq driver introduced
during the 3.15 cycle and causing the driver to use the remainder
from do_div instead of the quotient. From Ed Swarthout.
- Resets triggered by panic activate a BUG_ON() in vmalloc.c on
systems where the ACPI reset register is located in memory address
space. Fix from Randy Wright.
- Fix for a problem with cpufreq governors whose decisions may be
suboptimal because they use deferrable timers for CPU load
sampling. From Srivatsa S Bhat.
- Fix for a problem with the Tegra cpufreq driver where, during
transitions, the CPU frequency is temporarily switched to a
"stable" level that is different from both the initial and target
frequencies, which sometimes causes udelay() to expire earlier
than it should. From Viresh Kumar.
- New trace points and rework of some existing trace points for
system suspend/resume profiling from Todd Brandt.
- Assorted cpufreq fixes and cleanups from Stratos Karafotis and
Viresh Kumar.
- Copyright notice update for suspend-and-cpuhotplug.txt from
Srivatsa S Bhat"
* tag 'pm+acpi-3.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
ACPI / hotplug / PCI: Add hotplug contexts to PCI host bridges
PM / sleep: trace events for device PM callbacks
cpufreq: cpufreq-cpu0: remove dependency on THERMAL and REGULATOR
cpufreq: tegra: update comment for clarity
cpufreq: intel_pstate: Remove duplicate CPU ID check
cpufreq: Mark CPU0 driver with CPUFREQ_NEED_INITIAL_FREQ_CHECK flag
PM / Documentation: Update copyright in suspend-and-cpuhotplug.txt
cpufreq: governor: remove copy_prev_load from 'struct cpu_dbs_common_info'
cpufreq: governor: Be friendly towards latency-sensitive bursty workloads
PM / sleep: trace events for suspend/resume
cpufreq: ppc-corenet-cpu-freq: do_div use quotient
Revert "cpufreq: Enable big.LITTLE cpufreq driver on arm64"
cpufreq: Tegra: implement intermediate frequency callbacks
cpufreq: add support for intermediate (stable) frequencies
ACPI / video: Change the default for video.use_native_backlight to 1
ACPI: Fix bug when ACPI reset register is implemented in system memory
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/Kconfig | 2 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig.arm | 3 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq-cpu0.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 67 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 67 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.h | 7 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 6 | ||||
-rw-r--r-- | drivers/cpufreq/ppc-corenet-cpufreq.c | 9 | ||||
-rw-r--r-- | drivers/cpufreq/tegra-cpufreq.c | 100 |
9 files changed, 204 insertions, 59 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 1fbe11f2a146..e473d6555f96 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -185,7 +185,7 @@ config CPU_FREQ_GOV_CONSERVATIVE config GENERIC_CPUFREQ_CPU0 tristate "Generic CPU0 cpufreq driver" - depends on HAVE_CLK && REGULATOR && OF && THERMAL && CPU_THERMAL + depends on HAVE_CLK && OF select PM_OPP help This adds a generic cpufreq driver for CPU0 frequency management. diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 36d20d0fce27..ebac67115009 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -5,8 +5,7 @@ # big LITTLE core layer and glue drivers config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" - depends on (BIG_LITTLE && ARM_CPU_TOPOLOGY) || (ARM64 && SMP) - depends on HAVE_CLK + depends on ARM && BIG_LITTLE && ARM_CPU_TOPOLOGY && HAVE_CLK select PM_OPP help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. 
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index 09b9129c7bd3..ee1ae303a07c 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -104,7 +104,7 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy) } static struct cpufreq_driver cpu0_cpufreq_driver = { - .flags = CPUFREQ_STICKY, + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = cpu0_set_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ae11dd51f81d..aed2b0cb83dc 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1816,20 +1816,55 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier); * GOVERNORS * *********************************************************************/ +/* Must set freqs->new to intermediate frequency */ +static int __target_intermediate(struct cpufreq_policy *policy, + struct cpufreq_freqs *freqs, int index) +{ + int ret; + + freqs->new = cpufreq_driver->get_intermediate(policy, index); + + /* We don't need to switch to intermediate freq */ + if (!freqs->new) + return 0; + + pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n", + __func__, policy->cpu, freqs->old, freqs->new); + + cpufreq_freq_transition_begin(policy, freqs); + ret = cpufreq_driver->target_intermediate(policy, index); + cpufreq_freq_transition_end(policy, freqs, ret); + + if (ret) + pr_err("%s: Failed to change to intermediate frequency: %d\n", + __func__, ret); + + return ret; +} + static int __target_index(struct cpufreq_policy *policy, struct cpufreq_frequency_table *freq_table, int index) { - struct cpufreq_freqs freqs; + struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0}; + unsigned int intermediate_freq = 0; int retval = -EINVAL; bool notify; notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION); - if (notify) { - freqs.old = policy->cur; - 
freqs.new = freq_table[index].frequency; - freqs.flags = 0; + /* Handle switching to intermediate frequency */ + if (cpufreq_driver->get_intermediate) { + retval = __target_intermediate(policy, &freqs, index); + if (retval) + return retval; + + intermediate_freq = freqs.new; + /* Set old freq to intermediate */ + if (intermediate_freq) + freqs.old = freqs.new; + } + freqs.new = freq_table[index].frequency; pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n", __func__, policy->cpu, freqs.old, freqs.new); @@ -1841,9 +1876,23 @@ static int __target_index(struct cpufreq_policy *policy, pr_err("%s: Failed to change cpu frequency: %d\n", __func__, retval); - if (notify) + if (notify) { cpufreq_freq_transition_end(policy, &freqs, retval); + /* + * Failed after setting to intermediate freq? Driver should have + * reverted back to initial frequency and so should we. Check + * here for intermediate_freq instead of get_intermediate, in + * case we have't switched to intermediate freq at all. + */ + if (unlikely(retval && intermediate_freq)) { + freqs.old = intermediate_freq; + freqs.new = policy->restore_freq; + cpufreq_freq_transition_begin(policy, &freqs); + cpufreq_freq_transition_end(policy, &freqs, 0); + } + } + return retval; } @@ -1875,6 +1924,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (target_freq == policy->cur) return 0; + /* Save last value to restore later on errors */ + policy->restore_freq = policy->cur; + if (cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); else if (cpufreq_driver->target_index) { @@ -2361,7 +2413,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) !(driver_data->setpolicy || driver_data->target_index || driver_data->target) || (driver_data->setpolicy && (driver_data->target_index || - driver_data->target))) + driver_data->target)) || + (!!driver_data->get_intermediate != !!driver_data->target_intermediate)) return -EINVAL; pr_debug("trying to register driver %s\n", 
driver_data->name); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index e1c6433b16e0..1b44496b2d2b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -36,14 +36,29 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; struct cpufreq_policy *policy; + unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; unsigned int j; - if (dbs_data->cdata->governor == GOV_ONDEMAND) + if (dbs_data->cdata->governor == GOV_ONDEMAND) { + struct od_cpu_dbs_info_s *od_dbs_info = + dbs_data->cdata->get_cpu_dbs_info_s(cpu); + + /* + * Sometimes, the ondemand governor uses an additional + * multiplier to give long delays. So apply this multiplier to + * the 'sampling_rate', so as to keep the wake-up-from-idle + * detection logic a bit conservative. + */ + sampling_rate = od_tuners->sampling_rate; + sampling_rate *= od_dbs_info->rate_mult; + ignore_nice = od_tuners->ignore_nice_load; - else + } else { + sampling_rate = cs_tuners->sampling_rate; ignore_nice = cs_tuners->ignore_nice_load; + } policy = cdbs->cur_policy; @@ -96,7 +111,46 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) if (unlikely(!wall_time || wall_time < idle_time)) continue; - load = 100 * (wall_time - idle_time) / wall_time; + /* + * If the CPU had gone completely idle, and a task just woke up + * on this CPU now, it would be unfair to calculate 'load' the + * usual way for this elapsed time-window, because it will show + * near-zero load, irrespective of how CPU intensive that task + * actually is. This is undesirable for latency-sensitive bursty + * workloads. + * + * To avoid this, we reuse the 'load' from the previous + * time-window and give this task a chance to start with a + * reasonably high CPU frequency. 
(However, we shouldn't over-do + * this copy, lest we get stuck at a high load (high frequency) + * for too long, even when the current system load has actually + * dropped down. So we perform the copy only once, upon the + * first wake-up from idle.) + * + * Detecting this situation is easy: the governor's deferrable + * timer would not have fired during CPU-idle periods. Hence + * an unusually large 'wall_time' (as compared to the sampling + * rate) indicates this scenario. + * + * prev_load can be zero in two cases and we must recalculate it + * for both cases: + * - during long idle intervals + * - explicitly set to zero + */ + if (unlikely(wall_time > (2 * sampling_rate) && + j_cdbs->prev_load)) { + load = j_cdbs->prev_load; + + /* + * Perform a destructive copy, to ensure that we copy + * the previous load only once, upon the first wake-up + * from idle. + */ + j_cdbs->prev_load = 0; + } else { + load = 100 * (wall_time - idle_time) / wall_time; + j_cdbs->prev_load = load; + } if (load > max_load) max_load = load; @@ -318,11 +372,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, for_each_cpu(j, policy->cpus) { struct cpu_dbs_common_info *j_cdbs = dbs_data->cdata->get_cpu_cdbs(j); + unsigned int prev_load; j_cdbs->cpu = j; j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); + + prev_load = (unsigned int) + (j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle); + j_cdbs->prev_load = 100 * prev_load / + (unsigned int) j_cdbs->prev_cpu_wall; + if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index bfb9ae14142c..cc401d147e72 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -134,6 +134,13 @@ struct cpu_dbs_common_info { u64 prev_cpu_idle; u64 prev_cpu_wall; u64 prev_cpu_nice; + /* + * Used to keep track of load in the previous interval. 
However, when + * explicitly set to zero, it is used as a flag to ensure that we copy + * the previous load to the current interval only once, upon the first + * wake-up from idle. + */ + unsigned int prev_load; struct cpufreq_policy *cur_policy; struct delayed_work work; /* diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index aebd4572eb6d..4e7f492ad583 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -691,14 +691,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static int intel_pstate_init_cpu(unsigned int cpunum) { - - const struct x86_cpu_id *id; struct cpudata *cpu; - id = x86_match_cpu(intel_pstate_cpu_ids); - if (!id) - return -ENODEV; - all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); if (!all_cpu_data[cpunum]) return -ENOMEM; diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c index 0af618abebaf..3607070797af 100644 --- a/drivers/cpufreq/ppc-corenet-cpufreq.c +++ b/drivers/cpufreq/ppc-corenet-cpufreq.c @@ -138,7 +138,7 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy) struct cpufreq_frequency_table *table; struct cpu_data *data; unsigned int cpu = policy->cpu; - u64 transition_latency_hz; + u64 u64temp; np = of_get_cpu_node(cpu, NULL); if (!np) @@ -206,9 +206,10 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy) for_each_cpu(i, per_cpu(cpu_mask, cpu)) per_cpu(cpu_data, i) = data; - transition_latency_hz = 12ULL * NSEC_PER_SEC; - policy->cpuinfo.transition_latency = - do_div(transition_latency_hz, fsl_get_sys_freq()); + /* Minimum transition latency is 12 platform clocks */ + u64temp = 12ULL * NSEC_PER_SEC; + do_div(u64temp, fsl_get_sys_freq()); + policy->cpuinfo.transition_latency = u64temp + 1; of_node_put(np); diff --git a/drivers/cpufreq/tegra-cpufreq.c b/drivers/cpufreq/tegra-cpufreq.c index 6e774c6ac20b..8084c7f7e206 100644 --- a/drivers/cpufreq/tegra-cpufreq.c +++ 
b/drivers/cpufreq/tegra-cpufreq.c @@ -45,46 +45,54 @@ static struct clk *cpu_clk; static struct clk *pll_x_clk; static struct clk *pll_p_clk; static struct clk *emc_clk; +static bool pll_x_prepared; -static int tegra_cpu_clk_set_rate(unsigned long rate) +static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy, + unsigned int index) +{ + unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000; + + /* + * Don't switch to intermediate freq if: + * - we are already at it, i.e. policy->cur == ifreq + * - index corresponds to ifreq + */ + if ((freq_table[index].frequency == ifreq) || (policy->cur == ifreq)) + return 0; + + return ifreq; +} + +static int tegra_target_intermediate(struct cpufreq_policy *policy, + unsigned int index) { int ret; /* * Take an extra reference to the main pll so it doesn't turn - * off when we move the cpu off of it + * off when we move the cpu off of it as enabling it again while we + * switch to it from tegra_target() would take additional time. + * + * When target-freq is equal to intermediate freq we don't need to + * switch to an intermediate freq and so this routine isn't called. + * Also, we wouldn't be using pll_x anymore and must not take extra + * reference to it, as it can be disabled now to save some power. 
*/ clk_prepare_enable(pll_x_clk); ret = clk_set_parent(cpu_clk, pll_p_clk); - if (ret) { - pr_err("Failed to switch cpu to clock pll_p\n"); - goto out; - } - - if (rate == clk_get_rate(pll_p_clk)) - goto out; - - ret = clk_set_rate(pll_x_clk, rate); - if (ret) { - pr_err("Failed to change pll_x to %lu\n", rate); - goto out; - } - - ret = clk_set_parent(cpu_clk, pll_x_clk); - if (ret) { - pr_err("Failed to switch cpu to clock pll_x\n"); - goto out; - } + if (ret) + clk_disable_unprepare(pll_x_clk); + else + pll_x_prepared = true; -out: - clk_disable_unprepare(pll_x_clk); return ret; } static int tegra_target(struct cpufreq_policy *policy, unsigned int index) { unsigned long rate = freq_table[index].frequency; + unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000; int ret = 0; /* @@ -98,10 +106,30 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index) else clk_set_rate(emc_clk, 100000000); /* emc 50Mhz */ - ret = tegra_cpu_clk_set_rate(rate * 1000); + /* + * target freq == pll_p, don't need to take extra reference to pll_x_clk + * as it isn't used anymore. + */ + if (rate == ifreq) + return clk_set_parent(cpu_clk, pll_p_clk); + + ret = clk_set_rate(pll_x_clk, rate * 1000); + /* Restore to earlier frequency on error, i.e. pll_x */ if (ret) - pr_err("cpu-tegra: Failed to set cpu frequency to %lu kHz\n", - rate); + pr_err("Failed to change pll_x to %lu\n", rate); + + ret = clk_set_parent(cpu_clk, pll_x_clk); + /* This shouldn't fail while changing or restoring */ + WARN_ON(ret); + + /* + * Drop count to pll_x clock only if we switched to intermediate freq + * earlier while transitioning to a target frequency. 
+ */ + if (pll_x_prepared) { + clk_disable_unprepare(pll_x_clk); + pll_x_prepared = false; + } return ret; } @@ -137,16 +165,18 @@ static int tegra_cpu_exit(struct cpufreq_policy *policy) } static struct cpufreq_driver tegra_cpufreq_driver = { - .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = cpufreq_generic_frequency_table_verify, - .target_index = tegra_target, - .get = cpufreq_generic_get, - .init = tegra_cpu_init, - .exit = tegra_cpu_exit, - .name = "tegra", - .attr = cpufreq_generic_attr, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .get_intermediate = tegra_get_intermediate, + .target_intermediate = tegra_target_intermediate, + .target_index = tegra_target, + .get = cpufreq_generic_get, + .init = tegra_cpu_init, + .exit = tegra_cpu_exit, + .name = "tegra", + .attr = cpufreq_generic_attr, #ifdef CONFIG_PM - .suspend = cpufreq_generic_suspend, + .suspend = cpufreq_generic_suspend, #endif }; |