Merge tag 'pm+acpi-3.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more ACPI and power management updates from Rafael Wysocki: "These are fixups on top of the previous PM+ACPI pull request, regression fixes (ACPI hotplug, cpufreq ppc-corenet), other bug fixes (ACPI reset, cpufreq), new PM trace points for system suspend profiling and a copyright notice update. Specifics: - I didn't remember correctly that the Hans de Goede's ACPI video patches actually didn't flip the video.use_native_backlight default, although we had discussed that and decided to do that. Since I said we would do that in the previous PM+ACPI pull request, make that change for real now. - ACPI bus check notifications for PCI host bridges don't cause the bus below the host bridge to be checked for changes as they should because of a mistake in the ACPI-based PCI hotplug (ACPIPHP) subsystem that forgets to add hotplug contexts to PCI host bridge ACPI device objects. Create hotplug contexts for PCI host bridges too as appropriate. - Revert recent cpufreq commit related to the big.LITTLE cpufreq driver that breaks arm64 builds. - Fix for a regression in the ppc-corenet cpufreq driver introduced during the 3.15 cycle and causing the driver to use the remainder from do_div instead of the quotient. From Ed Swarthout. - Resets triggered by panic activate a BUG_ON() in vmalloc.c on systems where the ACPI reset register is located in memory address space. Fix from Randy Wright. - Fix for a problem with cpufreq governors that decisions made by them may be suboptimal due to the fact that deferrable timers are used by them for CPU load sampling. From Srivatsa S Bhat. - Fix for a problem with the Tegra cpufreq driver where the CPU frequency is temporarily switched to a "stable" level that is different from both the initial and target frequencies during transitions which causes udelay() to expire earlier than it should sometimes. From Viresh Kumar. - New trace points and rework of some existing trace points for system suspend/resume profiling from Todd Brandt. - Assorted cpufreq fixes and cleanups from Stratos Karafotis and Viresh Kumar. - Copyright notice update for suspend-and-cpuhotplug.txt from Srivatsa S Bhat" * tag 'pm+acpi-3.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: ACPI / hotplug / PCI: Add hotplug contexts to PCI host bridges PM / sleep: trace events for device PM callbacks cpufreq: cpufreq-cpu0: remove dependency on THERMAL and REGULATOR cpufreq: tegra: update comment for clarity cpufreq: intel_pstate: Remove duplicate CPU ID check cpufreq: Mark CPU0 driver with CPUFREQ_NEED_INITIAL_FREQ_CHECK flag PM / Documentation: Update copyright in suspend-and-cpuhotplug.txt cpufreq: governor: remove copy_prev_load from 'struct cpu_dbs_common_info' cpufreq: governor: Be friendly towards latency-sensitive bursty workloads PM / sleep: trace events for suspend/resume cpufreq: ppc-corenet-cpu-freq: do_div use quotient Revert "cpufreq: Enable big.LITTLE cpufreq driver on arm64" cpufreq: Tegra: implement intermediate frequency callbacks cpufreq: add support for intermediate (stable) frequencies ACPI / video: Change the default for video.use_native_backlight to 1 ACPI: Fix bug when ACPI reset register is implemented in system memory
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-12 13:14:19 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-06-12 13:14:19 -0700
commit: 19c1940feab777bb037c665a09f495d08a6c4e6c (patch)
tree: 68d4de31f30c9d69a1f1ea080b8a69ce61e1ead5 /drivers/cpufreq
parent: 7c574cf6aeb75920ba4d3af937bb1b3c42785ac4 (diff)
parent: d715a226b0b3dae48865d05e8c36175a8f75a809 (diff)
download: lwn-19c1940feab777bb037c665a09f495d08a6c4e6c.tar.gz
lwn-19c1940feab777bb037c665a09f495d08a6c4e6c.zip
9 files changed, 204 insertions, 59 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 1fbe11f2a146..e473d6555f96 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -185,7 +185,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
 
 config GENERIC_CPUFREQ_CPU0
 	tristate "Generic CPU0 cpufreq driver"
-	depends on HAVE_CLK && REGULATOR && OF && THERMAL && CPU_THERMAL
+	depends on HAVE_CLK && OF
 	select PM_OPP
 	help
 	  This adds a generic cpufreq driver for CPU0 frequency management.
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 36d20d0fce27..ebac67115009 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -5,8 +5,7 @@
 # big LITTLE core layer and glue drivers
 config ARM_BIG_LITTLE_CPUFREQ
 	tristate "Generic ARM big LITTLE CPUfreq driver"
-	depends on (BIG_LITTLE && ARM_CPU_TOPOLOGY) || (ARM64 && SMP)
-	depends on HAVE_CLK
+	depends on ARM && BIG_LITTLE && ARM_CPU_TOPOLOGY && HAVE_CLK
 	select PM_OPP
 	help
 	  This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index 09b9129c7bd3..ee1ae303a07c 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -104,7 +104,7 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
 }
 
 static struct cpufreq_driver cpu0_cpufreq_driver = {
-	.flags = CPUFREQ_STICKY,
+	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = cpu0_set_target,
 	.get = cpufreq_generic_get,
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ae11dd51f81d..aed2b0cb83dc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1816,20 +1816,55 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/* Must set freqs->new to intermediate frequency */
+static int __target_intermediate(struct cpufreq_policy *policy,
+				 struct cpufreq_freqs *freqs, int index)
+{
+	int ret;
+
+	freqs->new = cpufreq_driver->get_intermediate(policy, index);
+
+	/* We don't need to switch to intermediate freq */
+	if (!freqs->new)
+		return 0;
+
+	pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n",
+		 __func__, policy->cpu, freqs->old, freqs->new);
+
+	cpufreq_freq_transition_begin(policy, freqs);
+	ret = cpufreq_driver->target_intermediate(policy, index);
+	cpufreq_freq_transition_end(policy, freqs, ret);
+
+	if (ret)
+		pr_err("%s: Failed to change to intermediate frequency: %d\n",
+		       __func__, ret);
+
+	return ret;
+}
+
 static int __target_index(struct cpufreq_policy *policy,
 			  struct cpufreq_frequency_table *freq_table, int index)
 {
-	struct cpufreq_freqs freqs;
+	struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
+	unsigned int intermediate_freq = 0;
 	int retval = -EINVAL;
 	bool notify;
 
 	notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
-
 	if (notify) {
-		freqs.old = policy->cur;
-		freqs.new = freq_table[index].frequency;
-		freqs.flags = 0;
+		/* Handle switching to intermediate frequency */
+		if (cpufreq_driver->get_intermediate) {
+			retval = __target_intermediate(policy, &freqs, index);
+			if (retval)
+				return retval;
+
+			intermediate_freq = freqs.new;
+			/* Set old freq to intermediate */
+			if (intermediate_freq)
+				freqs.old = freqs.new;
+		}
 
+		freqs.new = freq_table[index].frequency;
 		pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
 			 __func__, policy->cpu, freqs.old, freqs.new);
 
@@ -1841,9 +1876,23 @@ static int __target_index(struct cpufreq_policy *policy,
 		pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
 		       retval);
 
-	if (notify)
+	if (notify) {
 		cpufreq_freq_transition_end(policy, &freqs, retval);
 
+		/*
+		 * Failed after setting to intermediate freq? Driver should have
+		 * reverted back to initial frequency and so should we. Check
+		 * here for intermediate_freq instead of get_intermediate, in
+		 * case we have't switched to intermediate freq at all.
+		 */
+		if (unlikely(retval && intermediate_freq)) {
+			freqs.old = intermediate_freq;
+			freqs.new = policy->restore_freq;
+			cpufreq_freq_transition_begin(policy, &freqs);
+			cpufreq_freq_transition_end(policy, &freqs, 0);
+		}
+	}
+
 	return retval;
 }
 
@@ -1875,6 +1924,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	if (target_freq == policy->cur)
 		return 0;
 
+	/* Save last value to restore later on errors */
+	policy->restore_freq = policy->cur;
+
 	if (cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
 	else if (cpufreq_driver->target_index) {
@@ -2361,7 +2413,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	    !(driver_data->setpolicy || driver_data->target_index ||
 		    driver_data->target) ||
 	     (driver_data->setpolicy && (driver_data->target_index ||
-		    driver_data->target)))
+		    driver_data->target)) ||
+	     (!!driver_data->get_intermediate != !!driver_data->target_intermediate))
 		return -EINVAL;
 
 	pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e1c6433b16e0..1b44496b2d2b 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -36,14 +36,29 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	struct cpufreq_policy *policy;
+	unsigned int sampling_rate;
 	unsigned int max_load = 0;
 	unsigned int ignore_nice;
 	unsigned int j;
 
-	if (dbs_data->cdata->governor == GOV_ONDEMAND)
+	if (dbs_data->cdata->governor == GOV_ONDEMAND) {
+		struct od_cpu_dbs_info_s *od_dbs_info =
+				dbs_data->cdata->get_cpu_dbs_info_s(cpu);
+
+		/*
+		 * Sometimes, the ondemand governor uses an additional
+		 * multiplier to give long delays. So apply this multiplier to
+		 * the 'sampling_rate', so as to keep the wake-up-from-idle
+		 * detection logic a bit conservative.
+		 */
+		sampling_rate = od_tuners->sampling_rate;
+		sampling_rate *= od_dbs_info->rate_mult;
+
 		ignore_nice = od_tuners->ignore_nice_load;
-	else
+	} else {
+		sampling_rate = cs_tuners->sampling_rate;
 		ignore_nice = cs_tuners->ignore_nice_load;
+	}
 
 	policy = cdbs->cur_policy;
 
@@ -96,7 +111,46 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 		if (unlikely(!wall_time || wall_time < idle_time))
 			continue;
 
-		load = 100 * (wall_time - idle_time) / wall_time;
+		/*
+		 * If the CPU had gone completely idle, and a task just woke up
+		 * on this CPU now, it would be unfair to calculate 'load' the
+		 * usual way for this elapsed time-window, because it will show
+		 * near-zero load, irrespective of how CPU intensive that task
+		 * actually is. This is undesirable for latency-sensitive bursty
+		 * workloads.
+		 *
+		 * To avoid this, we reuse the 'load' from the previous
+		 * time-window and give this task a chance to start with a
+		 * reasonably high CPU frequency. (However, we shouldn't over-do
+		 * this copy, lest we get stuck at a high load (high frequency)
+		 * for too long, even when the current system load has actually
+		 * dropped down. So we perform the copy only once, upon the
+		 * first wake-up from idle.)
+		 *
+		 * Detecting this situation is easy: the governor's deferrable
+		 * timer would not have fired during CPU-idle periods. Hence
+		 * an unusually large 'wall_time' (as compared to the sampling
+		 * rate) indicates this scenario.
+		 *
+		 * prev_load can be zero in two cases and we must recalculate it
+		 * for both cases:
+		 * - during long idle intervals
+		 * - explicitly set to zero
+		 */
+		if (unlikely(wall_time > (2 * sampling_rate) &&
+			     j_cdbs->prev_load)) {
+			load = j_cdbs->prev_load;
+
+			/*
+			 * Perform a destructive copy, to ensure that we copy
+			 * the previous load only once, upon the first wake-up
+			 * from idle.
+			 */
+			j_cdbs->prev_load = 0;
+		} else {
+			load = 100 * (wall_time - idle_time) / wall_time;
+			j_cdbs->prev_load = load;
+		}
 
 		if (load > max_load)
 			max_load = load;
@@ -318,11 +372,18 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		for_each_cpu(j, policy->cpus) {
 			struct cpu_dbs_common_info *j_cdbs =
 				dbs_data->cdata->get_cpu_cdbs(j);
+			unsigned int prev_load;
 
 			j_cdbs->cpu = j;
 			j_cdbs->cur_policy = policy;
 			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
 					       &j_cdbs->prev_cpu_wall, io_busy);
+
+			prev_load = (unsigned int)
+				(j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle);
+			j_cdbs->prev_load = 100 * prev_load /
+					(unsigned int) j_cdbs->prev_cpu_wall;
+
 			if (ignore_nice)
 				j_cdbs->prev_cpu_nice =
 					kcpustat_cpu(j).cpustat[CPUTIME_NICE];
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index bfb9ae14142c..cc401d147e72 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -134,6 +134,13 @@ struct cpu_dbs_common_info {
 	u64 prev_cpu_idle;
 	u64 prev_cpu_wall;
 	u64 prev_cpu_nice;
+	/*
+	 * Used to keep track of load in the previous interval. However, when
+	 * explicitly set to zero, it is used as a flag to ensure that we copy
+	 * the previous load to the current interval only once, upon the first
+	 * wake-up from idle.
+	 */
+	unsigned int prev_load;
 	struct cpufreq_policy *cur_policy;
 	struct delayed_work work;
 	/*
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index aebd4572eb6d..4e7f492ad583 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -691,14 +691,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
-
-	const struct x86_cpu_id *id;
 	struct cpudata *cpu;
 
-	id = x86_match_cpu(intel_pstate_cpu_ids);
-	if (!id)
-		return -ENODEV;
-
 	all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
 	if (!all_cpu_data[cpunum])
 		return -ENOMEM;
diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c
index 0af618abebaf..3607070797af 100644
--- a/drivers/cpufreq/ppc-corenet-cpufreq.c
+++ b/drivers/cpufreq/ppc-corenet-cpufreq.c
@@ -138,7 +138,7 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	struct cpufreq_frequency_table *table;
 	struct cpu_data *data;
 	unsigned int cpu = policy->cpu;
-	u64 transition_latency_hz;
+	u64 u64temp;
 
 	np = of_get_cpu_node(cpu, NULL);
 	if (!np)
@@ -206,9 +206,10 @@ static int corenet_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	for_each_cpu(i, per_cpu(cpu_mask, cpu))
 		per_cpu(cpu_data, i) = data;
 
-	transition_latency_hz = 12ULL * NSEC_PER_SEC;
-	policy->cpuinfo.transition_latency =
-		do_div(transition_latency_hz, fsl_get_sys_freq());
+	/* Minimum transition latency is 12 platform clocks */
+	u64temp = 12ULL * NSEC_PER_SEC;
+	do_div(u64temp, fsl_get_sys_freq());
+	policy->cpuinfo.transition_latency = u64temp + 1;
 
 	of_node_put(np);
 
diff --git a/drivers/cpufreq/tegra-cpufreq.c b/drivers/cpufreq/tegra-cpufreq.c
index 6e774c6ac20b..8084c7f7e206 100644
--- a/drivers/cpufreq/tegra-cpufreq.c
+++ b/drivers/cpufreq/tegra-cpufreq.c
@@ -45,46 +45,54 @@ static struct clk *cpu_clk;
 static struct clk *pll_x_clk;
 static struct clk *pll_p_clk;
 static struct clk *emc_clk;
+static bool pll_x_prepared;
 
-static int tegra_cpu_clk_set_rate(unsigned long rate)
+static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy,
+					   unsigned int index)
+{
+	unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
+
+	/*
+	 * Don't switch to intermediate freq if:
+	 * - we are already at it, i.e. policy->cur == ifreq
+	 * - index corresponds to ifreq
+	 */
+	if ((freq_table[index].frequency == ifreq) || (policy->cur == ifreq))
+		return 0;
+
+	return ifreq;
+}
+
+static int tegra_target_intermediate(struct cpufreq_policy *policy,
+				     unsigned int index)
 {
 	int ret;
 
 	/*
 	 * Take an extra reference to the main pll so it doesn't turn
-	 * off when we move the cpu off of it
+	 * off when we move the cpu off of it as enabling it again while we
+	 * switch to it from tegra_target() would take additional time.
+	 *
+	 * When target-freq is equal to intermediate freq we don't need to
+	 * switch to an intermediate freq and so this routine isn't called.
+	 * Also, we wouldn't be using pll_x anymore and must not take extra
+	 * reference to it, as it can be disabled now to save some power.
 	 */
 	clk_prepare_enable(pll_x_clk);
 
 	ret = clk_set_parent(cpu_clk, pll_p_clk);
-	if (ret) {
-		pr_err("Failed to switch cpu to clock pll_p\n");
-		goto out;
-	}
-
-	if (rate == clk_get_rate(pll_p_clk))
-		goto out;
-
-	ret = clk_set_rate(pll_x_clk, rate);
-	if (ret) {
-		pr_err("Failed to change pll_x to %lu\n", rate);
-		goto out;
-	}
-
-	ret = clk_set_parent(cpu_clk, pll_x_clk);
-	if (ret) {
-		pr_err("Failed to switch cpu to clock pll_x\n");
-		goto out;
-	}
+	if (ret)
+		clk_disable_unprepare(pll_x_clk);
+	else
+		pll_x_prepared = true;
 
-out:
-	clk_disable_unprepare(pll_x_clk);
 	return ret;
 }
 
 static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
 {
 	unsigned long rate = freq_table[index].frequency;
+	unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
 	int ret = 0;
 
 	/*
@@ -98,10 +106,30 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
 	else
 		clk_set_rate(emc_clk, 100000000);  /* emc 50Mhz */
 
-	ret = tegra_cpu_clk_set_rate(rate * 1000);
+	/*
+	 * target freq == pll_p, don't need to take extra reference to pll_x_clk
+	 * as it isn't used anymore.
+	 */
+	if (rate == ifreq)
+		return clk_set_parent(cpu_clk, pll_p_clk);
+
+	ret = clk_set_rate(pll_x_clk, rate * 1000);
+	/* Restore to earlier frequency on error, i.e. pll_x */
 	if (ret)
-		pr_err("cpu-tegra: Failed to set cpu frequency to %lu kHz\n",
-			rate);
+		pr_err("Failed to change pll_x to %lu\n", rate);
+
+	ret = clk_set_parent(cpu_clk, pll_x_clk);
+	/* This shouldn't fail while changing or restoring */
+	WARN_ON(ret);
+
+	/*
+	 * Drop count to pll_x clock only if we switched to intermediate freq
+	 * earlier while transitioning to a target frequency.
+	 */
+	if (pll_x_prepared) {
+		clk_disable_unprepare(pll_x_clk);
+		pll_x_prepared = false;
+	}
 
 	return ret;
 }
@@ -137,16 +165,18 @@ static int tegra_cpu_exit(struct cpufreq_policy *policy)
 }
 
 static struct cpufreq_driver tegra_cpufreq_driver = {
-	.flags		= CPUFREQ_NEED_INITIAL_FREQ_CHECK,
-	.verify		= cpufreq_generic_frequency_table_verify,
-	.target_index	= tegra_target,
-	.get		= cpufreq_generic_get,
-	.init		= tegra_cpu_init,
-	.exit		= tegra_cpu_exit,
-	.name		= "tegra",
-	.attr		= cpufreq_generic_attr,
+	.flags			= CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+	.verify			= cpufreq_generic_frequency_table_verify,
+	.get_intermediate	= tegra_get_intermediate,
+	.target_intermediate	= tegra_target_intermediate,
+	.target_index		= tegra_target,
+	.get			= cpufreq_generic_get,
+	.init			= tegra_cpu_init,
+	.exit			= tegra_cpu_exit,
+	.name			= "tegra",
+	.attr			= cpufreq_generic_attr,
 #ifdef CONFIG_PM
-	.suspend	= cpufreq_generic_suspend,
+	.suspend		= cpufreq_generic_suspend,
 #endif
 };
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-12 13:14:19 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-12 13:14:19 -0700
commit	19c1940feab777bb037c665a09f495d08a6c4e6c (patch)
tree	68d4de31f30c9d69a1f1ea080b8a69ce61e1ead5 /drivers/cpufreq
parent	7c574cf6aeb75920ba4d3af937bb1b3c42785ac4 (diff)
parent	d715a226b0b3dae48865d05e8c36175a8f75a809 (diff)
download	lwn-19c1940feab777bb037c665a09f495d08a6c4e6c.tar.gz lwn-19c1940feab777bb037c665a09f495d08a6c4e6c.zip