summaryrefslogtreecommitdiff
path: root/drivers/cpufreq
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-12-12 20:45:01 +0100
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-12-12 20:45:01 +0100
commitfecc8c0ebd30c41cc66303b6f9476481c5d6d260 (patch)
treeb2e06edcc3425c2a992eb99a8cab1098c4246473 /drivers/cpufreq
parent57def856f33a5b50c71d3d40faf73d00b034e29c (diff)
parent2bf3b685a35c80fe368dad9da0e77ba48b460939 (diff)
downloadlwn-fecc8c0ebd30c41cc66303b6f9476481c5d6d260.tar.gz
lwn-fecc8c0ebd30c41cc66303b6f9476481c5d6d260.zip
Merge branch 'pm-cpufreq'
* pm-cpufreq: (51 commits) Documentation: intel_pstate: Document HWP energy/performance hints cpufreq: intel_pstate: Support for energy performance hints with HWP cpufreq: intel_pstate: Add locking around HWP requests cpufreq: ondemand: Set MIN_FREQUENCY_UP_THRESHOLD to 1 cpufreq: intel_pstate: Add Knights Mill CPUID MAINTAINERS: Add bug tracking system location entry for cpufreq cpufreq: dt: Add support for zx296718 cpufreq: acpi-cpufreq: drop rdmsr_on_cpus() usage cpufreq: acpi-cpufreq: Convert to hotplug state machine cpufreq: intel_pstate: fix intel_pstate_exit_perf_limits() prototype cpufreq: intel_pstate: Set EPP/EPB to 0 in performance mode cpufreq: schedutil: Rectify comment in sugov_irq_work() function cpufreq: intel_pstate: increase precision of performance limits cpufreq: intel_pstate: round up min_perf limits cpufreq: Make cpufreq_update_policy() void ACPI / processor: Make acpi_processor_ppc_has_changed() void cpufreq: Avoid using inactive policies cpufreq: intel_pstate: Generic governors support cpufreq: intel_pstate: Request P-states control from SMM if needed cpufreq: dt: Add support for r8a7743 and r8a7745 ...
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/Kconfig.arm29
-rw-r--r--drivers/cpufreq/Makefile2
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c117
-rw-r--r--drivers/cpufreq/brcmstb-avs-cpufreq.c1057
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c7
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c15
-rw-r--r--drivers/cpufreq/cpufreq.c25
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c46
-rw-r--r--drivers/cpufreq/cpufreq_governor.c30
-rw-r--r--drivers/cpufreq/cpufreq_governor.h5
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c17
-rw-r--r--drivers/cpufreq/cpufreq_stats.c22
-rw-r--r--drivers/cpufreq/integrator-cpufreq.c239
-rw-r--r--drivers/cpufreq/intel_pstate.c826
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c65
15 files changed, 2006 insertions, 496 deletions
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index d89b8afe23b6..920c469f3953 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -12,6 +12,27 @@ config ARM_BIG_LITTLE_CPUFREQ
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
+config ARM_BRCMSTB_AVS_CPUFREQ
+ tristate "Broadcom STB AVS CPUfreq driver"
+ depends on ARCH_BRCMSTB || COMPILE_TEST
+ default y
+ help
+ Some Broadcom STB SoCs use a co-processor running proprietary firmware
+ ("AVS") to handle voltage and frequency scaling. This driver provides
+ a standard CPUfreq interface to to the firmware.
+
+ Say Y, if you have a Broadcom SoC with AVS support for DFS or DVFS.
+
+config ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
+ bool "Broadcom STB AVS CPUfreq driver sysfs debug capability"
+ depends on ARM_BRCMSTB_AVS_CPUFREQ
+ help
+ Enabling this option turns on debug support via sysfs under
+ /sys/kernel/debug/brcmstb-avs-cpufreq. It is possible to read all and
+ write some AVS mailbox registers through sysfs entries.
+
+ If in doubt, say N.
+
config ARM_DT_BL_CPUFREQ
tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver"
depends on ARM_BIG_LITTLE_CPUFREQ && OF
@@ -60,14 +81,6 @@ config ARM_IMX6Q_CPUFREQ
If in doubt, say N.
-config ARM_INTEGRATOR
- tristate "CPUfreq driver for ARM Integrator CPUs"
- depends on ARCH_INTEGRATOR
- default y
- help
- This enables the CPUfreq driver for ARM Integrator CPUs.
- If in doubt, say Y.
-
config ARM_KIRKWOOD_CPUFREQ
def_bool MACH_KIRKWOOD
help
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 0a9b6a093646..1e46c3918e7a 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -51,12 +51,12 @@ obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o
# LITTLE drivers, so that it is probed last.
obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o
+obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o
obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o
obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o
obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o
obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o
obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o
-obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o
obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o
obj-$(CONFIG_ARM_MT8173_CPUFREQ) += mt8173-cpufreq.o
obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 297e9128fe9f..3a98702b7445 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -84,7 +84,6 @@ static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufre
static struct cpufreq_driver acpi_cpufreq_driver;
static unsigned int acpi_pstate_strict;
-static struct msr __percpu *msrs;
static bool boost_state(unsigned int cpu)
{
@@ -104,11 +103,10 @@ static bool boost_state(unsigned int cpu)
return false;
}
-static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
+static int boost_set_msr(bool enable)
{
- u32 cpu;
u32 msr_addr;
- u64 msr_mask;
+ u64 msr_mask, val;
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
@@ -120,26 +118,31 @@ static void boost_set_msrs(bool enable, const struct cpumask *cpumask)
msr_mask = MSR_K7_HWCR_CPB_DIS;
break;
default:
- return;
+ return -EINVAL;
}
- rdmsr_on_cpus(cpumask, msr_addr, msrs);
+ rdmsrl(msr_addr, val);
- for_each_cpu(cpu, cpumask) {
- struct msr *reg = per_cpu_ptr(msrs, cpu);
- if (enable)
- reg->q &= ~msr_mask;
- else
- reg->q |= msr_mask;
- }
+ if (enable)
+ val &= ~msr_mask;
+ else
+ val |= msr_mask;
+
+ wrmsrl(msr_addr, val);
+ return 0;
+}
+
+static void boost_set_msr_each(void *p_en)
+{
+ bool enable = (bool) p_en;
- wrmsr_on_cpus(cpumask, msr_addr, msrs);
+ boost_set_msr(enable);
}
static int set_boost(int val)
{
get_online_cpus();
- boost_set_msrs(val, cpu_online_mask);
+ on_each_cpu(boost_set_msr_each, (void *)(long)val, 1);
put_online_cpus();
pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis");
@@ -536,46 +539,24 @@ static void free_acpi_perf_data(void)
free_percpu(acpi_perf_data);
}
-static int boost_notify(struct notifier_block *nb, unsigned long action,
- void *hcpu)
+static int cpufreq_boost_online(unsigned int cpu)
{
- unsigned cpu = (long)hcpu;
- const struct cpumask *cpumask;
-
- cpumask = get_cpu_mask(cpu);
+ /*
+ * On the CPU_UP path we simply keep the boost-disable flag
+ * in sync with the current global state.
+ */
+ return boost_set_msr(acpi_cpufreq_driver.boost_enabled);
+}
+static int cpufreq_boost_down_prep(unsigned int cpu)
+{
/*
* Clear the boost-disable bit on the CPU_DOWN path so that
- * this cpu cannot block the remaining ones from boosting. On
- * the CPU_UP path we simply keep the boost-disable flag in
- * sync with the current global state.
+ * this cpu cannot block the remaining ones from boosting.
*/
-
- switch (action) {
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
- break;
-
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- boost_set_msrs(1, cpumask);
- break;
-
- default:
- break;
- }
-
- return NOTIFY_OK;
+ return boost_set_msr(1);
}
-
-static struct notifier_block boost_nb = {
- .notifier_call = boost_notify,
-};
-
/*
* acpi_cpufreq_early_init - initialize ACPI P-States library
*
@@ -922,37 +903,35 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
.attr = acpi_cpufreq_attr,
};
+static enum cpuhp_state acpi_cpufreq_online;
+
static void __init acpi_cpufreq_boost_init(void)
{
- if (boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)) {
- msrs = msrs_alloc();
-
- if (!msrs)
- return;
-
- acpi_cpufreq_driver.set_boost = set_boost;
- acpi_cpufreq_driver.boost_enabled = boost_state(0);
-
- cpu_notifier_register_begin();
+ int ret;
- /* Force all MSRs to the same value */
- boost_set_msrs(acpi_cpufreq_driver.boost_enabled,
- cpu_online_mask);
+ if (!(boot_cpu_has(X86_FEATURE_CPB) || boot_cpu_has(X86_FEATURE_IDA)))
+ return;
- __register_cpu_notifier(&boost_nb);
+ acpi_cpufreq_driver.set_boost = set_boost;
+ acpi_cpufreq_driver.boost_enabled = boost_state(0);
- cpu_notifier_register_done();
+ /*
+ * This calls the online callback on all online cpu and forces all
+ * MSRs to the same value.
+ */
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpufreq/acpi:online",
+ cpufreq_boost_online, cpufreq_boost_down_prep);
+ if (ret < 0) {
+ pr_err("acpi_cpufreq: failed to register hotplug callbacks\n");
+ return;
}
+ acpi_cpufreq_online = ret;
}
static void acpi_cpufreq_boost_exit(void)
{
- if (msrs) {
- unregister_cpu_notifier(&boost_nb);
-
- msrs_free(msrs);
- msrs = NULL;
- }
+ if (acpi_cpufreq_online >= 0)
+ cpuhp_remove_state_nocalls(acpi_cpufreq_online);
}
static int __init acpi_cpufreq_init(void)
diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
new file mode 100644
index 000000000000..4fda623e55bb
--- /dev/null
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -0,0 +1,1057 @@
+/*
+ * CPU frequency scaling for Broadcom SoCs with AVS firmware that
+ * supports DVS or DVFS
+ *
+ * Copyright (c) 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * "AVS" is the name of a firmware developed at Broadcom. It derives
+ * its name from the technique called "Adaptive Voltage Scaling".
+ * Adaptive voltage scaling was the original purpose of this firmware.
+ * The AVS firmware still supports "AVS mode", where all it does is
+ * adaptive voltage scaling. However, on some newer Broadcom SoCs, the
+ * AVS Firmware, despite its unchanged name, also supports DFS mode and
+ * DVFS mode.
+ *
+ * In the context of this document and the related driver, "AVS" by
+ * itself always means the Broadcom firmware and never refers to the
+ * technique called "Adaptive Voltage Scaling".
+ *
+ * The Broadcom STB AVS CPUfreq driver provides voltage and frequency
+ * scaling on Broadcom SoCs using AVS firmware with support for DFS and
+ * DVFS. The AVS firmware is running on its own co-processor. The
+ * driver supports both uniprocessor (UP) and symmetric multiprocessor
+ * (SMP) systems which share clock and voltage across all CPUs.
+ *
+ * Actual voltage and frequency scaling is done solely by the AVS
+ * firmware. This driver does not change frequency or voltage itself.
+ * It provides a standard CPUfreq interface to the rest of the kernel
+ * and to userland. It interfaces with the AVS firmware to effect the
+ * requested changes and to report back the current system status in a
+ * way that is expected by existing tools.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/semaphore.h>
+
+#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#endif
+
+/* Max number of arguments AVS calls take */
+#define AVS_MAX_CMD_ARGS 4
+/*
+ * This macro is used to generate AVS parameter register offsets. For
+ * x >= AVS_MAX_CMD_ARGS, it returns 0 to protect against accidental memory
+ * access outside of the parameter range. (Offset 0 is the first parameter.)
+ */
+#define AVS_PARAM_MULT(x) ((x) < AVS_MAX_CMD_ARGS ? (x) : 0)
+
+/* AVS Mailbox Register offsets */
+#define AVS_MBOX_COMMAND 0x00
+#define AVS_MBOX_STATUS 0x04
+#define AVS_MBOX_VOLTAGE0 0x08
+#define AVS_MBOX_TEMP0 0x0c
+#define AVS_MBOX_PV0 0x10
+#define AVS_MBOX_MV0 0x14
+#define AVS_MBOX_PARAM(x) (0x18 + AVS_PARAM_MULT(x) * sizeof(u32))
+#define AVS_MBOX_REVISION 0x28
+#define AVS_MBOX_PSTATE 0x2c
+#define AVS_MBOX_HEARTBEAT 0x30
+#define AVS_MBOX_MAGIC 0x34
+#define AVS_MBOX_SIGMA_HVT 0x38
+#define AVS_MBOX_SIGMA_SVT 0x3c
+#define AVS_MBOX_VOLTAGE1 0x40
+#define AVS_MBOX_TEMP1 0x44
+#define AVS_MBOX_PV1 0x48
+#define AVS_MBOX_MV1 0x4c
+#define AVS_MBOX_FREQUENCY 0x50
+
+/* AVS Commands */
+#define AVS_CMD_AVAILABLE 0x00
+#define AVS_CMD_DISABLE 0x10
+#define AVS_CMD_ENABLE 0x11
+#define AVS_CMD_S2_ENTER 0x12
+#define AVS_CMD_S2_EXIT 0x13
+#define AVS_CMD_BBM_ENTER 0x14
+#define AVS_CMD_BBM_EXIT 0x15
+#define AVS_CMD_S3_ENTER 0x16
+#define AVS_CMD_S3_EXIT 0x17
+#define AVS_CMD_BALANCE 0x18
+/* PMAP and P-STATE commands */
+#define AVS_CMD_GET_PMAP 0x30
+#define AVS_CMD_SET_PMAP 0x31
+#define AVS_CMD_GET_PSTATE 0x40
+#define AVS_CMD_SET_PSTATE 0x41
+
+/* Different modes AVS supports (for GET_PMAP/SET_PMAP) */
+#define AVS_MODE_AVS 0x0
+#define AVS_MODE_DFS 0x1
+#define AVS_MODE_DVS 0x2
+#define AVS_MODE_DVFS 0x3
+
+/*
+ * PMAP parameter p1
+ * unused:31-24, mdiv_p0:23-16, unused:15-14, pdiv:13-10 , ndiv_int:9-0
+ */
+#define NDIV_INT_SHIFT 0
+#define NDIV_INT_MASK 0x3ff
+#define PDIV_SHIFT 10
+#define PDIV_MASK 0xf
+#define MDIV_P0_SHIFT 16
+#define MDIV_P0_MASK 0xff
+/*
+ * PMAP parameter p2
+ * mdiv_p4:31-24, mdiv_p3:23-16, mdiv_p2:15:8, mdiv_p1:7:0
+ */
+#define MDIV_P1_SHIFT 0
+#define MDIV_P1_MASK 0xff
+#define MDIV_P2_SHIFT 8
+#define MDIV_P2_MASK 0xff
+#define MDIV_P3_SHIFT 16
+#define MDIV_P3_MASK 0xff
+#define MDIV_P4_SHIFT 24
+#define MDIV_P4_MASK 0xff
+
+/* Different P-STATES AVS supports (for GET_PSTATE/SET_PSTATE) */
+#define AVS_PSTATE_P0 0x0
+#define AVS_PSTATE_P1 0x1
+#define AVS_PSTATE_P2 0x2
+#define AVS_PSTATE_P3 0x3
+#define AVS_PSTATE_P4 0x4
+#define AVS_PSTATE_MAX AVS_PSTATE_P4
+
+/* CPU L2 Interrupt Controller Registers */
+#define AVS_CPU_L2_SET0 0x04
+#define AVS_CPU_L2_INT_MASK BIT(31)
+
+/* AVS Command Status Values */
+#define AVS_STATUS_CLEAR 0x00
+/* Command/notification accepted */
+#define AVS_STATUS_SUCCESS 0xf0
+/* Command/notification rejected */
+#define AVS_STATUS_FAILURE 0xff
+/* Invalid command/notification (unknown) */
+#define AVS_STATUS_INVALID 0xf1
+/* Non-AVS modes are not supported */
+#define AVS_STATUS_NO_SUPP 0xf2
+/* Cannot set P-State until P-Map supplied */
+#define AVS_STATUS_NO_MAP 0xf3
+/* Cannot change P-Map after initial P-Map set */
+#define AVS_STATUS_MAP_SET 0xf4
+/* Max AVS status; higher numbers are used for debugging */
+#define AVS_STATUS_MAX 0xff
+
+/* Other AVS related constants */
+#define AVS_LOOP_LIMIT 10000
+#define AVS_TIMEOUT 300 /* in ms; expected completion is < 10ms */
+#define AVS_FIRMWARE_MAGIC 0xa11600d1
+
+#define BRCM_AVS_CPUFREQ_PREFIX "brcmstb-avs"
+#define BRCM_AVS_CPUFREQ_NAME BRCM_AVS_CPUFREQ_PREFIX "-cpufreq"
+#define BRCM_AVS_CPU_DATA "brcm,avs-cpu-data-mem"
+#define BRCM_AVS_CPU_INTR "brcm,avs-cpu-l2-intr"
+#define BRCM_AVS_HOST_INTR "sw_intr"
+
+struct pmap {
+ unsigned int mode;
+ unsigned int p1;
+ unsigned int p2;
+ unsigned int state;
+};
+
+struct private_data {
+ void __iomem *base;
+ void __iomem *avs_intr_base;
+ struct device *dev;
+#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
+ struct dentry *debugfs;
+#endif
+ struct completion done;
+ struct semaphore sem;
+ struct pmap pmap;
+};
+
+#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
+
+enum debugfs_format {
+ DEBUGFS_NORMAL,
+ DEBUGFS_FLOAT,
+ DEBUGFS_REV,
+};
+
+struct debugfs_data {
+ struct debugfs_entry *entry;
+ struct private_data *priv;
+};
+
+struct debugfs_entry {
+ char *name;
+ u32 offset;
+ fmode_t mode;
+ enum debugfs_format format;
+};
+
+#define DEBUGFS_ENTRY(name, mode, format) { \
+ #name, AVS_MBOX_##name, mode, format \
+}
+
+/*
+ * These are used for debugfs only. Otherwise we use AVS_MBOX_PARAM() directly.
+ */
+#define AVS_MBOX_PARAM1 AVS_MBOX_PARAM(0)
+#define AVS_MBOX_PARAM2 AVS_MBOX_PARAM(1)
+#define AVS_MBOX_PARAM3 AVS_MBOX_PARAM(2)
+#define AVS_MBOX_PARAM4 AVS_MBOX_PARAM(3)
+
+/*
+ * This table stores the name, access permissions and offset for each hardware
+ * register and is used to generate debugfs entries.
+ */
+static struct debugfs_entry debugfs_entries[] = {
+ DEBUGFS_ENTRY(COMMAND, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(STATUS, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(VOLTAGE0, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(TEMP0, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(PV0, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(MV0, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(PARAM1, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(PARAM2, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(PARAM3, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(PARAM4, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(REVISION, 0, DEBUGFS_REV),
+ DEBUGFS_ENTRY(PSTATE, 0, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(HEARTBEAT, 0, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(MAGIC, S_IWUSR, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(SIGMA_HVT, 0, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(SIGMA_SVT, 0, DEBUGFS_NORMAL),
+ DEBUGFS_ENTRY(VOLTAGE1, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(TEMP1, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(PV1, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(MV1, 0, DEBUGFS_FLOAT),
+ DEBUGFS_ENTRY(FREQUENCY, 0, DEBUGFS_NORMAL),
+};
+
+static int brcm_avs_target_index(struct cpufreq_policy *, unsigned int);
+
+static char *__strtolower(char *s)
+{
+ char *p;
+
+ for (p = s; *p; p++)
+ *p = tolower(*p);
+
+ return s;
+}
+
+#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */
+
+static void __iomem *__map_region(const char *name)
+{
+ struct device_node *np;
+ void __iomem *ptr;
+
+ np = of_find_compatible_node(NULL, NULL, name);
+ if (!np)
+ return NULL;
+
+ ptr = of_iomap(np, 0);
+ of_node_put(np);
+
+ return ptr;
+}
+
+static int __issue_avs_command(struct private_data *priv, int cmd, bool is_send,
+ u32 args[])
+{
+ unsigned long time_left = msecs_to_jiffies(AVS_TIMEOUT);
+ void __iomem *base = priv->base;
+ unsigned int i;
+ int ret;
+ u32 val;
+
+ ret = down_interruptible(&priv->sem);
+ if (ret)
+ return ret;
+
+ /*
+ * Make sure no other command is currently running: cmd is 0 if AVS
+ * co-processor is idle. Due to the guard above, we should almost never
+ * have to wait here.
+ */
+ for (i = 0, val = 1; val != 0 && i < AVS_LOOP_LIMIT; i++)
+ val = readl(base + AVS_MBOX_COMMAND);
+
+ /* Give the caller a chance to retry if AVS is busy. */
+ if (i == AVS_LOOP_LIMIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ /* Clear status before we begin. */
+ writel(AVS_STATUS_CLEAR, base + AVS_MBOX_STATUS);
+
+ /* We need to send arguments for this command. */
+ if (args && is_send) {
+ for (i = 0; i < AVS_MAX_CMD_ARGS; i++)
+ writel(args[i], base + AVS_MBOX_PARAM(i));
+ }
+
+ /* Protect from spurious interrupts. */
+ reinit_completion(&priv->done);
+
+ /* Now issue the command & tell firmware to wake up to process it. */
+ writel(cmd, base + AVS_MBOX_COMMAND);
+ writel(AVS_CPU_L2_INT_MASK, priv->avs_intr_base + AVS_CPU_L2_SET0);
+
+ /* Wait for AVS co-processor to finish processing the command. */
+ time_left = wait_for_completion_timeout(&priv->done, time_left);
+
+ /*
+ * If the AVS status is not in the expected range, it means AVS didn't
+ * complete our command in time, and we return an error. Also, if there
+ * is no "time left", we timed out waiting for the interrupt.
+ */
+ val = readl(base + AVS_MBOX_STATUS);
+ if (time_left == 0 || val == 0 || val > AVS_STATUS_MAX) {
+ dev_err(priv->dev, "AVS command %#x didn't complete in time\n",
+ cmd);
+ dev_err(priv->dev, " Time left: %u ms, AVS status: %#x\n",
+ jiffies_to_msecs(time_left), val);
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+
+ /* This command returned arguments, so we read them back. */
+ if (args && !is_send) {
+ for (i = 0; i < AVS_MAX_CMD_ARGS; i++)
+ args[i] = readl(base + AVS_MBOX_PARAM(i));
+ }
+
+ /* Clear status to tell AVS co-processor we are done. */
+ writel(AVS_STATUS_CLEAR, base + AVS_MBOX_STATUS);
+
+ /* Convert firmware errors to errno's as much as possible. */
+ switch (val) {
+ case AVS_STATUS_INVALID:
+ ret = -EINVAL;
+ break;
+ case AVS_STATUS_NO_SUPP:
+ ret = -ENOTSUPP;
+ break;
+ case AVS_STATUS_NO_MAP:
+ ret = -ENOENT;
+ break;
+ case AVS_STATUS_MAP_SET:
+ ret = -EEXIST;
+ break;
+ case AVS_STATUS_FAILURE:
+ ret = -EIO;
+ break;
+ }
+
+out:
+ up(&priv->sem);
+
+ return ret;
+}
+
+static irqreturn_t irq_handler(int irq, void *data)
+{
+ struct private_data *priv = data;
+
+ /* AVS command completed execution. Wake up __issue_avs_command(). */
+ complete(&priv->done);
+
+ return IRQ_HANDLED;
+}
+
+static char *brcm_avs_mode_to_string(unsigned int mode)
+{
+ switch (mode) {
+ case AVS_MODE_AVS:
+ return "AVS";
+ case AVS_MODE_DFS:
+ return "DFS";
+ case AVS_MODE_DVS:
+ return "DVS";
+ case AVS_MODE_DVFS:
+ return "DVFS";
+ }
+ return NULL;
+}
+
+static void brcm_avs_parse_p1(u32 p1, unsigned int *mdiv_p0, unsigned int *pdiv,
+ unsigned int *ndiv)
+{
+ *mdiv_p0 = (p1 >> MDIV_P0_SHIFT) & MDIV_P0_MASK;
+ *pdiv = (p1 >> PDIV_SHIFT) & PDIV_MASK;
+ *ndiv = (p1 >> NDIV_INT_SHIFT) & NDIV_INT_MASK;
+}
+
+static void brcm_avs_parse_p2(u32 p2, unsigned int *mdiv_p1,
+ unsigned int *mdiv_p2, unsigned int *mdiv_p3,
+ unsigned int *mdiv_p4)
+{
+ *mdiv_p4 = (p2 >> MDIV_P4_SHIFT) & MDIV_P4_MASK;
+ *mdiv_p3 = (p2 >> MDIV_P3_SHIFT) & MDIV_P3_MASK;
+ *mdiv_p2 = (p2 >> MDIV_P2_SHIFT) & MDIV_P2_MASK;
+ *mdiv_p1 = (p2 >> MDIV_P1_SHIFT) & MDIV_P1_MASK;
+}
+
+static int brcm_avs_get_pmap(struct private_data *priv, struct pmap *pmap)
+{
+ u32 args[AVS_MAX_CMD_ARGS];
+ int ret;
+
+ ret = __issue_avs_command(priv, AVS_CMD_GET_PMAP, false, args);
+ if (ret || !pmap)
+ return ret;
+
+ pmap->mode = args[0];
+ pmap->p1 = args[1];
+ pmap->p2 = args[2];
+ pmap->state = args[3];
+
+ return 0;
+}
+
+static int brcm_avs_set_pmap(struct private_data *priv, struct pmap *pmap)
+{
+ u32 args[AVS_MAX_CMD_ARGS];
+
+ args[0] = pmap->mode;
+ args[1] = pmap->p1;
+ args[2] = pmap->p2;
+ args[3] = pmap->state;
+
+ return __issue_avs_command(priv, AVS_CMD_SET_PMAP, true, args);
+}
+
+static int brcm_avs_get_pstate(struct private_data *priv, unsigned int *pstate)
+{
+ u32 args[AVS_MAX_CMD_ARGS];
+ int ret;
+
+ ret = __issue_avs_command(priv, AVS_CMD_GET_PSTATE, false, args);
+ if (ret)
+ return ret;
+ *pstate = args[0];
+
+ return 0;
+}
+
+static int brcm_avs_set_pstate(struct private_data *priv, unsigned int pstate)
+{
+ u32 args[AVS_MAX_CMD_ARGS];
+
+ args[0] = pstate;
+
+ return __issue_avs_command(priv, AVS_CMD_SET_PSTATE, true, args);
+}
+
+static unsigned long brcm_avs_get_voltage(void __iomem *base)
+{
+ return readl(base + AVS_MBOX_VOLTAGE1);
+}
+
+static unsigned long brcm_avs_get_frequency(void __iomem *base)
+{
+ return readl(base + AVS_MBOX_FREQUENCY) * 1000; /* in kHz */
+}
+
+/*
+ * We determine which frequencies are supported by cycling through all P-states
+ * and reading back what frequency we are running at for each P-state.
+ */
+static struct cpufreq_frequency_table *
+brcm_avs_get_freq_table(struct device *dev, struct private_data *priv)
+{
+ struct cpufreq_frequency_table *table;
+ unsigned int pstate;
+ int i, ret;
+
+ /* Remember P-state for later */
+ ret = brcm_avs_get_pstate(priv, &pstate);
+ if (ret)
+ return ERR_PTR(ret);
+
+ table = devm_kzalloc(dev, (AVS_PSTATE_MAX + 1) * sizeof(*table),
+ GFP_KERNEL);
+ if (!table)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = AVS_PSTATE_P0; i <= AVS_PSTATE_MAX; i++) {
+ ret = brcm_avs_set_pstate(priv, i);
+ if (ret)
+ return ERR_PTR(ret);
+ table[i].frequency = brcm_avs_get_frequency(priv->base);
+ table[i].driver_data = i;
+ }
+ table[i].frequency = CPUFREQ_TABLE_END;
+
+ /* Restore P-state */
+ ret = brcm_avs_set_pstate(priv, pstate);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return table;
+}
+
+#ifdef CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG
+
+#define MANT(x) (unsigned int)(abs((x)) / 1000)
+#define FRAC(x) (unsigned int)(abs((x)) - abs((x)) / 1000 * 1000)
+
+static int brcm_avs_debug_show(struct seq_file *s, void *data)
+{
+ struct debugfs_data *dbgfs = s->private;
+ void __iomem *base;
+ u32 val, offset;
+
+ if (!dbgfs) {
+ seq_puts(s, "No device pointer\n");
+ return 0;
+ }
+
+ base = dbgfs->priv->base;
+ offset = dbgfs->entry->offset;
+ val = readl(base + offset);
+ switch (dbgfs->entry->format) {
+ case DEBUGFS_NORMAL:
+ seq_printf(s, "%u\n", val);
+ break;
+ case DEBUGFS_FLOAT:
+ seq_printf(s, "%d.%03d\n", MANT(val), FRAC(val));
+ break;
+ case DEBUGFS_REV:
+ seq_printf(s, "%c.%c.%c.%c\n", (val >> 24 & 0xff),
+ (val >> 16 & 0xff), (val >> 8 & 0xff),
+ val & 0xff);
+ break;
+ }
+ seq_printf(s, "0x%08x\n", val);
+
+ return 0;
+}
+
+#undef MANT
+#undef FRAC
+
+static ssize_t brcm_avs_seq_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct seq_file *s = file->private_data;
+ struct debugfs_data *dbgfs = s->private;
+ struct private_data *priv = dbgfs->priv;
+ void __iomem *base, *avs_intr_base;
+ bool use_issue_command = false;
+ unsigned long val, offset;
+ char str[128];
+ int ret;
+ char *str_ptr = str;
+
+ if (size >= sizeof(str))
+ return -E2BIG;
+
+ memset(str, 0, sizeof(str));
+ ret = copy_from_user(str, buf, size);
+ if (ret)
+ return ret;
+
+ base = priv->base;
+ avs_intr_base = priv->avs_intr_base;
+ offset = dbgfs->entry->offset;
+ /*
+ * Special case writing to "command" entry only: if the string starts
+ * with a 'c', we use the driver's __issue_avs_command() function.
+ * Otherwise, we perform a raw write. This should allow testing of raw
+ * access as well as using the higher level function. (Raw access
+ * doesn't clear the firmware return status after issuing the command.)
+ */
+ if (str_ptr[0] == 'c' && offset == AVS_MBOX_COMMAND) {
+ use_issue_command = true;
+ str_ptr++;
+ }
+ if (kstrtoul(str_ptr, 0, &val) != 0)
+ return -EINVAL;
+
+ /*
+ * Setting the P-state is a special case. We need to update the CPU
+ * frequency we report.
+ */
+ if (val == AVS_CMD_SET_PSTATE) {
+ struct cpufreq_policy *policy;
+ unsigned int pstate;
+
+ policy = cpufreq_cpu_get(smp_processor_id());
+ /* Read back the P-state we are about to set */
+ pstate = readl(base + AVS_MBOX_PARAM(0));
+ if (use_issue_command) {
+ ret = brcm_avs_target_index(policy, pstate);
+ return ret ? ret : size;
+ }
+ policy->cur = policy->freq_table[pstate].frequency;
+ }
+
+ if (use_issue_command) {
+ ret = __issue_avs_command(priv, val, false, NULL);
+ } else {
+ /* Locking here is not perfect, but is only for debug. */
+ ret = down_interruptible(&priv->sem);
+ if (ret)
+ return ret;
+
+ writel(val, base + offset);
+ /* We have to wake up the firmware to process a command. */
+ if (offset == AVS_MBOX_COMMAND)
+ writel(AVS_CPU_L2_INT_MASK,
+ avs_intr_base + AVS_CPU_L2_SET0);
+ up(&priv->sem);
+ }
+
+ return ret ? ret : size;
+}
+
+static struct debugfs_entry *__find_debugfs_entry(const char *name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++)
+ if (strcasecmp(debugfs_entries[i].name, name) == 0)
+ return &debugfs_entries[i];
+
+ return NULL;
+}
+
+static int brcm_avs_debug_open(struct inode *inode, struct file *file)
+{
+ struct debugfs_data *data;
+ fmode_t fmode;
+ int ret;
+
+ /*
+ * seq_open(), which is called by single_open(), clears "write" access.
+ * We need write access to some files, so we preserve our access mode
+ * and restore it.
+ */
+ fmode = file->f_mode;
+ /*
+ * Check access permissions even for root. We don't want to be writing
+ * to read-only registers. Access for regular users has already been
+ * checked by the VFS layer.
+ */
+ if ((fmode & FMODE_WRITER) && !(inode->i_mode & S_IWUSR))
+ return -EACCES;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ /*
+ * We use the same file system operations for all our debug files. To
+ * produce specific output, we look up the file name upon opening a
+ * debugfs entry and map it to a memory offset. This offset is then used
+ * in the generic "show" function to read a specific register.
+ */
+ data->entry = __find_debugfs_entry(file->f_path.dentry->d_iname);
+ data->priv = inode->i_private;
+
+ ret = single_open(file, brcm_avs_debug_show, data);
+ if (ret)
+ kfree(data);
+ file->f_mode = fmode;
+
+ return ret;
+}
+
+static int brcm_avs_debug_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq_priv = file->private_data;
+ struct debugfs_data *data = seq_priv->private;
+
+ kfree(data);
+ return single_release(inode, file);
+}
+
+static const struct file_operations brcm_avs_debug_ops = {
+ .open = brcm_avs_debug_open,
+ .read = seq_read,
+ .write = brcm_avs_seq_write,
+ .llseek = seq_lseek,
+ .release = brcm_avs_debug_release,
+};
+
+static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev)
+{
+ struct private_data *priv = platform_get_drvdata(pdev);
+ struct dentry *dir;
+ int i;
+
+ if (!priv)
+ return;
+
+ dir = debugfs_create_dir(BRCM_AVS_CPUFREQ_NAME, NULL);
+ if (IS_ERR_OR_NULL(dir))
+ return;
+ priv->debugfs = dir;
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_entries); i++) {
+ /*
+ * The DEBUGFS_ENTRY macro generates uppercase strings. We
+ * convert them to lowercase before creating the debugfs
+ * entries.
+ */
+ char *entry = __strtolower(debugfs_entries[i].name);
+ fmode_t mode = debugfs_entries[i].mode;
+
+ if (!debugfs_create_file(entry, S_IFREG | S_IRUGO | mode,
+ dir, priv, &brcm_avs_debug_ops)) {
+ priv->debugfs = NULL;
+ debugfs_remove_recursive(dir);
+ break;
+ }
+ }
+}
+
+static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev)
+{
+ struct private_data *priv = platform_get_drvdata(pdev);
+
+ if (priv && priv->debugfs) {
+ debugfs_remove_recursive(priv->debugfs);
+ priv->debugfs = NULL;
+ }
+}
+
+#else
+
+static void brcm_avs_cpufreq_debug_init(struct platform_device *pdev) {}
+static void brcm_avs_cpufreq_debug_exit(struct platform_device *pdev) {}
+
+#endif /* CONFIG_ARM_BRCMSTB_AVS_CPUFREQ_DEBUG */
+
+/*
+ * To ensure the right firmware is running we need to
+ * - check the MAGIC matches what we expect
+ * - brcm_avs_get_pmap() doesn't return -ENOTSUPP or -EINVAL
+ * We need to set up our interrupt handling before calling brcm_avs_get_pmap()!
+ */
+static bool brcm_avs_is_firmware_loaded(struct private_data *priv)
+{
+ u32 magic;
+ int rc;
+
+ rc = brcm_avs_get_pmap(priv, NULL);
+ magic = readl(priv->base + AVS_MBOX_MAGIC);
+
+ return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) &&
+ (rc != -EINVAL);
+}
+
+static unsigned int brcm_avs_cpufreq_get(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct private_data *priv = policy->driver_data;
+
+ return brcm_avs_get_frequency(priv->base);
+}
+
+static int brcm_avs_target_index(struct cpufreq_policy *policy,
+ unsigned int index)
+{
+ return brcm_avs_set_pstate(policy->driver_data,
+ policy->freq_table[index].driver_data);
+}
+
+static int brcm_avs_suspend(struct cpufreq_policy *policy)
+{
+ struct private_data *priv = policy->driver_data;
+
+ return brcm_avs_get_pmap(priv, &priv->pmap);
+}
+
+static int brcm_avs_resume(struct cpufreq_policy *policy)
+{
+ struct private_data *priv = policy->driver_data;
+ int ret;
+
+ ret = brcm_avs_set_pmap(priv, &priv->pmap);
+ if (ret == -EEXIST) {
+ struct platform_device *pdev = cpufreq_get_driver_data();
+ struct device *dev = &pdev->dev;
+
+ dev_warn(dev, "PMAP was already set\n");
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * All initialization code that we only want to execute once goes here. Setup
+ * code that can be re-tried on every core (if it failed before) can go into
+ * brcm_avs_cpufreq_init().
+ */
+static int brcm_avs_prepare_init(struct platform_device *pdev)
+{
+ struct private_data *priv;
+ struct device *dev;
+ int host_irq, ret;
+
+ dev = &pdev->dev;
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ sema_init(&priv->sem, 1);
+ init_completion(&priv->done);
+ platform_set_drvdata(pdev, priv);
+
+ priv->base = __map_region(BRCM_AVS_CPU_DATA);
+ if (!priv->base) {
+ dev_err(dev, "Couldn't find property %s in device tree.\n",
+ BRCM_AVS_CPU_DATA);
+ return -ENOENT;
+ }
+
+ priv->avs_intr_base = __map_region(BRCM_AVS_CPU_INTR);
+ if (!priv->avs_intr_base) {
+ dev_err(dev, "Couldn't find property %s in device tree.\n",
+ BRCM_AVS_CPU_INTR);
+ ret = -ENOENT;
+ goto unmap_base;
+ }
+
+ host_irq = platform_get_irq_byname(pdev, BRCM_AVS_HOST_INTR);
+ if (host_irq < 0) {
+ dev_err(dev, "Couldn't find interrupt %s -- %d\n",
+ BRCM_AVS_HOST_INTR, host_irq);
+ ret = host_irq;
+ goto unmap_intr_base;
+ }
+
+ ret = devm_request_irq(dev, host_irq, irq_handler, IRQF_TRIGGER_RISING,
+ BRCM_AVS_HOST_INTR, priv);
+ if (ret) {
+ dev_err(dev, "IRQ request failed: %s (%d) -- %d\n",
+ BRCM_AVS_HOST_INTR, host_irq, ret);
+ goto unmap_intr_base;
+ }
+
+ if (brcm_avs_is_firmware_loaded(priv))
+ return 0;
+
+ dev_err(dev, "AVS firmware is not loaded or doesn't support DVFS\n");
+ ret = -ENODEV;
+
+unmap_intr_base:
+ iounmap(priv->avs_intr_base);
+unmap_base:
+ iounmap(priv->base);
+ platform_set_drvdata(pdev, NULL);
+
+ return ret;
+}
+
+static int brcm_avs_cpufreq_init(struct cpufreq_policy *policy)
+{
+ struct cpufreq_frequency_table *freq_table;
+ struct platform_device *pdev;
+ struct private_data *priv;
+ struct device *dev;
+ int ret;
+
+ pdev = cpufreq_get_driver_data();
+ priv = platform_get_drvdata(pdev);
+ policy->driver_data = priv;
+ dev = &pdev->dev;
+
+ freq_table = brcm_avs_get_freq_table(dev, priv);
+ if (IS_ERR(freq_table)) {
+ ret = PTR_ERR(freq_table);
+ dev_err(dev, "Couldn't determine frequency table (%d).\n", ret);
+ return ret;
+ }
+
+ ret = cpufreq_table_validate_and_show(policy, freq_table);
+ if (ret) {
+ dev_err(dev, "invalid frequency table: %d\n", ret);
+ return ret;
+ }
+
+ /* All cores share the same clock and thus the same policy. */
+ cpumask_setall(policy->cpus);
+
+ ret = __issue_avs_command(priv, AVS_CMD_ENABLE, false, NULL);
+ if (!ret) {
+ unsigned int pstate;
+
+ ret = brcm_avs_get_pstate(priv, &pstate);
+ if (!ret) {
+ policy->cur = freq_table[pstate].frequency;
+ dev_info(dev, "registered\n");
+ return 0;
+ }
+ }
+
+ dev_err(dev, "couldn't initialize driver (%d)\n", ret);
+
+ return ret;
+}
+
+static ssize_t show_brcm_avs_pstate(struct cpufreq_policy *policy, char *buf)
+{
+ struct private_data *priv = policy->driver_data;
+ unsigned int pstate;
+
+ if (brcm_avs_get_pstate(priv, &pstate))
+ return sprintf(buf, "<unknown>\n");
+
+ return sprintf(buf, "%u\n", pstate);
+}
+
+static ssize_t show_brcm_avs_mode(struct cpufreq_policy *policy, char *buf)
+{
+ struct private_data *priv = policy->driver_data;
+ struct pmap pmap;
+
+ if (brcm_avs_get_pmap(priv, &pmap))
+ return sprintf(buf, "<unknown>\n");
+
+ return sprintf(buf, "%s %u\n", brcm_avs_mode_to_string(pmap.mode),
+ pmap.mode);
+}
+
+static ssize_t show_brcm_avs_pmap(struct cpufreq_policy *policy, char *buf)
+{
+ unsigned int mdiv_p0, mdiv_p1, mdiv_p2, mdiv_p3, mdiv_p4;
+ struct private_data *priv = policy->driver_data;
+ unsigned int ndiv, pdiv;
+ struct pmap pmap;
+
+ if (brcm_avs_get_pmap(priv, &pmap))
+ return sprintf(buf, "<unknown>\n");
+
+ brcm_avs_parse_p1(pmap.p1, &mdiv_p0, &pdiv, &ndiv);
+ brcm_avs_parse_p2(pmap.p2, &mdiv_p1, &mdiv_p2, &mdiv_p3, &mdiv_p4);
+
+ return sprintf(buf, "0x%08x 0x%08x %u %u %u %u %u %u %u\n",
+ pmap.p1, pmap.p2, ndiv, pdiv, mdiv_p0, mdiv_p1, mdiv_p2,
+ mdiv_p3, mdiv_p4);
+}
+
+static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf)
+{
+ struct private_data *priv = policy->driver_data;
+
+ return sprintf(buf, "0x%08lx\n", brcm_avs_get_voltage(priv->base));
+}
+
+static ssize_t show_brcm_avs_frequency(struct cpufreq_policy *policy, char *buf)
+{
+ struct private_data *priv = policy->driver_data;
+
+ return sprintf(buf, "0x%08lx\n", brcm_avs_get_frequency(priv->base));
+}
+
+cpufreq_freq_attr_ro(brcm_avs_pstate);
+cpufreq_freq_attr_ro(brcm_avs_mode);
+cpufreq_freq_attr_ro(brcm_avs_pmap);
+cpufreq_freq_attr_ro(brcm_avs_voltage);
+cpufreq_freq_attr_ro(brcm_avs_frequency);
+
+static struct freq_attr *brcm_avs_cpufreq_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ &brcm_avs_pstate,
+ &brcm_avs_mode,
+ &brcm_avs_pmap,
+ &brcm_avs_voltage,
+ &brcm_avs_frequency,
+ NULL
+};
+
+static struct cpufreq_driver brcm_avs_driver = {
+ .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+ .verify = cpufreq_generic_frequency_table_verify,
+ .target_index = brcm_avs_target_index,
+ .get = brcm_avs_cpufreq_get,
+ .suspend = brcm_avs_suspend,
+ .resume = brcm_avs_resume,
+ .init = brcm_avs_cpufreq_init,
+ .attr = brcm_avs_cpufreq_attr,
+ .name = BRCM_AVS_CPUFREQ_PREFIX,
+};
+
+static int brcm_avs_cpufreq_probe(struct platform_device *pdev)
+{
+ int ret;
+
+ ret = brcm_avs_prepare_init(pdev);
+ if (ret)
+ return ret;
+
+ brcm_avs_driver.driver_data = pdev;
+ ret = cpufreq_register_driver(&brcm_avs_driver);
+ if (!ret)
+ brcm_avs_cpufreq_debug_init(pdev);
+
+ return ret;
+}
+
+static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
+{
+ struct private_data *priv;
+ int ret;
+
+ ret = cpufreq_unregister_driver(&brcm_avs_driver);
+ if (ret)
+ return ret;
+
+ brcm_avs_cpufreq_debug_exit(pdev);
+
+ priv = platform_get_drvdata(pdev);
+ iounmap(priv->base);
+ iounmap(priv->avs_intr_base);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+static const struct of_device_id brcm_avs_cpufreq_match[] = {
+ { .compatible = BRCM_AVS_CPU_DATA },
+ { }
+};
+MODULE_DEVICE_TABLE(of, brcm_avs_cpufreq_match);
+
+static struct platform_driver brcm_avs_cpufreq_platdrv = {
+ .driver = {
+ .name = BRCM_AVS_CPUFREQ_NAME,
+ .of_match_table = brcm_avs_cpufreq_match,
+ },
+ .probe = brcm_avs_cpufreq_probe,
+ .remove = brcm_avs_cpufreq_remove,
+};
+module_platform_driver(brcm_avs_cpufreq_platdrv);
+
+MODULE_AUTHOR("Markus Mayer <mmayer@broadcom.com>");
+MODULE_DESCRIPTION("CPUfreq driver for Broadcom STB AVS");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 4852d9efe74e..e82bb3c30b92 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -247,3 +247,10 @@ MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");
MODULE_LICENSE("GPL");
late_initcall(cppc_cpufreq_init);
+
+static const struct acpi_device_id cppc_acpi_ids[] = {
+ {ACPI_PROCESSOR_DEVICE_HID, },
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, cppc_acpi_ids);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 71267626456b..bc97b6a4b1cf 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -26,6 +26,9 @@ static const struct of_device_id machines[] __initconst = {
{ .compatible = "allwinner,sun8i-a83t", },
{ .compatible = "allwinner,sun8i-h3", },
+ { .compatible = "arm,integrator-ap", },
+ { .compatible = "arm,integrator-cp", },
+
{ .compatible = "hisilicon,hi6220", },
{ .compatible = "fsl,imx27", },
@@ -34,6 +37,8 @@ static const struct of_device_id machines[] __initconst = {
{ .compatible = "fsl,imx7d", },
{ .compatible = "marvell,berlin", },
+ { .compatible = "marvell,pxa250", },
+ { .compatible = "marvell,pxa270", },
{ .compatible = "samsung,exynos3250", },
{ .compatible = "samsung,exynos4210", },
@@ -50,6 +55,8 @@ static const struct of_device_id machines[] __initconst = {
{ .compatible = "renesas,r7s72100", },
{ .compatible = "renesas,r8a73a4", },
{ .compatible = "renesas,r8a7740", },
+ { .compatible = "renesas,r8a7743", },
+ { .compatible = "renesas,r8a7745", },
{ .compatible = "renesas,r8a7778", },
{ .compatible = "renesas,r8a7779", },
{ .compatible = "renesas,r8a7790", },
@@ -72,6 +79,12 @@ static const struct of_device_id machines[] __initconst = {
{ .compatible = "sigma,tango4" },
+ { .compatible = "socionext,uniphier-pro5", },
+ { .compatible = "socionext,uniphier-pxs2", },
+ { .compatible = "socionext,uniphier-ld6b", },
+ { .compatible = "socionext,uniphier-ld11", },
+ { .compatible = "socionext,uniphier-ld20", },
+
{ .compatible = "ti,am33xx", },
{ .compatible = "ti,dra7", },
{ .compatible = "ti,omap2", },
@@ -81,6 +94,8 @@ static const struct of_device_id machines[] __initconst = {
{ .compatible = "xlnx,zynq-7000", },
+ { .compatible = "zte,zx296718", },
+
{ }
};
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6e6c1fb60fbc..cc475eff90b3 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1526,7 +1526,10 @@ unsigned int cpufreq_get(unsigned int cpu)
if (policy) {
down_read(&policy->rwsem);
- ret_freq = __cpufreq_get(policy);
+
+ if (!policy_is_inactive(policy))
+ ret_freq = __cpufreq_get(policy);
+
up_read(&policy->rwsem);
cpufreq_cpu_put(policy);
@@ -2254,17 +2257,19 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
* Useful for policy notifiers which have different necessities
* at different times.
*/
-int cpufreq_update_policy(unsigned int cpu)
+void cpufreq_update_policy(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct cpufreq_policy new_policy;
- int ret;
if (!policy)
- return -ENODEV;
+ return;
down_write(&policy->rwsem);
+ if (policy_is_inactive(policy))
+ goto unlock;
+
pr_debug("updating policy for CPU %u\n", cpu);
memcpy(&new_policy, policy, sizeof(*policy));
new_policy.min = policy->user_policy.min;
@@ -2275,24 +2280,20 @@ int cpufreq_update_policy(unsigned int cpu)
* -> ask driver for current freq and notify governors about a change
*/
if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
- if (cpufreq_suspended) {
- ret = -EAGAIN;
+ if (cpufreq_suspended)
goto unlock;
- }
+
new_policy.cur = cpufreq_update_current_freq(policy);
- if (WARN_ON(!new_policy.cur)) {
- ret = -EIO;
+ if (WARN_ON(!new_policy.cur))
goto unlock;
- }
}
- ret = cpufreq_set_policy(policy, &new_policy);
+ cpufreq_set_policy(policy, &new_policy);
unlock:
up_write(&policy->rwsem);
cpufreq_cpu_put(policy);
- return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 13475890d792..992f7c20760f 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -37,16 +37,16 @@ struct cs_dbs_tuners {
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (10)
-static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
- struct cpufreq_policy *policy)
+static inline unsigned int get_freq_step(struct cs_dbs_tuners *cs_tuners,
+ struct cpufreq_policy *policy)
{
- unsigned int freq_target = (cs_tuners->freq_step * policy->max) / 100;
+ unsigned int freq_step = (cs_tuners->freq_step * policy->max) / 100;
/* max freq cannot be less than 100. But who knows... */
- if (unlikely(freq_target == 0))
- freq_target = DEF_FREQUENCY_STEP;
+ if (unlikely(freq_step == 0))
+ freq_step = DEF_FREQUENCY_STEP;
- return freq_target;
+ return freq_step;
}
/*
@@ -55,10 +55,10 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
* sampling_down_factor, we check, if current idle time is more than 80%
* (default), then we try to decrease frequency
*
- * Any frequency increase takes it to the maximum frequency. Frequency reduction
- * happens at minimum steps of 5% (default) of maximum frequency
+ * Frequency updates happen at minimum steps of 5% (default) of maximum
+ * frequency
*/
-static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
+static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
{
struct policy_dbs_info *policy_dbs = policy->governor_data;
struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
@@ -66,6 +66,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int load = dbs_update(policy);
+ unsigned int freq_step;
/*
* break out if we 'cannot' reduce the speed as the user might
@@ -82,6 +83,23 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
if (requested_freq > policy->max || requested_freq < policy->min)
requested_freq = policy->cur;
+ freq_step = get_freq_step(cs_tuners, policy);
+
+ /*
+ * Decrease requested_freq one freq_step for each idle period that
+ * we didn't update the frequency.
+ */
+ if (policy_dbs->idle_periods < UINT_MAX) {
+ unsigned int freq_steps = policy_dbs->idle_periods * freq_step;
+
+ if (requested_freq > freq_steps)
+ requested_freq -= freq_steps;
+ else
+ requested_freq = policy->min;
+
+ policy_dbs->idle_periods = UINT_MAX;
+ }
+
/* Check for frequency increase */
if (load > dbs_data->up_threshold) {
dbs_info->down_skip = 0;
@@ -90,7 +108,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
if (requested_freq == policy->max)
goto out;
- requested_freq += get_freq_target(cs_tuners, policy);
+ requested_freq += freq_step;
if (requested_freq > policy->max)
requested_freq = policy->max;
@@ -106,16 +124,14 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
/* Check for frequency decrease */
if (load < cs_tuners->down_threshold) {
- unsigned int freq_target;
/*
* if we cannot reduce the frequency anymore, break out early
*/
if (requested_freq == policy->min)
goto out;
- freq_target = get_freq_target(cs_tuners, policy);
- if (requested_freq > freq_target)
- requested_freq -= freq_target;
+ if (requested_freq > freq_step)
+ requested_freq -= freq_step;
else
requested_freq = policy->min;
@@ -305,7 +321,7 @@ static void cs_start(struct cpufreq_policy *policy)
static struct dbs_governor cs_governor = {
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"),
.kobj_type = { .default_attrs = cs_attributes },
- .gov_dbs_timer = cs_dbs_timer,
+ .gov_dbs_update = cs_dbs_update,
.alloc = cs_alloc,
.free = cs_free,
.init = cs_init,
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 642dd0f183a8..0196467280bd 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -61,7 +61,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
* entries can't be freed concurrently.
*/
list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
- mutex_lock(&policy_dbs->timer_mutex);
+ mutex_lock(&policy_dbs->update_mutex);
/*
* On 32-bit architectures this may race with the
* sample_delay_ns read in dbs_update_util_handler(), but that
@@ -76,7 +76,7 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
* taken, so it shouldn't be significant.
*/
gov_update_sample_delay(policy_dbs, 0);
- mutex_unlock(&policy_dbs->timer_mutex);
+ mutex_unlock(&policy_dbs->update_mutex);
}
return count;
@@ -117,7 +117,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
struct policy_dbs_info *policy_dbs = policy->governor_data;
struct dbs_data *dbs_data = policy_dbs->dbs_data;
unsigned int ignore_nice = dbs_data->ignore_nice_load;
- unsigned int max_load = 0;
+ unsigned int max_load = 0, idle_periods = UINT_MAX;
unsigned int sampling_rate, io_busy, j;
/*
@@ -215,9 +215,19 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
j_cdbs->prev_load = load;
}
+ if (time_elapsed > 2 * sampling_rate) {
+ unsigned int periods = time_elapsed / sampling_rate;
+
+ if (periods < idle_periods)
+ idle_periods = periods;
+ }
+
if (load > max_load)
max_load = load;
}
+
+ policy_dbs->idle_periods = idle_periods;
+
return max_load;
}
EXPORT_SYMBOL_GPL(dbs_update);
@@ -236,9 +246,9 @@ static void dbs_work_handler(struct work_struct *work)
* Make sure cpufreq_governor_limits() isn't evaluating load or the
* ondemand governor isn't updating the sampling rate in parallel.
*/
- mutex_lock(&policy_dbs->timer_mutex);
- gov_update_sample_delay(policy_dbs, gov->gov_dbs_timer(policy));
- mutex_unlock(&policy_dbs->timer_mutex);
+ mutex_lock(&policy_dbs->update_mutex);
+ gov_update_sample_delay(policy_dbs, gov->gov_dbs_update(policy));
+ mutex_unlock(&policy_dbs->update_mutex);
/* Allow the utilization update handler to queue up more work. */
atomic_set(&policy_dbs->work_count, 0);
@@ -348,7 +358,7 @@ static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *poli
return NULL;
policy_dbs->policy = policy;
- mutex_init(&policy_dbs->timer_mutex);
+ mutex_init(&policy_dbs->update_mutex);
atomic_set(&policy_dbs->work_count, 0);
init_irq_work(&policy_dbs->irq_work, dbs_irq_work);
INIT_WORK(&policy_dbs->work, dbs_work_handler);
@@ -367,7 +377,7 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
{
int j;
- mutex_destroy(&policy_dbs->timer_mutex);
+ mutex_destroy(&policy_dbs->update_mutex);
for_each_cpu(j, policy_dbs->policy->related_cpus) {
struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
@@ -547,10 +557,10 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy)
{
struct policy_dbs_info *policy_dbs = policy->governor_data;
- mutex_lock(&policy_dbs->timer_mutex);
+ mutex_lock(&policy_dbs->update_mutex);
cpufreq_policy_apply_limits(policy);
gov_update_sample_delay(policy_dbs, 0);
- mutex_unlock(&policy_dbs->timer_mutex);
+ mutex_unlock(&policy_dbs->update_mutex);
}
EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index ef1037e9c92b..f5717ca070cc 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -85,7 +85,7 @@ struct policy_dbs_info {
* Per policy mutex that serializes load evaluation from limit-change
* and work-handler.
*/
- struct mutex timer_mutex;
+ struct mutex update_mutex;
u64 last_sample_time;
s64 sample_delay_ns;
@@ -97,6 +97,7 @@ struct policy_dbs_info {
struct list_head list;
/* Multiplier for increasing sample delay temporarily. */
unsigned int rate_mult;
+ unsigned int idle_periods; /* For conservative */
/* Status indicators */
bool is_shared; /* This object is used by multiple CPUs */
bool work_in_progress; /* Work is being queued up or in progress */
@@ -135,7 +136,7 @@ struct dbs_governor {
*/
struct dbs_data *gdbs_data;
- unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy);
+ unsigned int (*gov_dbs_update)(struct cpufreq_policy *policy);
struct policy_dbs_info *(*alloc)(void);
void (*free)(struct policy_dbs_info *policy_dbs);
int (*init)(struct dbs_data *dbs_data);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 3a1f49f5f4c6..4a017e895296 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -25,7 +25,7 @@
#define MAX_SAMPLING_DOWN_FACTOR (100000)
#define MICRO_FREQUENCY_UP_THRESHOLD (95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
-#define MIN_FREQUENCY_UP_THRESHOLD (11)
+#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
static struct od_ops od_ops;
@@ -169,7 +169,7 @@ static void od_update(struct cpufreq_policy *policy)
}
}
-static unsigned int od_dbs_timer(struct cpufreq_policy *policy)
+static unsigned int od_dbs_update(struct cpufreq_policy *policy)
{
struct policy_dbs_info *policy_dbs = policy->governor_data;
struct dbs_data *dbs_data = policy_dbs->dbs_data;
@@ -191,7 +191,7 @@ static unsigned int od_dbs_timer(struct cpufreq_policy *policy)
od_update(policy);
if (dbs_info->freq_lo) {
- /* Setup timer for SUB_SAMPLE */
+ /* Setup SUB_SAMPLE */
dbs_info->sample_type = OD_SUB_SAMPLE;
return dbs_info->freq_hi_delay_us;
}
@@ -255,11 +255,11 @@ static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
/*
* Doing this without locking might lead to using different
- * rate_mult values in od_update() and od_dbs_timer().
+ * rate_mult values in od_update() and od_dbs_update().
*/
- mutex_lock(&policy_dbs->timer_mutex);
+ mutex_lock(&policy_dbs->update_mutex);
policy_dbs->rate_mult = 1;
- mutex_unlock(&policy_dbs->timer_mutex);
+ mutex_unlock(&policy_dbs->update_mutex);
}
return count;
@@ -374,8 +374,7 @@ static int od_init(struct dbs_data *dbs_data)
dbs_data->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
/*
* In nohz/micro accounting case we set the minimum frequency
- * not depending on HZ, but fixed (very low). The deferred
- * timer might skip some samples if idle/sleeping as needed.
+ * not depending on HZ, but fixed (very low).
*/
dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
} else {
@@ -415,7 +414,7 @@ static struct od_ops od_ops = {
static struct dbs_governor od_dbs_gov = {
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
.kobj_type = { .default_attrs = od_attributes },
- .gov_dbs_timer = od_dbs_timer,
+ .gov_dbs_update = od_dbs_update,
.alloc = od_alloc,
.free = od_free,
.init = od_init,
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 06d3abdffd3a..ac284e66839c 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -41,6 +41,18 @@ static int cpufreq_stats_update(struct cpufreq_stats *stats)
return 0;
}
+static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
+{
+ unsigned int count = stats->max_state;
+
+ memset(stats->time_in_state, 0, count * sizeof(u64));
+#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
+ memset(stats->trans_table, 0, count * count * sizeof(int));
+#endif
+ stats->last_time = get_jiffies_64();
+ stats->total_trans = 0;
+}
+
static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
{
return sprintf(buf, "%d\n", policy->stats->total_trans);
@@ -64,6 +76,14 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
return len;
}
+static ssize_t store_reset(struct cpufreq_policy *policy, const char *buf,
+ size_t count)
+{
+ /* We don't care what is written to the attribute. */
+ cpufreq_stats_clear_table(policy->stats);
+ return count;
+}
+
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
{
@@ -113,10 +133,12 @@ cpufreq_freq_attr_ro(trans_table);
cpufreq_freq_attr_ro(total_trans);
cpufreq_freq_attr_ro(time_in_state);
+cpufreq_freq_attr_wo(reset);
static struct attribute *default_attrs[] = {
&total_trans.attr,
&time_in_state.attr,
+ &reset.attr,
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
&trans_table.attr,
#endif
diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c
deleted file mode 100644
index 79e3ff2771a6..000000000000
--- a/drivers/cpufreq/integrator-cpufreq.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2001-2002 Deep Blue Solutions Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * CPU support functions
- */
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/cpufreq.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include <asm/mach-types.h>
-#include <asm/hardware/icst.h>
-
-static void __iomem *cm_base;
-/* The cpufreq driver only use the OSC register */
-#define INTEGRATOR_HDR_OSC_OFFSET 0x08
-#define INTEGRATOR_HDR_LOCK_OFFSET 0x14
-
-static struct cpufreq_driver integrator_driver;
-
-static const struct icst_params lclk_params = {
- .ref = 24000000,
- .vco_max = ICST525_VCO_MAX_5V,
- .vco_min = ICST525_VCO_MIN,
- .vd_min = 8,
- .vd_max = 132,
- .rd_min = 24,
- .rd_max = 24,
- .s2div = icst525_s2div,
- .idx2s = icst525_idx2s,
-};
-
-static const struct icst_params cclk_params = {
- .ref = 24000000,
- .vco_max = ICST525_VCO_MAX_5V,
- .vco_min = ICST525_VCO_MIN,
- .vd_min = 12,
- .vd_max = 160,
- .rd_min = 24,
- .rd_max = 24,
- .s2div = icst525_s2div,
- .idx2s = icst525_idx2s,
-};
-
-/*
- * Validate the speed policy.
- */
-static int integrator_verify_policy(struct cpufreq_policy *policy)
-{
- struct icst_vco vco;
-
- cpufreq_verify_within_cpu_limits(policy);
-
- vco = icst_hz_to_vco(&cclk_params, policy->max * 1000);
- policy->max = icst_hz(&cclk_params, vco) / 1000;
-
- vco = icst_hz_to_vco(&cclk_params, policy->min * 1000);
- policy->min = icst_hz(&cclk_params, vco) / 1000;
-
- cpufreq_verify_within_cpu_limits(policy);
- return 0;
-}
-
-
-static int integrator_set_target(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- cpumask_t cpus_allowed;
- int cpu = policy->cpu;
- struct icst_vco vco;
- struct cpufreq_freqs freqs;
- u_int cm_osc;
-
- /*
- * Save this threads cpus_allowed mask.
- */
- cpus_allowed = current->cpus_allowed;
-
- /*
- * Bind to the specified CPU. When this call returns,
- * we should be running on the right CPU.
- */
- set_cpus_allowed_ptr(current, cpumask_of(cpu));
- BUG_ON(cpu != smp_processor_id());
-
- /* get current setting */
- cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET);
-
- if (machine_is_integrator())
- vco.s = (cm_osc >> 8) & 7;
- else if (machine_is_cintegrator())
- vco.s = 1;
- vco.v = cm_osc & 255;
- vco.r = 22;
- freqs.old = icst_hz(&cclk_params, vco) / 1000;
-
- /* icst_hz_to_vco rounds down -- so we need the next
- * larger freq in case of CPUFREQ_RELATION_L.
- */
- if (relation == CPUFREQ_RELATION_L)
- target_freq += 999;
- if (target_freq > policy->max)
- target_freq = policy->max;
- vco = icst_hz_to_vco(&cclk_params, target_freq * 1000);
- freqs.new = icst_hz(&cclk_params, vco) / 1000;
-
- if (freqs.old == freqs.new) {
- set_cpus_allowed_ptr(current, &cpus_allowed);
- return 0;
- }
-
- cpufreq_freq_transition_begin(policy, &freqs);
-
- cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET);
-
- if (machine_is_integrator()) {
- cm_osc &= 0xfffff800;
- cm_osc |= vco.s << 8;
- } else if (machine_is_cintegrator()) {
- cm_osc &= 0xffffff00;
- }
- cm_osc |= vco.v;
-
- __raw_writel(0xa05f, cm_base + INTEGRATOR_HDR_LOCK_OFFSET);
- __raw_writel(cm_osc, cm_base + INTEGRATOR_HDR_OSC_OFFSET);
- __raw_writel(0, cm_base + INTEGRATOR_HDR_LOCK_OFFSET);
-
- /*
- * Restore the CPUs allowed mask.
- */
- set_cpus_allowed_ptr(current, &cpus_allowed);
-
- cpufreq_freq_transition_end(policy, &freqs, 0);
-
- return 0;
-}
-
-static unsigned int integrator_get(unsigned int cpu)
-{
- cpumask_t cpus_allowed;
- unsigned int current_freq;
- u_int cm_osc;
- struct icst_vco vco;
-
- cpus_allowed = current->cpus_allowed;
-
- set_cpus_allowed_ptr(current, cpumask_of(cpu));
- BUG_ON(cpu != smp_processor_id());
-
- /* detect memory etc. */
- cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET);
-
- if (machine_is_integrator())
- vco.s = (cm_osc >> 8) & 7;
- else
- vco.s = 1;
- vco.v = cm_osc & 255;
- vco.r = 22;
-
- current_freq = icst_hz(&cclk_params, vco) / 1000; /* current freq */
-
- set_cpus_allowed_ptr(current, &cpus_allowed);
-
- return current_freq;
-}
-
-static int integrator_cpufreq_init(struct cpufreq_policy *policy)
-{
-
- /* set default policy and cpuinfo */
- policy->max = policy->cpuinfo.max_freq = 160000;
- policy->min = policy->cpuinfo.min_freq = 12000;
- policy->cpuinfo.transition_latency = 1000000; /* 1 ms, assumed */
-
- return 0;
-}
-
-static struct cpufreq_driver integrator_driver = {
- .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
- .verify = integrator_verify_policy,
- .target = integrator_set_target,
- .get = integrator_get,
- .init = integrator_cpufreq_init,
- .name = "integrator",
-};
-
-static int __init integrator_cpufreq_probe(struct platform_device *pdev)
-{
- struct resource *res;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENODEV;
-
- cm_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
- if (!cm_base)
- return -ENODEV;
-
- return cpufreq_register_driver(&integrator_driver);
-}
-
-static int __exit integrator_cpufreq_remove(struct platform_device *pdev)
-{
- return cpufreq_unregister_driver(&integrator_driver);
-}
-
-static const struct of_device_id integrator_cpufreq_match[] = {
- { .compatible = "arm,core-module-integrator"},
- { },
-};
-
-MODULE_DEVICE_TABLE(of, integrator_cpufreq_match);
-
-static struct platform_driver integrator_cpufreq_driver = {
- .driver = {
- .name = "integrator-cpufreq",
- .of_match_table = integrator_cpufreq_match,
- },
- .remove = __exit_p(integrator_cpufreq_remove),
-};
-
-module_platform_driver_probe(integrator_cpufreq_driver,
- integrator_cpufreq_probe);
-
-MODULE_AUTHOR("Russell M. King");
-MODULE_DESCRIPTION("cpufreq driver for ARM Integrator CPUs");
-MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4737520ec823..7cd0177ddeaf 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -37,6 +37,8 @@
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
+#define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
+
#define ATOM_RATIOS 0x66a
#define ATOM_VIDS 0x66b
#define ATOM_TURBO_RATIOS 0x66c
@@ -52,6 +54,8 @@
#define EXT_BITS 6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
+#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
+#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
static inline int32_t mul_fp(int32_t x, int32_t y)
{
@@ -122,6 +126,8 @@ struct sample {
* @scaling: Scaling factor to convert frequency to cpufreq
* frequency units
* @turbo_pstate: Max Turbo P state possible for this platform
+ * @max_freq: @max_pstate frequency in cpufreq units
+ * @turbo_freq: @turbo_pstate frequency in cpufreq units
*
* Stores the per cpu model P state limits and current P state.
*/
@@ -132,6 +138,8 @@ struct pstate_data {
int max_pstate_physical;
int scaling;
int turbo_pstate;
+ unsigned int max_freq;
+ unsigned int turbo_freq;
};
/**
@@ -177,6 +185,48 @@ struct _pid {
};
/**
+ * struct perf_limits - Store user and policy limits
+ * @no_turbo: User requested turbo state from intel_pstate sysfs
+ * @turbo_disabled: Platform turbo status either from msr
+ * MSR_IA32_MISC_ENABLE or when maximum available pstate
+ * matches the maximum turbo pstate
+ * @max_perf_pct: Effective maximum performance limit in percentage, this
+ * is minimum of either limits enforced by cpufreq policy
+ * or limits from user set limits via intel_pstate sysfs
+ * @min_perf_pct: Effective minimum performance limit in percentage, this
+ * is maximum of either limits enforced by cpufreq policy
+ * or limits from user set limits via intel_pstate sysfs
+ * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
+ * This value is used to limit max pstate
+ * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
+ * This value is used to limit min pstate
+ * @max_policy_pct: The maximum performance in percentage enforced by
+ * cpufreq setpolicy interface
+ * @max_sysfs_pct: The maximum performance in percentage enforced by
+ * intel pstate sysfs interface, unused when per cpu
+ * controls are enforced
+ * @min_policy_pct: The minimum performance in percentage enforced by
+ * cpufreq setpolicy interface
+ * @min_sysfs_pct: The minimum performance in percentage enforced by
+ * intel pstate sysfs interface, unused when per cpu
+ * controls are enforced
+ *
+ * Storage for user and policy defined limits.
+ */
+struct perf_limits {
+ int no_turbo;
+ int turbo_disabled;
+ int max_perf_pct;
+ int min_perf_pct;
+ int32_t max_perf;
+ int32_t min_perf;
+ int max_policy_pct;
+ int max_sysfs_pct;
+ int min_policy_pct;
+ int min_sysfs_pct;
+};
+
+/**
* struct cpudata - Per CPU instance data storage
* @cpu: CPU number for this instance data
* @policy: CPUFreq policy value
@@ -194,8 +244,19 @@ struct _pid {
* @prev_cummulative_iowait: IO Wait time difference from last and
* current sample
* @sample: Storage for storing last Sample data
+ * @perf_limits: Pointer to perf_limit unique to this CPU
+ * Not all field in the structure are applicable
+ * when per cpu controls are enforced
* @acpi_perf_data: Stores ACPI perf information read from _PSS
* @valid_pss_table: Set to true for valid ACPI _PSS entries found
+ * @epp_powersave: Last saved HWP energy performance preference
+ * (EPP) or energy performance bias (EPB),
+ * when policy switched to performance
+ * @epp_policy: Last saved policy used to set EPP/EPB
+ * @epp_default: Power on default HWP energy performance
+ * preference/bias
+ * @epp_saved: Saved EPP/EPB during system suspend or CPU offline
+ * operation
*
* This structure stores per CPU instance data for all CPUs.
*/
@@ -217,11 +278,16 @@ struct cpudata {
u64 prev_tsc;
u64 prev_cummulative_iowait;
struct sample sample;
+ struct perf_limits *perf_limits;
#ifdef CONFIG_ACPI
struct acpi_processor_performance acpi_perf_data;
bool valid_pss_table;
#endif
unsigned int iowait_boost;
+ s16 epp_powersave;
+ s16 epp_policy;
+ s16 epp_default;
+ s16 epp_saved;
};
static struct cpudata **all_cpu_data;
@@ -235,7 +301,6 @@ static struct cpudata **all_cpu_data;
* @p_gain_pct: PID proportional gain
* @i_gain_pct: PID integral gain
* @d_gain_pct: PID derivative gain
- * @boost_iowait: Whether or not to use iowait boosting.
*
* Stores per CPU model static PID configuration data.
*/
@@ -247,7 +312,6 @@ struct pstate_adjust_policy {
int p_gain_pct;
int d_gain_pct;
int i_gain_pct;
- bool boost_iowait;
};
/**
@@ -291,58 +355,19 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
static struct pstate_adjust_policy pid_params __read_mostly;
static struct pstate_funcs pstate_funcs __read_mostly;
static int hwp_active __read_mostly;
+static bool per_cpu_limits __read_mostly;
#ifdef CONFIG_ACPI
static bool acpi_ppc;
#endif
-/**
- * struct perf_limits - Store user and policy limits
- * @no_turbo: User requested turbo state from intel_pstate sysfs
- * @turbo_disabled: Platform turbo status either from msr
- * MSR_IA32_MISC_ENABLE or when maximum available pstate
- * matches the maximum turbo pstate
- * @max_perf_pct: Effective maximum performance limit in percentage, this
- * is minimum of either limits enforced by cpufreq policy
- * or limits from user set limits via intel_pstate sysfs
- * @min_perf_pct: Effective minimum performance limit in percentage, this
- * is maximum of either limits enforced by cpufreq policy
- * or limits from user set limits via intel_pstate sysfs
- * @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
- * This value is used to limit max pstate
- * @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
- * This value is used to limit min pstate
- * @max_policy_pct: The maximum performance in percentage enforced by
- * cpufreq setpolicy interface
- * @max_sysfs_pct: The maximum performance in percentage enforced by
- * intel pstate sysfs interface
- * @min_policy_pct: The minimum performance in percentage enforced by
- * cpufreq setpolicy interface
- * @min_sysfs_pct: The minimum performance in percentage enforced by
- * intel pstate sysfs interface
- *
- * Storage for user and policy defined limits.
- */
-struct perf_limits {
- int no_turbo;
- int turbo_disabled;
- int max_perf_pct;
- int min_perf_pct;
- int32_t max_perf;
- int32_t min_perf;
- int max_policy_pct;
- int max_sysfs_pct;
- int min_policy_pct;
- int min_sysfs_pct;
-};
-
static struct perf_limits performance_limits = {
.no_turbo = 0,
.turbo_disabled = 0,
.max_perf_pct = 100,
- .max_perf = int_tofp(1),
+ .max_perf = int_ext_tofp(1),
.min_perf_pct = 100,
- .min_perf = int_tofp(1),
+ .min_perf = int_ext_tofp(1),
.max_policy_pct = 100,
.max_sysfs_pct = 100,
.min_policy_pct = 0,
@@ -353,7 +378,7 @@ static struct perf_limits powersave_limits = {
.no_turbo = 0,
.turbo_disabled = 0,
.max_perf_pct = 100,
- .max_perf = int_tofp(1),
+ .max_perf = int_ext_tofp(1),
.min_perf_pct = 0,
.min_perf = 0,
.max_policy_pct = 100,
@@ -368,6 +393,8 @@ static struct perf_limits *limits = &performance_limits;
static struct perf_limits *limits = &powersave_limits;
#endif
+static DEFINE_MUTEX(intel_pstate_limits_lock);
+
#ifdef CONFIG_ACPI
static bool intel_pstate_get_ppc_enable_status(void)
@@ -459,11 +486,11 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
}
#else
-static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}
-static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}
#endif
@@ -559,24 +586,252 @@ static inline void update_turbo_state(void)
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
+static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
+{
+ u64 epb;
+ int ret;
+
+ if (!static_cpu_has(X86_FEATURE_EPB))
+ return -ENXIO;
+
+ ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret)
+ return (s16)ret;
+
+ return (s16)(epb & 0x0f);
+}
+
+static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
+{
+ s16 epp;
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * When hwp_req_data is 0, means that caller didn't read
+ * MSR_HWP_REQUEST, so need to read and get EPP.
+ */
+ if (!hwp_req_data) {
+ epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
+ &hwp_req_data);
+ if (epp)
+ return epp;
+ }
+ epp = (hwp_req_data >> 24) & 0xff;
+ } else {
+ /* When there is no EPP present, HWP uses EPB settings */
+ epp = intel_pstate_get_epb(cpu_data);
+ }
+
+ return epp;
+}
+
+static int intel_pstate_set_epb(int cpu, s16 pref)
+{
+ u64 epb;
+ int ret;
+
+ if (!static_cpu_has(X86_FEATURE_EPB))
+ return -ENXIO;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret)
+ return ret;
+
+ epb = (epb & ~0x0f) | pref;
+ wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
+
+ return 0;
+}
+
+/*
+ * EPP/EPB display strings corresponding to EPP index in the
+ * energy_perf_strings[]
+ * index String
+ *-------------------------------------
+ * 0 default
+ * 1 performance
+ * 2 balance_performance
+ * 3 balance_power
+ * 4 power
+ */
+static const char * const energy_perf_strings[] = {
+ "default",
+ "performance",
+ "balance_performance",
+ "balance_power",
+ "power",
+ NULL
+};
+
+static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
+{
+ s16 epp;
+ int index = -EINVAL;
+
+ epp = intel_pstate_get_epp(cpu_data, 0);
+ if (epp < 0)
+ return epp;
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * Range:
+ * 0x00-0x3F : Performance
+ * 0x40-0x7F : Balance performance
+ * 0x80-0xBF : Balance power
+ * 0xC0-0xFF : Power
+ * The EPP is a 8 bit value, but our ranges restrict the
+ * value which can be set. Here only using top two bits
+ * effectively.
+ */
+ index = (epp >> 6) + 1;
+ } else if (static_cpu_has(X86_FEATURE_EPB)) {
+ /*
+ * Range:
+ * 0x00-0x03 : Performance
+ * 0x04-0x07 : Balance performance
+ * 0x08-0x0B : Balance power
+ * 0x0C-0x0F : Power
+ * The EPB is a 4 bit value, but our ranges restrict the
+ * value which can be set. Here only using top two bits
+ * effectively.
+ */
+ index = (epp >> 2) + 1;
+ }
+
+ return index;
+}
+
+static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
+ int pref_index)
+{
+ int epp = -EINVAL;
+ int ret;
+
+ if (!pref_index)
+ epp = cpu_data->epp_default;
+
+ mutex_lock(&intel_pstate_limits_lock);
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ u64 value;
+
+ ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
+ if (ret)
+ goto return_pref;
+
+ value &= ~GENMASK_ULL(31, 24);
+
+ /*
+ * If epp is not default, convert from index into
+ * energy_perf_strings to epp value, by shifting 6
+ * bits left to use only top two bits in epp.
+ * The resultant epp need to shifted by 24 bits to
+ * epp position in MSR_HWP_REQUEST.
+ */
+ if (epp == -EINVAL)
+ epp = (pref_index - 1) << 6;
+
+ value |= (u64)epp << 24;
+ ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
+ } else {
+ if (epp == -EINVAL)
+ epp = (pref_index - 1) << 2;
+ ret = intel_pstate_set_epb(cpu_data->cpu, epp);
+ }
+return_pref:
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ return ret;
+}
+
+static ssize_t show_energy_performance_available_preferences(
+ struct cpufreq_policy *policy, char *buf)
+{
+ int i = 0;
+ int ret = 0;
+
+ while (energy_perf_strings[i] != NULL)
+ ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
+
+ ret += sprintf(&buf[ret], "\n");
+
+ return ret;
+}
+
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+
+static ssize_t store_energy_performance_preference(
+ struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+ char str_preference[21];
+ int ret, i = 0;
+
+ ret = sscanf(buf, "%20s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ while (energy_perf_strings[i] != NULL) {
+ if (!strcmp(str_preference, energy_perf_strings[i])) {
+ intel_pstate_set_energy_pref_index(cpu_data, i);
+ return count;
+ }
+ ++i;
+ }
+
+ return -EINVAL;
+}
+
+static ssize_t show_energy_performance_preference(
+ struct cpufreq_policy *policy, char *buf)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+ int preference;
+
+ preference = intel_pstate_get_energy_pref_index(cpu_data);
+ if (preference < 0)
+ return preference;
+
+ return sprintf(buf, "%s\n", energy_perf_strings[preference]);
+}
+
+cpufreq_freq_attr_rw(energy_performance_preference);
+
+static struct freq_attr *hwp_cpufreq_attrs[] = {
+ &energy_performance_preference,
+ &energy_performance_available_preferences,
+ NULL,
+};
+
static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{
int min, hw_min, max, hw_max, cpu, range, adj_range;
+ struct perf_limits *perf_limits = limits;
u64 value, cap;
for_each_cpu(cpu, cpumask) {
+ int max_perf_pct, min_perf_pct;
+ struct cpudata *cpu_data = all_cpu_data[cpu];
+ s16 epp;
+
+ if (per_cpu_limits)
+ perf_limits = all_cpu_data[cpu]->perf_limits;
+
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
hw_min = HWP_LOWEST_PERF(cap);
hw_max = HWP_HIGHEST_PERF(cap);
range = hw_max - hw_min;
+ max_perf_pct = perf_limits->max_perf_pct;
+ min_perf_pct = perf_limits->min_perf_pct;
+
rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
- adj_range = limits->min_perf_pct * range / 100;
+ adj_range = min_perf_pct * range / 100;
min = hw_min + adj_range;
value &= ~HWP_MIN_PERF(~0L);
value |= HWP_MIN_PERF(min);
- adj_range = limits->max_perf_pct * range / 100;
+ adj_range = max_perf_pct * range / 100;
max = hw_min + adj_range;
if (limits->no_turbo) {
hw_max = HWP_GUARANTEED_PERF(cap);
@@ -586,6 +841,53 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask)
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
+
+ if (cpu_data->epp_policy == cpu_data->policy)
+ goto skip_epp;
+
+ cpu_data->epp_policy = cpu_data->policy;
+
+ if (cpu_data->epp_saved >= 0) {
+ epp = cpu_data->epp_saved;
+ cpu_data->epp_saved = -EINVAL;
+ goto update_epp;
+ }
+
+ if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ epp = intel_pstate_get_epp(cpu_data, value);
+ cpu_data->epp_powersave = epp;
+ /* If EPP read was failed, then don't try to write */
+ if (epp < 0)
+ goto skip_epp;
+
+
+ epp = 0;
+ } else {
+ /* skip setting EPP, when saved value is invalid */
+ if (cpu_data->epp_powersave < 0)
+ goto skip_epp;
+
+ /*
+ * No need to restore EPP when it is not zero. This
+ * means:
+ * - Policy is not changed
+ * - user has manually changed
+ * - Error reading EPB
+ */
+ epp = intel_pstate_get_epp(cpu_data, value);
+ if (epp)
+ goto skip_epp;
+
+ epp = cpu_data->epp_powersave;
+ }
+update_epp:
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ } else {
+ intel_pstate_set_epb(cpu, epp);
+ }
+skip_epp:
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
}
@@ -598,6 +900,28 @@ static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
return 0;
}
+static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+
+ if (!hwp_active)
+ return 0;
+
+ cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
+
+ return 0;
+}
+
+static int intel_pstate_resume(struct cpufreq_policy *policy)
+{
+ if (!hwp_active)
+ return 0;
+
+ all_cpu_data[policy->cpu]->epp_policy = 0;
+
+ return intel_pstate_hwp_set_policy(policy);
+}
+
static void intel_pstate_hwp_set_online_cpus(void)
{
get_online_cpus();
@@ -640,8 +964,10 @@ static void __init intel_pstate_debug_expose_params(void)
struct dentry *debugfs_parent;
int i = 0;
- if (hwp_active)
+ if (hwp_active ||
+ pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load)
return;
+
debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
if (IS_ERR_OR_NULL(debugfs_parent))
return;
@@ -714,9 +1040,12 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
if (ret != 1)
return -EINVAL;
+ mutex_lock(&intel_pstate_limits_lock);
+
update_turbo_state();
if (limits->turbo_disabled) {
pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
+ mutex_unlock(&intel_pstate_limits_lock);
return -EPERM;
}
@@ -725,6 +1054,8 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
+ mutex_unlock(&intel_pstate_limits_lock);
+
return count;
}
@@ -738,6 +1069,8 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
if (ret != 1)
return -EINVAL;
+ mutex_lock(&intel_pstate_limits_lock);
+
limits->max_sysfs_pct = clamp_t(int, input, 0 , 100);
limits->max_perf_pct = min(limits->max_policy_pct,
limits->max_sysfs_pct);
@@ -745,10 +1078,13 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
limits->max_perf_pct);
limits->max_perf_pct = max(limits->min_perf_pct,
limits->max_perf_pct);
- limits->max_perf = div_fp(limits->max_perf_pct, 100);
+ limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
return count;
}
@@ -762,6 +1098,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
if (ret != 1)
return -EINVAL;
+ mutex_lock(&intel_pstate_limits_lock);
+
limits->min_sysfs_pct = clamp_t(int, input, 0 , 100);
limits->min_perf_pct = max(limits->min_policy_pct,
limits->min_sysfs_pct);
@@ -769,10 +1107,13 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
limits->min_perf_pct);
limits->min_perf_pct = min(limits->max_perf_pct,
limits->min_perf_pct);
- limits->min_perf = div_fp(limits->min_perf_pct, 100);
+ limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
return count;
}
@@ -787,8 +1128,6 @@ define_one_global_ro(num_pstates);
static struct attribute *intel_pstate_attributes[] = {
&no_turbo.attr,
- &max_perf_pct.attr,
- &min_perf_pct.attr,
&turbo_pct.attr,
&num_pstates.attr,
NULL
@@ -805,9 +1144,26 @@ static void __init intel_pstate_sysfs_expose_params(void)
intel_pstate_kobject = kobject_create_and_add("intel_pstate",
&cpu_subsys.dev_root->kobj);
- BUG_ON(!intel_pstate_kobject);
+ if (WARN_ON(!intel_pstate_kobject))
+ return;
+
rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
- BUG_ON(rc);
+ if (WARN_ON(rc))
+ return;
+
+ /*
+ * If per cpu limits are enforced there are no global limits, so
+ * return without creating max/min_perf_pct attributes
+ */
+ if (per_cpu_limits)
+ return;
+
+ rc = sysfs_create_file(intel_pstate_kobject, &max_perf_pct.attr);
+ WARN_ON(rc);
+
+ rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
+ WARN_ON(rc);
+
}
/************************** sysfs end ************************/
@@ -818,6 +1174,9 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+ cpudata->epp_policy = 0;
+ if (cpudata->epp_default == -EINVAL)
+ cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
}
static int atom_get_min_pstate(void)
@@ -1045,7 +1404,6 @@ static const struct cpu_defaults silvermont_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
- .boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
@@ -1067,7 +1425,6 @@ static const struct cpu_defaults airmont_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
- .boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
@@ -1109,7 +1466,6 @@ static const struct cpu_defaults bxt_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
- .boost_iowait = true,
},
.funcs = {
.get_max = core_get_max_pstate,
@@ -1127,20 +1483,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
int max_perf = cpu->pstate.turbo_pstate;
int max_perf_adj;
int min_perf;
+ struct perf_limits *perf_limits = limits;
if (limits->no_turbo || limits->turbo_disabled)
max_perf = cpu->pstate.max_pstate;
+ if (per_cpu_limits)
+ perf_limits = cpu->perf_limits;
+
/*
* performance can be limited by user through sysfs, by cpufreq
* policy, or by cpu specific default values determined through
* experimentation.
*/
- max_perf_adj = fp_toint(max_perf * limits->max_perf);
+ max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
*max = clamp_t(int, max_perf_adj,
cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
- min_perf = fp_toint(max_perf * limits->min_perf);
+ min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
@@ -1178,6 +1538,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
cpu->pstate.scaling = pstate_funcs.get_scaling();
+ cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+ cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu);
@@ -1316,15 +1678,19 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
}
-static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
{
int max_perf, min_perf;
- update_turbo_state();
-
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
pstate = clamp_t(int, pstate, min_perf, max_perf);
trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+ return pstate;
+}
+
+static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+{
+ pstate = intel_pstate_prepare_request(cpu, pstate);
if (pstate == cpu->pstate.current_pstate)
return;
@@ -1342,6 +1708,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
+ update_turbo_state();
+
intel_pstate_update_pstate(cpu, target_pstate);
sample = &cpu->sample;
@@ -1362,7 +1730,7 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
u64 delta_ns;
- if (pid_params.boost_iowait) {
+ if (pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load) {
if (flags & SCHED_CPUFREQ_IOWAIT) {
cpu->iowait_boost = int_tofp(1);
} else if (cpu->iowait_boost) {
@@ -1408,6 +1776,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params),
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params),
+ ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params),
ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params),
{}
};
@@ -1424,11 +1793,26 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
{
struct cpudata *cpu;
- if (!all_cpu_data[cpunum])
- all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
- GFP_KERNEL);
- if (!all_cpu_data[cpunum])
- return -ENOMEM;
+ cpu = all_cpu_data[cpunum];
+
+ if (!cpu) {
+ unsigned int size = sizeof(struct cpudata);
+
+ if (per_cpu_limits)
+ size += sizeof(struct perf_limits);
+
+ cpu = kzalloc(size, GFP_KERNEL);
+ if (!cpu)
+ return -ENOMEM;
+
+ all_cpu_data[cpunum] = cpu;
+ if (per_cpu_limits)
+ cpu->perf_limits = (struct perf_limits *)(cpu + 1);
+
+ cpu->epp_default = -EINVAL;
+ cpu->epp_powersave = -EINVAL;
+ cpu->epp_saved = -EINVAL;
+ }
cpu = all_cpu_data[cpunum];
@@ -1487,18 +1871,57 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits)
limits->no_turbo = 0;
limits->turbo_disabled = 0;
limits->max_perf_pct = 100;
- limits->max_perf = int_tofp(1);
+ limits->max_perf = int_ext_tofp(1);
limits->min_perf_pct = 100;
- limits->min_perf = int_tofp(1);
+ limits->min_perf = int_ext_tofp(1);
limits->max_policy_pct = 100;
limits->max_sysfs_pct = 100;
limits->min_policy_pct = 0;
limits->min_sysfs_pct = 0;
}
+static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
+ struct perf_limits *limits)
+{
+
+ limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
+ policy->cpuinfo.max_freq);
+ limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
+ if (policy->max == policy->min) {
+ limits->min_policy_pct = limits->max_policy_pct;
+ } else {
+ limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
+ policy->cpuinfo.max_freq);
+ limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
+ 0, 100);
+ }
+
+ /* Normalize user input to [min_policy_pct, max_policy_pct] */
+ limits->min_perf_pct = max(limits->min_policy_pct,
+ limits->min_sysfs_pct);
+ limits->min_perf_pct = min(limits->max_policy_pct,
+ limits->min_perf_pct);
+ limits->max_perf_pct = min(limits->max_policy_pct,
+ limits->max_sysfs_pct);
+ limits->max_perf_pct = max(limits->min_policy_pct,
+ limits->max_perf_pct);
+
+ /* Make sure min_perf_pct <= max_perf_pct */
+ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+
+ limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+ limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+ limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
+ limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
+
+ pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
+ limits->max_perf_pct, limits->min_perf_pct);
+}
+
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
+ struct perf_limits *perf_limits = NULL;
if (!policy->cpuinfo.max_freq)
return -ENODEV;
@@ -1516,41 +1939,31 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
policy->max = policy->cpuinfo.max_freq;
}
- if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
- limits = &performance_limits;
+ if (per_cpu_limits)
+ perf_limits = cpu->perf_limits;
+
+ mutex_lock(&intel_pstate_limits_lock);
+
+ if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ if (!perf_limits) {
+ limits = &performance_limits;
+ perf_limits = limits;
+ }
if (policy->max >= policy->cpuinfo.max_freq) {
pr_debug("set performance\n");
- intel_pstate_set_performance_limits(limits);
+ intel_pstate_set_performance_limits(perf_limits);
goto out;
}
} else {
pr_debug("set powersave\n");
- limits = &powersave_limits;
- }
-
- limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
- limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
- limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
- policy->cpuinfo.max_freq);
- limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
-
- /* Normalize user input to [min_policy_pct, max_policy_pct] */
- limits->min_perf_pct = max(limits->min_policy_pct,
- limits->min_sysfs_pct);
- limits->min_perf_pct = min(limits->max_policy_pct,
- limits->min_perf_pct);
- limits->max_perf_pct = min(limits->max_policy_pct,
- limits->max_sysfs_pct);
- limits->max_perf_pct = max(limits->min_policy_pct,
- limits->max_perf_pct);
-
- /* Make sure min_perf_pct <= max_perf_pct */
- limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
+ if (!perf_limits) {
+ limits = &powersave_limits;
+ perf_limits = limits;
+ }
- limits->min_perf = div_fp(limits->min_perf_pct, 100);
- limits->max_perf = div_fp(limits->max_perf_pct, 100);
- limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
+ }
+ intel_pstate_update_perf_limits(policy, perf_limits);
out:
if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
/*
@@ -1565,6 +1978,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
intel_pstate_hwp_set_policy(policy);
+ mutex_unlock(&intel_pstate_limits_lock);
+
return 0;
}
@@ -1579,22 +1994,32 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
return 0;
}
+static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+{
+ intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
+}
+
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
- int cpu_num = policy->cpu;
- struct cpudata *cpu = all_cpu_data[cpu_num];
+ pr_debug("CPU %d exiting\n", policy->cpu);
- pr_debug("CPU %d exiting\n", cpu_num);
+ intel_pstate_clear_update_util_hook(policy->cpu);
+ if (hwp_active)
+ intel_pstate_hwp_save_state(policy);
+ else
+ intel_cpufreq_stop_cpu(policy);
+}
- intel_pstate_clear_update_util_hook(cpu_num);
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+ intel_pstate_exit_perf_limits(policy);
- if (hwp_active)
- return;
+ policy->fast_switch_possible = false;
- intel_pstate_set_min_pstate(cpu);
+ return 0;
}
-static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
+static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
int rc;
@@ -1605,10 +2030,13 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
cpu = all_cpu_data[policy->cpu];
- if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
- policy->policy = CPUFREQ_POLICY_PERFORMANCE;
- else
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ /*
+ * We need sane value in the cpu->perf_limits, so inherit from global
+ * perf_limits limits, which are seeded with values based on the
+ * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up.
+ */
+ if (per_cpu_limits)
+ memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits));
policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -1621,24 +2049,35 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
intel_pstate_init_acpi_perf_limits(policy);
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);
+ policy->fast_switch_possible = true;
+
return 0;
}
-static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
- intel_pstate_exit_perf_limits(policy);
+ int ret = __intel_pstate_cpu_init(policy);
+
+ if (ret)
+ return ret;
+
+ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ else
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
return 0;
}
-static struct cpufreq_driver intel_pstate_driver = {
+static struct cpufreq_driver intel_pstate = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
- .resume = intel_pstate_hwp_set_policy,
+ .suspend = intel_pstate_hwp_save_state,
+ .resume = intel_pstate_resume,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
.exit = intel_pstate_cpu_exit,
@@ -1646,6 +2085,118 @@ static struct cpufreq_driver intel_pstate_driver = {
.name = "intel_pstate",
};
+static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+ struct perf_limits *perf_limits = limits;
+
+ update_turbo_state();
+ policy->cpuinfo.max_freq = limits->turbo_disabled ?
+ cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+
+ cpufreq_verify_within_cpu_limits(policy);
+
+ if (per_cpu_limits)
+ perf_limits = cpu->perf_limits;
+
+ intel_pstate_update_perf_limits(policy, perf_limits);
+
+ return 0;
+}
+
+static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
+ struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ unsigned int max_freq;
+
+ update_turbo_state();
+
+ max_freq = limits->no_turbo || limits->turbo_disabled ?
+ cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+ policy->cpuinfo.max_freq = max_freq;
+ if (policy->max > max_freq)
+ policy->max = max_freq;
+
+ if (target_freq > max_freq)
+ target_freq = max_freq;
+
+ return target_freq;
+}
+
+static int intel_cpufreq_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+ struct cpufreq_freqs freqs;
+ int target_pstate;
+
+ freqs.old = policy->cur;
+ freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+
+ cpufreq_freq_transition_begin(policy, &freqs);
+ switch (relation) {
+ case CPUFREQ_RELATION_L:
+ target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
+ break;
+ case CPUFREQ_RELATION_H:
+ target_pstate = freqs.new / cpu->pstate.scaling;
+ break;
+ default:
+ target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
+ break;
+ }
+ target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+ if (target_pstate != cpu->pstate.current_pstate) {
+ cpu->pstate.current_pstate = target_pstate;
+ wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
+ pstate_funcs.get_val(cpu, target_pstate));
+ }
+ cpufreq_freq_transition_end(policy, &freqs, false);
+
+ return 0;
+}
+
+static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+ int target_pstate;
+
+ target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
+ target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
+ intel_pstate_update_pstate(cpu, target_pstate);
+ return target_freq;
+}
+
+static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ int ret = __intel_pstate_cpu_init(policy);
+
+ if (ret)
+ return ret;
+
+ policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
+ /* This reflects the intel_pstate_get_cpu_pstates() setting. */
+ policy->cur = policy->cpuinfo.min_freq;
+
+ return 0;
+}
+
+static struct cpufreq_driver intel_cpufreq = {
+ .flags = CPUFREQ_CONST_LOOPS,
+ .verify = intel_cpufreq_verify_policy,
+ .target = intel_cpufreq_target,
+ .fast_switch = intel_cpufreq_fast_switch,
+ .init = intel_cpufreq_cpu_init,
+ .exit = intel_pstate_cpu_exit,
+ .stop_cpu = intel_cpufreq_stop_cpu,
+ .name = "intel_cpufreq",
+};
+
+static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
+
static int no_load __initdata;
static int no_hwp __initdata;
static int hwp_only __initdata;
@@ -1672,6 +2223,19 @@ static void __init copy_pid_params(struct pstate_adjust_policy *policy)
pid_params.setpoint = policy->setpoint;
}
+#ifdef CONFIG_ACPI
+static void intel_pstate_use_acpi_profile(void)
+{
+ if (acpi_gbl_FADT.preferred_profile == PM_MOBILE)
+ pstate_funcs.get_target_pstate =
+ get_target_pstate_use_cpu_load;
+}
+#else
+static void intel_pstate_use_acpi_profile(void)
+{
+}
+#endif
+
static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
{
pstate_funcs.get_max = funcs->get_max;
@@ -1683,6 +2247,7 @@ static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
pstate_funcs.get_vid = funcs->get_vid;
pstate_funcs.get_target_pstate = funcs->get_target_pstate;
+ intel_pstate_use_acpi_profile();
}
#ifdef CONFIG_ACPI
@@ -1796,9 +2361,20 @@ static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
return false;
}
+
+static void intel_pstate_request_control_from_smm(void)
+{
+ /*
+ * It may be unsafe to request P-states control from SMM if _PPC support
+ * has not been enabled.
+ */
+ if (acpi_ppc)
+ acpi_processor_pstate_control();
+}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
+static inline void intel_pstate_request_control_from_smm(void) {}
#endif /* CONFIG_ACPI */
static const struct x86_cpu_id hwp_support_ids[] __initconst = {
@@ -1818,6 +2394,7 @@ static int __init intel_pstate_init(void)
if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
copy_cpu_funcs(&core_params.funcs);
hwp_active++;
+ intel_pstate.attr = hwp_cpufreq_attrs;
goto hwp_cpu_matched;
}
@@ -1850,7 +2427,9 @@ hwp_cpu_matched:
if (!hwp_active && hwp_only)
goto out;
- rc = cpufreq_register_driver(&intel_pstate_driver);
+ intel_pstate_request_control_from_smm();
+
+ rc = cpufreq_register_driver(intel_pstate_driver);
if (rc)
goto out;
@@ -1865,7 +2444,9 @@ out:
get_online_cpus();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
- intel_pstate_clear_update_util_hook(cpu);
+ if (intel_pstate_driver == &intel_pstate)
+ intel_pstate_clear_update_util_hook(cpu);
+
kfree(all_cpu_data[cpu]);
}
}
@@ -1881,8 +2462,13 @@ static int __init intel_pstate_setup(char *str)
if (!str)
return -EINVAL;
- if (!strcmp(str, "disable"))
+ if (!strcmp(str, "disable")) {
no_load = 1;
+ } else if (!strcmp(str, "passive")) {
+ pr_info("Passive mode enabled\n");
+ intel_pstate_driver = &intel_cpufreq;
+ no_hwp = 1;
+ }
if (!strcmp(str, "no_hwp")) {
pr_info("HWP disabled\n");
no_hwp = 1;
@@ -1891,6 +2477,8 @@ static int __init intel_pstate_setup(char *str)
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
+ if (!strcmp(str, "per_cpu_perf_limits"))
+ per_cpu_limits = true;
#ifdef CONFIG_ACPI
if (!strcmp(str, "support_acpi_ppc"))
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index d3ffde806629..37671b545880 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -42,6 +42,10 @@
#define PMSR_PSAFE_ENABLE (1UL << 30)
#define PMSR_SPR_EM_DISABLE (1UL << 31)
#define PMSR_MAX(x) ((x >> 32) & 0xFF)
+#define LPSTATE_SHIFT 48
+#define GPSTATE_SHIFT 56
+#define GET_LPSTATE(x) (((x) >> LPSTATE_SHIFT) & 0xFF)
+#define GET_GPSTATE(x) (((x) >> GPSTATE_SHIFT) & 0xFF)
#define MAX_RAMP_DOWN_TIME 5120
/*
@@ -592,7 +596,8 @@ void gpstate_timer_handler(unsigned long data)
{
struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
struct global_pstate_info *gpstates = policy->driver_data;
- int gpstate_idx;
+ int gpstate_idx, lpstate_idx;
+ unsigned long val;
unsigned int time_diff = jiffies_to_msecs(jiffies)
- gpstates->last_sampled_time;
struct powernv_smp_call_data freq_data;
@@ -600,21 +605,37 @@ void gpstate_timer_handler(unsigned long data)
if (!spin_trylock(&gpstates->gpstate_lock))
return;
+ /*
+ * If PMCR was last updated was using fast_swtich then
+ * We may have wrong in gpstate->last_lpstate_idx
+ * value. Hence, read from PMCR to get correct data.
+ */
+ val = get_pmspr(SPRN_PMCR);
+ freq_data.gpstate_id = (s8)GET_GPSTATE(val);
+ freq_data.pstate_id = (s8)GET_LPSTATE(val);
+ if (freq_data.gpstate_id == freq_data.pstate_id) {
+ reset_gpstates(policy);
+ spin_unlock(&gpstates->gpstate_lock);
+ return;
+ }
+
gpstates->last_sampled_time += time_diff;
gpstates->elapsed_time += time_diff;
- freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx);
- if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) ||
- (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
+ if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
gpstate_idx = pstate_to_idx(freq_data.pstate_id);
+ lpstate_idx = gpstate_idx;
reset_gpstates(policy);
gpstates->highest_lpstate_idx = gpstate_idx;
} else {
+ lpstate_idx = pstate_to_idx(freq_data.pstate_id);
gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
gpstates->highest_lpstate_idx,
- gpstates->last_lpstate_idx);
+ lpstate_idx);
}
-
+ freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
+ gpstates->last_gpstate_idx = gpstate_idx;
+ gpstates->last_lpstate_idx = lpstate_idx;
/*
* If local pstate is equal to global pstate, rampdown is over
* So timer is not required to be queued.
@@ -622,10 +643,6 @@ void gpstate_timer_handler(unsigned long data)
if (gpstate_idx != gpstates->last_lpstate_idx)
queue_gpstate_timer(gpstates);
- freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
- gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id);
- gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id);
-
spin_unlock(&gpstates->gpstate_lock);
/* Timer may get migrated to a different cpu on cpu hot unplug */
@@ -647,8 +664,14 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
if (unlikely(rebooting) && new_index != get_nominal_index())
return 0;
- if (!throttled)
+ if (!throttled) {
+ /* we don't want to be preempted while
+ * checking if the CPU frequency has been throttled
+ */
+ preempt_disable();
powernv_cpufreq_throttle_check(NULL);
+ preempt_enable();
+ }
cur_msec = jiffies_to_msecs(get_jiffies_64());
@@ -752,9 +775,12 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
spin_lock_init(&gpstates->gpstate_lock);
ret = cpufreq_table_validate_and_show(policy, powernv_freqs);
- if (ret < 0)
+ if (ret < 0) {
kfree(policy->driver_data);
+ return ret;
+ }
+ policy->fast_switch_possible = true;
return ret;
}
@@ -897,6 +923,20 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
del_timer_sync(&gpstates->timer);
}
+static unsigned int powernv_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ int index;
+ struct powernv_smp_call_data freq_data;
+
+ index = cpufreq_table_find_index_dl(policy, target_freq);
+ freq_data.pstate_id = powernv_freqs[index].driver_data;
+ freq_data.gpstate_id = powernv_freqs[index].driver_data;
+ set_pstate(&freq_data);
+
+ return powernv_freqs[index].frequency;
+}
+
static struct cpufreq_driver powernv_cpufreq_driver = {
.name = "powernv-cpufreq",
.flags = CPUFREQ_CONST_LOOPS,
@@ -904,6 +944,7 @@ static struct cpufreq_driver powernv_cpufreq_driver = {
.exit = powernv_cpufreq_cpu_exit,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = powernv_cpufreq_target_index,
+ .fast_switch = powernv_fast_switch,
.get = powernv_cpufreq_get,
.stop_cpu = powernv_cpufreq_stop_cpu,
.attr = powernv_cpu_freq_attr,