summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-18 14:11:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-18 14:11:47 -0800
commitc3c1e9853363d247270f9d500cbaa1df8f14d4f9 (patch)
treee94787f28a1020253301f70dc94f4974890295de
parent23b0f90ba871f096474e1c27c3d14f455189d2d9 (diff)
parentbecbdde56a5c0e40c9bbbb6b8d5ffbb8de635d63 (diff)
downloadlwn-c3c1e9853363d247270f9d500cbaa1df8f14d4f9.tar.gz
lwn-c3c1e9853363d247270f9d500cbaa1df8f14d4f9.zip
Merge tag 'pm-7.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki: "These are mostly fixes on top of the power management updates merged recently in cpuidle governors, in the Intel RAPL power capping driver and in the wake IRQ management code: - Fix the handling of package-scope MSRs in the intel_rapl power capping driver when called from the PMU subsystem and make it add all package CPUs to the PMU cpumask to allow tools to read RAPL events from any CPU in the package (Kuppuswamy Satharayananyan) - Rework the invalid version check in the intel_rapl_tpmi power capping driver to account for the fact that on partitioned systems, multiple TPMI instances may exist per package, but RAPL registers are only valid on one instance (Kuppuswamy Satharayananyan) - Describe the new intel_idle.table command line option in the admin-guide intel_idle documentation (Artem Bityutskiy) - Fix a crash in the ladder cpuidle governor on systems with only one (polling) idle state available by making the cpuidle core bypass the governor in those cases and adjust the other existing governors to that change (Aboorva Devarajan, Christian Loehle) - Update kerneldoc comments for wake IRQ management functions that have not been matching the code (Wang Jiayue)" * tag 'pm-7.0-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: cpuidle: menu: Remove single state handling cpuidle: teo: Remove single state handling cpuidle: haltpoll: Remove single state handling cpuidle: Skip governor when only one idle state is available powercap: intel_rapl_tpmi: Remove FW_BUG from invalid version check PM: sleep: wakeirq: Update outdated documentation comments Documentation: PM: Document intel_idle.table command line option powercap: intel_rapl: Expose all package CPUs in PMU cpumask powercap: intel_rapl: Remove incorrect CPU check in PMU context
-rw-r--r--Documentation/admin-guide/pm/intel_idle.rst11
-rw-r--r--drivers/base/power/wakeirq.c11
-rw-r--r--drivers/cpuidle/cpuidle.c10
-rw-r--r--drivers/cpuidle/governors/haltpoll.c4
-rw-r--r--drivers/cpuidle/governors/menu.c2
-rw-r--r--drivers/cpuidle/governors/teo.c6
-rw-r--r--drivers/powercap/intel_rapl_common.c21
-rw-r--r--drivers/powercap/intel_rapl_msr.c12
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c2
-rw-r--r--include/linux/intel_rapl.h2
10 files changed, 45 insertions, 36 deletions
diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst
index ed6f055d4b14..188d52cd26e8 100644
--- a/Documentation/admin-guide/pm/intel_idle.rst
+++ b/Documentation/admin-guide/pm/intel_idle.rst
@@ -260,6 +260,17 @@ mode to off when the CPU is in any one of the available idle states. This may
help performance of a sibling CPU at the expense of a slightly higher wakeup
latency for the idle CPU.
+The ``table`` argument allows customization of idle state latency and target
+residency. The syntax is a comma-separated list of ``name:latency:residency``
+entries, where ``name`` is the idle state name, ``latency`` is the exit latency
+in microseconds, and ``residency`` is the target residency in microseconds. It
+is not necessary to specify all idle states; only those to be customized. For
+example, ``C1:1:3,C6:50:100`` sets the exit latency and target residency for
+C1 and C6 to 1/3 and 50/100 microseconds, respectively. Remaining idle states
+keep their default values. The driver verifies that deeper idle states have
+higher latency and target residency than shallower ones. Also, target
+residency cannot be smaller than exit latency. If any of these conditions is
+not met, the driver ignores the entire ``table`` parameter.
.. _intel-idle-core-and-package-idle-states:
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index c0809d18fc54..aab843f6dfcc 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -273,8 +273,10 @@ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse);
* otherwise try to disable already disabled wakeirq. The wake-up interrupt
* starts disabled with IRQ_NOAUTOEN set.
*
- * Should be only called from rpm_suspend() and rpm_resume() path.
- * Caller must hold &dev->power.lock to change wirq->status
+ * Should be called from rpm_suspend(), rpm_resume(),
+ * pm_runtime_force_suspend() or pm_runtime_force_resume().
+ * Caller must hold &dev->power.lock or disable runtime PM to change
+ * wirq->status.
*/
void dev_pm_enable_wake_irq_check(struct device *dev,
bool can_change_status)
@@ -306,7 +308,8 @@ enable:
* @cond_disable: if set, also check WAKE_IRQ_DEDICATED_REVERSE
*
* Disables wake-up interrupt conditionally based on status.
- * Should be only called from rpm_suspend() and rpm_resume() path.
+ * Should be called from rpm_suspend(), rpm_resume(),
+ * pm_runtime_force_suspend() or pm_runtime_force_resume().
*/
void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable)
{
@@ -332,7 +335,7 @@ void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable)
* enable wake IRQ after running ->runtime_suspend() which depends on
* WAKE_IRQ_DEDICATED_REVERSE.
*
- * Should be only called from rpm_suspend() path.
+ * Should be called from rpm_suspend() or pm_runtime_force_suspend().
*/
void dev_pm_enable_wake_irq_complete(struct device *dev)
{
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c7876e9e024f..65fbb8e807b9 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -359,6 +359,16 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
+ /*
+ * If there is only a single idle state (or none), there is nothing
+ * meaningful for the governor to choose. Skip the governor and
+ * always use state 0 with the tick running.
+ */
+ if (drv->state_count <= 1) {
+ *stop_tick = false;
+ return 0;
+ }
+
return cpuidle_curr_governor->select(drv, dev, stop_tick);
}
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 663b7f164d20..b367d10279c8 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -50,9 +50,7 @@ static int haltpoll_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
bool *stop_tick)
{
- s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-
- if (!drv->state_count || latency_req == 0) {
+ if (cpuidle_governor_latency_req(dev->cpu) == 0) {
*stop_tick = false;
return 0;
}
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index c6052055ba0f..899ff16ff1fe 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -281,7 +281,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
data->bucket = BUCKETS - 1;
}
- if (drv->state_count <= 1 || latency_req == 0 ||
+ if (latency_req == 0 ||
((data->next_timer_ns < drv->states[1].target_residency_ns ||
latency_req < drv->states[1].exit_latency_ns) &&
!dev->states_usage[0].disable)) {
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 80f3ba942a06..bec0142377b8 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -338,12 +338,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
cpu_data->sleep_length_ns = KTIME_MAX;
- /* Check if there is any choice in the first place. */
- if (drv->state_count < 2) {
- idx = 0;
- goto out_tick;
- }
-
if (!dev->states_usage[0].disable)
idx = 0;
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 3ff6da3bf4e6..539625531709 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -254,7 +254,7 @@ static void rapl_init_domains(struct rapl_package *rp);
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
bool xlate, u64 *data,
- bool atomic);
+ bool pmu_ctx);
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);
@@ -832,7 +832,7 @@ prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
*/
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim, bool xlate, u64 *data,
- bool atomic)
+ bool pmu_ctx)
{
u64 value;
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
@@ -854,7 +854,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
ra.mask = rpi->mask;
- if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, atomic)) {
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, pmu_ctx)) {
pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
return -EIO;
}
@@ -1590,23 +1590,21 @@ static struct rapl_pmu rapl_pmu;
/* PMU helpers */
-static int get_pmu_cpu(struct rapl_package *rp)
+static void set_pmu_cpumask(struct rapl_package *rp, cpumask_var_t mask)
{
int cpu;
if (!rp->has_pmu)
- return nr_cpu_ids;
+ return;
/* Only TPMI & MSR RAPL are supported for now */
if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
- return nr_cpu_ids;
+ return;
/* TPMI/MSR RAPL uses any CPU in the package for PMU */
for_each_online_cpu(cpu)
if (topology_physical_package_id(cpu) == rp->id)
- return cpu;
-
- return nr_cpu_ids;
+ cpumask_set_cpu(cpu, mask);
}
static bool is_rp_pmu_cpu(struct rapl_package *rp, int cpu)
@@ -1883,7 +1881,6 @@ static ssize_t cpumask_show(struct device *dev,
{
struct rapl_package *rp;
cpumask_var_t cpu_mask;
- int cpu;
int ret;
if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -1895,9 +1892,7 @@ static ssize_t cpumask_show(struct device *dev,
/* Choose a cpu for each RAPL Package */
list_for_each_entry(rp, &rapl_packages, plist) {
- cpu = get_pmu_cpu(rp);
- if (cpu < nr_cpu_ids)
- cpumask_set_cpu(cpu, cpu_mask);
+ set_pmu_cpumask(rp, cpu_mask);
}
cpus_read_unlock();
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index a2bc0a9c1e10..3d5e7f56d68a 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -110,16 +110,14 @@ static int rapl_cpu_down_prep(unsigned int cpu)
return 0;
}
-static int rapl_msr_read_raw(int cpu, struct reg_action *ra, bool atomic)
+static int rapl_msr_read_raw(int cpu, struct reg_action *ra, bool pmu_ctx)
{
/*
- * When called from atomic-context (eg PMU event handler)
- * perform MSR read directly using rdmsrq().
+ * When called from PMU context, perform MSR read directly using
+ * rdmsrq() without IPI overhead. Package-scoped MSRs are readable
+ * from any CPU in the package.
*/
- if (atomic) {
- if (unlikely(smp_processor_id() != cpu))
- return -EIO;
-
+ if (pmu_ctx) {
rdmsrq(ra->reg.msr, ra->value);
goto out;
}
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index 0a0b85f4528b..0f8abdc592bc 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -157,7 +157,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
tpmi_domain_flags = tpmi_domain_header >> 32 & 0xffff;
if (tpmi_domain_version == TPMI_VERSION_INVALID) {
- pr_warn(FW_BUG "Invalid version\n");
+ pr_debug("Invalid version, other instances may be valid\n");
return -ENODEV;
}
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index f479ef5b3341..fa1f328d6712 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -152,7 +152,7 @@ struct rapl_if_priv {
union rapl_reg reg_unit;
union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
- int (*read_raw)(int id, struct reg_action *ra, bool atomic);
+ int (*read_raw)(int id, struct reg_action *ra, bool pmu_ctx);
int (*write_raw)(int id, struct reg_action *ra);
void *defaults;
void *rpi;