diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 17:41:08 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 17:41:08 -0800 |
commit | 1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f (patch) | |
tree | a5dabaa924d50867cbe347e20a7643b2850f11c0 /arch | |
parent | a2f0e7eee1344eb9f91b22bc72d9eb0a52b849c9 (diff) | |
parent | 7c4a5b89a0b5a57a64b601775b296abf77a9fe97 (diff) | |
download | lwn-1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f.tar.gz lwn-1f2d9ffc7a5f916935749ffc6e93fb33bfe94d2f.zip |
Merge tag 'sched-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Improve the scalability of the CFS bandwidth unthrottling logic with a
large number of CPUs.
- Fix & rework various cpuidle routines, simplify interaction with the
generic scheduler code. Add __cpuidle methods as noinstr to objtool's
noinstr detection and fix boatloads of cpuidle bugs & quirks.
- Add new ABI: introduce MEMBARRIER_CMD_GET_REGISTRATIONS, to query
previously issued registrations.
- Limit scheduler slice duration to the sysctl_sched_latency period, to
improve scheduling granularity with a large number of SCHED_IDLE
tasks.
- Debuggability enhancement on sys_exit(): warn about disabled IRQs,
but also enable them to prevent a cascade of followup problems and
repeat warnings.
- Fix the rescheduling logic in prio_changed_dl().
- Micro-optimize cpufreq and sched-util methods.
- Micro-optimize ttwu_runnable()
- Micro-optimize the idle-scanning in update_numa_stats(),
select_idle_capacity() and steal_cookie_task().
- Update the RSEQ code & self-tests
- Constify various scheduler methods
- Remove unused methods
- Refine __init tags
- Documentation updates
- Misc other cleanups, fixes
* tag 'sched-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (110 commits)
sched/rt: pick_next_rt_entity(): check list_entry
sched/deadline: Add more reschedule cases to prio_changed_dl()
sched/fair: sanitize vruntime of entity being placed
sched/fair: Remove capacity inversion detection
sched/fair: unlink misfit task from cpu overutilized
objtool: mem*() are not uaccess safe
cpuidle: Fix poll_idle() noinstr annotation
sched/clock: Make local_clock() noinstr
sched/clock/x86: Mark sched_clock() noinstr
x86/pvclock: Improve atomic update of last_value in pvclock_clocksource_read()
x86/atomics: Always inline arch_atomic64*()
cpuidle: tracing, preempt: Squash _rcuidle tracing
cpuidle: tracing: Warn about !rcu_is_watching()
cpuidle: lib/bug: Disable rcu_is_watching() during WARN/BUG
cpuidle: drivers: firmware: psci: Dont instrument suspend code
KVM: selftests: Fix build of rseq test
exit: Detect and fix irq disabled state in oops
cpuidle, arm64: Fix the ARM64 cpuidle logic
cpuidle: mvebu: Fix duplicate flags assignment
sched/fair: Limit sched slice duration
...
Diffstat (limited to 'arch')
104 files changed, 308 insertions, 282 deletions
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 65fdae9e48f3..ce20c31828a0 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -57,7 +57,6 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { wtint(0); - raw_local_irq_enable(); } void arch_cpu_idle_dead(void) diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 5b78d640725d..2efa7dfc798a 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -27,7 +27,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 3369f0700702..980b71da2f61 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -114,6 +114,8 @@ void arch_cpu_idle(void) "sleep %0 \n" : :"I"(arg)); /* can't be "r" has to be embedded const */ + + raw_local_irq_disable(); } #else /* ARC700 */ @@ -122,6 +124,7 @@ void arch_cpu_idle(void) { /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */ __asm__ __volatile__("sleep 0x3 \n"); + raw_local_irq_disable(); } #endif diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 529ae50f9fe2..549c3f407918 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -85,7 +85,6 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index fad45c884e98..4c8632d5c432 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -96,7 +96,6 @@ SOFTIRQENTRY_TEXT \ TEXT_TEXT \ SCHED_TEXT \ - CPUIDLE_TEXT \ LOCK_TEXT \ KPROBES_TEXT \ ARM_STUBS_TEXT \ diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index e1684623e1b2..437ff39f7808 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -26,8 +26,8 @@ static struct 
cpuidle_ops cpuidle_ops[NR_CPUS] __ro_after_init; * * Returns the index passed as parameter */ -int arm_cpuidle_simple_enter(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +__cpuidle int arm_cpuidle_simple_enter(struct cpuidle_device *dev, struct + cpuidle_driver *drv, int index) { cpu_do_idle(); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f811733a8fc5..c81e7be2b4ea 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -78,7 +78,6 @@ void arch_cpu_idle(void) arm_pm_idle(); else cpu_do_idle(); - raw_local_irq_enable(); } void arch_cpu_idle_prepare(void) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 36e6efad89f3..0b8c25763adc 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -638,7 +638,7 @@ static void do_handle_IPI(int ipinr) unsigned int cpu = smp_processor_id(); if ((unsigned)ipinr < NR_IPI) - trace_ipi_entry_rcuidle(ipi_types[ipinr]); + trace_ipi_entry(ipi_types[ipinr]); switch (ipinr) { case IPI_WAKEUP: @@ -685,7 +685,7 @@ static void do_handle_IPI(int ipinr) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit_rcuidle(ipi_types[ipinr]); + trace_ipi_exit(ipi_types[ipinr]); } /* Legacy version, should go away once all irqchips have been converted */ @@ -708,7 +708,7 @@ static irqreturn_t ipi_handler(int irq, void *data) static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { - trace_ipi_raise_rcuidle(target, ipi_types[ipinr]); + trace_ipi_raise(target, ipi_types[ipinr]); __ipi_send_mask(ipi_desc[ipinr], target); } diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c index dd38785536d5..78a1575c387d 100644 --- a/arch/arm/mach-davinci/cpuidle.c +++ b/arch/arm/mach-davinci/cpuidle.c @@ -44,8 +44,8 @@ static void davinci_save_ddr_power(int enter, bool pdown) } /* Actual code that puts the SoC in different idle states */ -static int davinci_enter_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) 
+static __cpuidle int davinci_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { davinci_save_ddr_power(1, ddr2_pdown); cpu_do_idle(); diff --git a/arch/arm/mach-gemini/board-dt.c b/arch/arm/mach-gemini/board-dt.c index de0afcc8d94a..fbafe7475c02 100644 --- a/arch/arm/mach-gemini/board-dt.c +++ b/arch/arm/mach-gemini/board-dt.c @@ -42,8 +42,9 @@ static void gemini_idle(void) */ /* FIXME: Enabling interrupts here is racy! */ - local_irq_enable(); + raw_local_irq_enable(); cpu_do_idle(); + raw_local_irq_disable(); } static void __init gemini_init_machine(void) diff --git a/arch/arm/mach-imx/cpuidle-imx5.c b/arch/arm/mach-imx/cpuidle-imx5.c index a8457c4eb99a..5ad9f2f533cd 100644 --- a/arch/arm/mach-imx/cpuidle-imx5.c +++ b/arch/arm/mach-imx/cpuidle-imx5.c @@ -8,8 +8,8 @@ #include <asm/system_misc.h> #include "cpuidle.h" -static int imx5_cpuidle_enter(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int imx5_cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { arm_pm_idle(); return index; diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index d086cbae09c3..2b0d3160f993 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -17,17 +17,17 @@ static int num_idle_cpus = 0; static DEFINE_RAW_SPINLOCK(cpuidle_lock); -static int imx6q_enter_wait(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int imx6q_enter_wait(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { raw_spin_lock(&cpuidle_lock); if (++num_idle_cpus == num_online_cpus()) imx6_set_lpm(WAIT_UNCLOCKED); raw_spin_unlock(&cpuidle_lock); - ct_idle_enter(); + ct_cpuidle_enter(); cpu_do_idle(); - ct_idle_exit(); + ct_cpuidle_exit(); raw_spin_lock(&cpuidle_lock); if (num_idle_cpus-- == num_online_cpus()) diff --git a/arch/arm/mach-imx/cpuidle-imx6sl.c b/arch/arm/mach-imx/cpuidle-imx6sl.c index 
b86ffbeb28e4..b49cd6302dce 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sl.c +++ b/arch/arm/mach-imx/cpuidle-imx6sl.c @@ -11,8 +11,8 @@ #include "common.h" #include "cpuidle.h" -static int imx6sl_enter_wait(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int imx6sl_enter_wait(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { imx6_set_lpm(WAIT_UNCLOCKED); /* diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index 74ea1720e3d8..83c5cbd3748e 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -30,8 +30,8 @@ static int imx6sx_idle_finish(unsigned long val) return 0; } -static int imx6sx_enter_wait(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int imx6sx_enter_wait(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { imx6_set_lpm(WAIT_UNCLOCKED); @@ -47,7 +47,9 @@ static int imx6sx_enter_wait(struct cpuidle_device *dev, cpu_pm_enter(); cpu_cluster_pm_enter(); + ct_cpuidle_enter(); cpu_suspend(0, imx6sx_idle_finish); + ct_cpuidle_exit(); cpu_cluster_pm_exit(); cpu_pm_exit(); @@ -87,7 +89,8 @@ static struct cpuidle_driver imx6sx_cpuidle_driver = { */ .exit_latency = 300, .target_residency = 500, - .flags = CPUIDLE_FLAG_TIMER_STOP, + .flags = CPUIDLE_FLAG_TIMER_STOP | + CPUIDLE_FLAG_RCU_IDLE, .enter = imx6sx_enter_wait, .name = "LOW-POWER-IDLE", .desc = "ARM power off", diff --git a/arch/arm/mach-imx/cpuidle-imx7ulp.c b/arch/arm/mach-imx/cpuidle-imx7ulp.c index ca86c967d19e..f55ed74acfae 100644 --- a/arch/arm/mach-imx/cpuidle-imx7ulp.c +++ b/arch/arm/mach-imx/cpuidle-imx7ulp.c @@ -12,8 +12,8 @@ #include "common.h" #include "cpuidle.h" -static int imx7ulp_enter_wait(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +static __cpuidle int imx7ulp_enter_wait(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { if (index == 1) 
imx7ulp_set_lpm(ULP_PM_WAIT); diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 08034d589081..9d60799e9752 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -256,11 +256,13 @@ extern u32 omap4_get_cpu1_ns_pa_addr(void); #if defined(CONFIG_SMP) && defined(CONFIG_PM) extern int omap4_mpuss_init(void); -extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state); +extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state, + bool rcuidle); extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state); #else static inline int omap4_enter_lowpower(unsigned int cpu, - unsigned int power_state) + unsigned int power_state, + bool rcuidle) { cpu_do_idle(); return 0; diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 090a8aafb25e..2ab5dcbfb7f6 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -133,7 +133,7 @@ static int omap3_enter_idle(struct cpuidle_device *dev, } /* Execute ARM wfi */ - omap_sram_idle(); + omap_sram_idle(true); /* * Call idle CPU PM enter notifier chain to restore @@ -265,6 +265,7 @@ static struct cpuidle_driver omap3_idle_driver = { .owner = THIS_MODULE, .states = { { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 2 + 2, .target_residency = 5, @@ -272,6 +273,7 @@ static struct cpuidle_driver omap3_idle_driver = { .desc = "MPU ON + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 10 + 10, .target_residency = 30, @@ -279,6 +281,7 @@ static struct cpuidle_driver omap3_idle_driver = { .desc = "MPU ON + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 50 + 50, .target_residency = 300, @@ -286,6 +289,7 @@ static struct cpuidle_driver omap3_idle_driver = { .desc = "MPU RET + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, 
.exit_latency = 1500 + 1800, .target_residency = 4000, @@ -293,6 +297,7 @@ static struct cpuidle_driver omap3_idle_driver = { .desc = "MPU OFF + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 2500 + 7500, .target_residency = 12000, @@ -300,6 +305,7 @@ static struct cpuidle_driver omap3_idle_driver = { .desc = "MPU RET + CORE RET", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 3000 + 8500, .target_residency = 15000, @@ -307,6 +313,7 @@ static struct cpuidle_driver omap3_idle_driver = { .desc = "MPU OFF + CORE RET", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 10000 + 30000, .target_residency = 30000, @@ -328,6 +335,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .owner = THIS_MODULE, .states = { { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 110 + 162, .target_residency = 5, @@ -335,6 +343,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .desc = "MPU ON + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 106 + 180, .target_residency = 309, @@ -342,6 +351,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .desc = "MPU ON + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 107 + 410, .target_residency = 46057, @@ -349,6 +359,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .desc = "MPU RET + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 121 + 3374, .target_residency = 46057, @@ -356,6 +367,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .desc = "MPU OFF + CORE ON", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 855 + 1146, .target_residency = 46057, @@ -363,6 +375,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .desc = "MPU RET + CORE RET", }, { + .flags = 
CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 7580 + 4134, .target_residency = 484329, @@ -370,6 +383,7 @@ static struct cpuidle_driver omap3430_idle_driver = { .desc = "MPU OFF + CORE RET", }, { + .flags = CPUIDLE_FLAG_RCU_IDLE, .enter = omap3_enter_idle_bm, .exit_latency = 7505 + 15274, .target_residency = 484329, diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index de37027ad758..df106524d695 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -105,7 +105,7 @@ static int omap_enter_idle_smp(struct cpuidle_device *dev, } raw_spin_unlock_irqrestore(&mpu_lock, flag); - omap4_enter_lowpower(dev->cpu, cx->cpu_state); + omap4_enter_lowpower(dev->cpu, cx->cpu_state, true); raw_spin_lock_irqsave(&mpu_lock, flag); if (cx->mpu_state_vote == num_online_cpus()) @@ -151,10 +151,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, (cx->mpu_logic_state == PWRDM_POWER_OFF); /* Enter broadcast mode for periodic timers */ - RCU_NONIDLE(tick_broadcast_enable()); + tick_broadcast_enable(); /* Enter broadcast mode for one-shot timers */ - RCU_NONIDLE(tick_broadcast_enter()); + tick_broadcast_enter(); /* * Call idle CPU PM enter notifier chain so that @@ -166,7 +166,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0) { pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); + omap_set_pwrdm_state(mpu_pd, cx->mpu_state); /* * Call idle CPU cluster PM enter notifier chain @@ -178,13 +178,13 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, index = 0; cx = state_ptr + index; pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); + omap_set_pwrdm_state(mpu_pd, cx->mpu_state); mpuss_can_lose_context = 0; } } } - omap4_enter_lowpower(dev->cpu, cx->cpu_state); + omap4_enter_lowpower(dev->cpu, cx->cpu_state, true); 
cpu_done[dev->cpu] = true; /* Wakeup CPU1 only if it is not offlined */ @@ -194,9 +194,9 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context) gic_dist_disable(); - RCU_NONIDLE(clkdm_deny_idle(cpu_clkdm[1])); - RCU_NONIDLE(omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON)); - RCU_NONIDLE(clkdm_allow_idle(cpu_clkdm[1])); + clkdm_deny_idle(cpu_clkdm[1]); + omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON); + clkdm_allow_idle(cpu_clkdm[1]); if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && mpuss_can_lose_context) { @@ -222,7 +222,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, cpu_pm_exit(); cpu_pm_out: - RCU_NONIDLE(tick_broadcast_exit()); + tick_broadcast_exit(); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); @@ -247,7 +247,8 @@ static struct cpuidle_driver omap4_idle_driver = { /* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */ .exit_latency = 328 + 440, .target_residency = 960, - .flags = CPUIDLE_FLAG_COUPLED, + .flags = CPUIDLE_FLAG_COUPLED | + CPUIDLE_FLAG_RCU_IDLE, .enter = omap_enter_idle_coupled, .name = "C2", .desc = "CPUx OFF, MPUSS CSWR", @@ -256,7 +257,8 @@ static struct cpuidle_driver omap4_idle_driver = { /* C3 - CPU0 OFF + CPU1 OFF + MPU OSWR */ .exit_latency = 460 + 518, .target_residency = 1100, - .flags = CPUIDLE_FLAG_COUPLED, + .flags = CPUIDLE_FLAG_COUPLED | + CPUIDLE_FLAG_RCU_IDLE, .enter = omap_enter_idle_coupled, .name = "C3", .desc = "CPUx OFF, MPUSS OSWR", @@ -282,7 +284,8 @@ static struct cpuidle_driver omap5_idle_driver = { /* C2 - CPU0 RET + CPU1 RET + MPU CSWR */ .exit_latency = 48 + 60, .target_residency = 100, - .flags = CPUIDLE_FLAG_TIMER_STOP, + .flags = CPUIDLE_FLAG_TIMER_STOP | + CPUIDLE_FLAG_RCU_IDLE, .enter = omap_enter_idle_smp, .name = "C2", .desc = "CPUx CSWR, MPUSS CSWR", diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 9fba98c2313a..7ad74db951f6 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ 
b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -33,6 +33,7 @@ * and first to wake-up when MPUSS low power states are excercised */ +#include <linux/cpuidle.h> #include <linux/kernel.h> #include <linux/io.h> #include <linux/errno.h> @@ -214,6 +215,7 @@ static void __init save_l2x0_context(void) * of OMAP4 MPUSS subsystem * @cpu : CPU ID * @power_state: Low power state. + * @rcuidle: RCU needs to be idled * * MPUSS states for the context save: * save_state = @@ -222,7 +224,8 @@ static void __init save_l2x0_context(void) * 2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR * 3 - CPUx L1 and logic lost + GIC + L2 lost: DEVICE OFF */ -int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) +__cpuidle int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state, + bool rcuidle) { struct omap4_cpu_pm_info *pm_info = &per_cpu(omap4_pm_info, cpu); unsigned int save_state = 0, cpu_logic_state = PWRDM_POWER_RET; @@ -268,6 +271,10 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) cpu_clear_prev_logic_pwrst(cpu); pwrdm_set_next_pwrst(pm_info->pwrdm, power_state); pwrdm_set_logic_retst(pm_info->pwrdm, cpu_logic_state); + + if (rcuidle) + ct_cpuidle_enter(); + set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.resume)); omap_pm_ops.scu_prepare(cpu, power_state); l2x0_pwrst_prepare(cpu, save_state); @@ -283,6 +290,9 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && cpu) gic_dist_enable(); + if (rcuidle) + ct_cpuidle_exit(); + /* * Restore the CPUx power state to ON otherwise CPUx * power domain can transitions to programmed low power diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 90a341b0369c..f97ff93f2fb4 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -29,7 +29,7 @@ static inline int omap4_idle_init(void) extern void *omap3_secure_ram_storage; extern void omap3_pm_off_mode_enable(int); -extern void 
omap_sram_idle(void); +extern void omap_sram_idle(bool rcuidle); extern int omap_pm_clkdms_setup(struct clockdomain *clkdm, void *unused); extern int omap3_pm_get_suspend_state(struct powerdomain *pwrdm); diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index d73c7b692116..68975771e633 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -26,6 +26,7 @@ #include <linux/delay.h> #include <linux/slab.h> #include <linux/of.h> +#include <linux/cpuidle.h> #include <trace/events/power.h> @@ -174,7 +175,7 @@ static int omap34xx_do_sram_idle(unsigned long save_state) return 0; } -void omap_sram_idle(void) +__cpuidle void omap_sram_idle(bool rcuidle) { /* Variable to tell what needs to be saved and restored * in omap_sram_idle*/ @@ -254,11 +255,18 @@ void omap_sram_idle(void) */ if (save_state) omap34xx_save_context(omap3_arm_context); + + if (rcuidle) + ct_cpuidle_enter(); + if (save_state == 1 || save_state == 3) cpu_suspend(save_state, omap34xx_do_sram_idle); else omap34xx_do_sram_idle(save_state); + if (rcuidle) + ct_cpuidle_exit(); + /* Restore normal SDRC POWER settings */ if (cpu_is_omap3430() && omap_rev() >= OMAP3430_REV_ES3_0 && (omap_type() == OMAP2_DEVICE_TYPE_EMU || @@ -294,7 +302,7 @@ static void omap3_pm_idle(void) if (omap_irq_pending()) return; - omap_sram_idle(); + omap3_do_wfi(); } #ifdef CONFIG_SUSPEND @@ -316,7 +324,7 @@ static int omap3_pm_suspend(void) omap3_intc_suspend(); - omap_sram_idle(); + omap_sram_idle(false); restore: /* Restore next_pwrsts */ diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index 5a7a949ae965..f57802f3ee3a 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -76,7 +76,7 @@ static int omap4_pm_suspend(void) * domain CSWR is not supported by hardware. * More details can be found in OMAP4430 TRM section 4.3.4.2. 
*/ - omap4_enter_lowpower(cpu_id, cpu_suspend_state); + omap4_enter_lowpower(cpu_id, cpu_suspend_state, false); /* Restore next powerdomain state */ list_for_each_entry(pwrst, &pwrst_list, node) { diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index fd974514a7b2..777f9f8e7cd8 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -187,9 +187,9 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag) trace_state = (PWRDM_TRACE_STATES_FLAG | ((next & OMAP_POWERSTATE_MASK) << 8) | ((prev & OMAP_POWERSTATE_MASK) << 0)); - trace_power_domain_target_rcuidle(pwrdm->name, - trace_state, - raw_smp_processor_id()); + trace_power_domain_target(pwrdm->name, + trace_state, + raw_smp_processor_id()); } break; default: @@ -541,8 +541,8 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) { /* Trace the pwrdm desired target state */ - trace_power_domain_target_rcuidle(pwrdm->name, pwrst, - raw_smp_processor_id()); + trace_power_domain_target(pwrdm->name, pwrst, + raw_smp_processor_id()); /* Program the pwrdm desired target state */ ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst); } diff --git a/arch/arm/mach-s3c/cpuidle-s3c64xx.c b/arch/arm/mach-s3c/cpuidle-s3c64xx.c index b1c5f43d4922..27a13cc27893 100644 --- a/arch/arm/mach-s3c/cpuidle-s3c64xx.c +++ b/arch/arm/mach-s3c/cpuidle-s3c64xx.c @@ -19,9 +19,8 @@ #include "regs-sys-s3c64xx.h" #include "regs-syscon-power-s3c64xx.h" -static int s3c64xx_enter_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) +static __cpuidle int s3c64xx_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { unsigned long tmp; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 4150e308e99c..42e19fff40ee 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -62,15 +62,15 @@ int 
acpi_processor_ffh_lpi_probe(unsigned int cpu) return psci_acpi_cpu_init_idle(cpu); } -int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) +__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { u32 state = lpi->address; if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(psci_cpu_suspend_enter, + return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter, lpi->index, state); else - return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, + return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, lpi->index, state); } #endif diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c index a2cfbacec2bb..c1125753fe9b 100644 --- a/arch/arm64/kernel/idle.c +++ b/arch/arm64/kernel/idle.c @@ -42,5 +42,4 @@ void noinstr arch_cpu_idle(void) * tricks */ cpu_do_idle(); - raw_local_irq_enable(); } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ffc5d76cf695..4e8327264255 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -865,7 +865,7 @@ static void do_handle_IPI(int ipinr) unsigned int cpu = smp_processor_id(); if ((unsigned)ipinr < NR_IPI) - trace_ipi_entry_rcuidle(ipi_types[ipinr]); + trace_ipi_entry(ipi_types[ipinr]); switch (ipinr) { case IPI_RESCHEDULE: @@ -914,7 +914,7 @@ static void do_handle_IPI(int ipinr) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit_rcuidle(ipi_types[ipinr]); + trace_ipi_exit(ipi_types[ipinr]); } static irqreturn_t ipi_handler(int irq, void *data) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index e7163f31f716..0fbdf5fe64d8 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -4,6 +4,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/pgtable.h> +#include <linux/cpuidle.h> #include <asm/alternative.h> #include <asm/cacheflush.h> #include <asm/cpufeature.h> @@ -104,6 +105,10 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * 
From this point debug exceptions are disabled to prevent * updates to mdscr register (saved and restored along with * general purpose registers) from kernel debuggers. + * + * Strictly speaking the trace_hardirqs_off() here is superfluous, + * hardirqs should be firmly off by now. This really ought to use + * something like raw_local_daif_save(). */ flags = local_daif_save(); @@ -120,6 +125,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ arm_cpuidle_save_irq_context(&context); + ct_cpuidle_enter(); + if (__cpu_suspend_enter(&state)) { /* Call the suspend finisher */ ret = fn(arg); @@ -133,8 +140,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ if (!ret) ret = -EOPNOTSUPP; + + ct_cpuidle_exit(); } else { - RCU_NONIDLE(__cpu_suspend_exit()); + ct_cpuidle_exit(); + __cpu_suspend_exit(); } arm_cpuidle_restore_irq_context(&context); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4c13dafc98b8..2777214cbf1a 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -175,7 +175,6 @@ SECTIONS ENTRY_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT HYPERVISOR_TEXT diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index 2b0ed515a88e..0c6e4b17fe00 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -100,6 +100,5 @@ void arch_cpu_idle(void) #ifdef CONFIG_CPU_PM_STOP asm volatile("stop\n"); #endif - raw_local_irq_enable(); } #endif diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 4b605aa2e1d6..b45d1073307f 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -309,7 +309,7 @@ void arch_cpu_idle_dead(void) while (!secondary_stack) arch_cpu_idle(); - local_irq_disable(); + raw_local_irq_disable(); asm volatile( "mov sp, %0\n" diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S index 68c980d08482..d718961786d2 100644 --- a/arch/csky/kernel/vmlinux.lds.S +++ 
b/arch/csky/kernel/vmlinux.lds.S @@ -34,7 +34,6 @@ SECTIONS SOFTIRQENTRY_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index e15eeaebd785..dd7f74ea2c20 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -44,7 +44,6 @@ void arch_cpu_idle(void) { __vmwait(); /* interrupts wake us up, but irqs are still disabled */ - raw_local_irq_enable(); } /* diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 57465bff1fe4..1140051a0c45 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -41,7 +41,6 @@ SECTIONS IRQENTRY_TEXT SOFTIRQENTRY_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 416305e550e2..f6195a0a00ae 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -242,6 +242,7 @@ void arch_cpu_idle(void) (*mark_idle)(1); raw_safe_halt(); + raw_local_irq_disable(); if (mark_idle) (*mark_idle)(0); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index fa9c0ab8c6fc..83ef044b63ef 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -25,6 +25,7 @@ #include <linux/platform_device.h> #include <linux/sched/cputime.h> +#include <asm/cputime.h> #include <asm/delay.h> #include <asm/efi.h> #include <asm/hw_irq.h> diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 9b265783be6a..53dfde161c8a 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -51,7 +51,6 @@ SECTIONS { __end_ivt_text = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/loongarch/kernel/idle.c b/arch/loongarch/kernel/idle.c index 1a65d0527d25..0b5dd2faeb90 100644 --- a/arch/loongarch/kernel/idle.c +++ b/arch/loongarch/kernel/idle.c @@ -13,4 +13,5 @@ void __cpuidle 
arch_cpu_idle(void) { raw_local_irq_enable(); __arch_cpu_idle(); /* idle instruction needs irq enabled */ + raw_local_irq_disable(); } diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 733b16e8d55d..78506b31ba61 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -43,7 +43,6 @@ SECTIONS .text : { TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds index 387f334e87d3..2624fc18c131 100644 --- a/arch/m68k/kernel/vmlinux-nommu.lds +++ b/arch/m68k/kernel/vmlinux-nommu.lds @@ -48,7 +48,6 @@ SECTIONS { IRQENTRY_TEXT SOFTIRQENTRY_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT *(.fixup) . = ALIGN(16); diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index ed1d9eda3190..1ccdd04ae462 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -19,7 +19,6 @@ SECTIONS IRQENTRY_TEXT SOFTIRQENTRY_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 4a52f44f2ef0..f13ddcc2af5c 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -19,7 +19,6 @@ SECTIONS IRQENTRY_TEXT SOFTIRQENTRY_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 1f802aab2b96..56342e11442d 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -140,5 +140,4 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu) void arch_cpu_idle(void) { - raw_local_irq_enable(); } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index fb31747ec092..ae50d3d04a7d 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ 
-36,7 +36,6 @@ SECTIONS { EXIT_TEXT EXIT_CALL SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 53adcc1b2ed5..5abc8b7340f8 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -33,13 +33,13 @@ static void __cpuidle r3081_wait(void) { unsigned long cfg = read_c0_conf(); write_c0_conf(cfg | R30XX_CONF_HALT); - raw_local_irq_enable(); } void __cpuidle r4k_wait(void) { raw_local_irq_enable(); __r4k_wait(); + raw_local_irq_disable(); } /* @@ -57,7 +57,6 @@ void __cpuidle r4k_wait_irqoff(void) " .set arch=r4000 \n" " wait \n" " .set pop \n"); - raw_local_irq_enable(); } /* @@ -77,7 +76,6 @@ static void __cpuidle rm7k_wait_irqoff(void) " wait \n" " mtc0 $1, $12 # stalls until W stage \n" " .set pop \n"); - raw_local_irq_enable(); } /* @@ -103,6 +101,8 @@ static void __cpuidle au1k_wait(void) " nop \n" " .set pop \n" : : "r" (au1k_wait), "r" (c0status)); + + raw_local_irq_disable(); } static int __initdata nowait; @@ -241,18 +241,16 @@ void __init check_wait(void) } } -void arch_cpu_idle(void) +__cpuidle void arch_cpu_idle(void) { if (cpu_wait) cpu_wait(); - else - raw_local_irq_enable(); } #ifdef CONFIG_CPU_IDLE -int mips_cpuidle_wait_enter(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) +__cpuidle int mips_cpuidle_wait_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { arch_cpu_idle(); return index; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 1f98947fe715..52cbde60edf5 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -61,7 +61,6 @@ SECTIONS .text : { TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 29593b98567d..f84021303f6a 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -33,7 +33,6 @@ EXPORT_SYMBOL(pm_power_off); void 
arch_cpu_idle(void) { - raw_local_irq_enable(); } /* diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S index 126e114744cb..37b958055064 100644 --- a/arch/nios2/kernel/vmlinux.lds.S +++ b/arch/nios2/kernel/vmlinux.lds.S @@ -24,7 +24,6 @@ SECTIONS .text : { TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index f94b5ec06786..dfa558f98ed8 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -102,6 +102,7 @@ void arch_cpu_idle(void) raw_local_irq_enable(); if (mfspr(SPR_UPR) & SPR_UPR_PMP) mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); + raw_local_irq_disable(); } void (*pm_power_off)(void) = NULL; diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index d5c7bb0fae57..bc1306047837 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index c4f8374c7018..c064719b49b0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -183,8 +183,6 @@ void arch_cpu_idle_dead(void) void __cpuidle arch_cpu_idle(void) { - raw_local_irq_enable(); - /* nop on real hardware, qemu will idle sleep. 
*/ asm volatile("or %%r10,%%r10,%%r10\n":::); } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 2769eb991f58..1aaa2ca09800 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -86,7 +86,6 @@ SECTIONS TEXT_TEXT LOCK_TEXT SCHED_TEXT - CPUIDLE_TEXT KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 77cd4c5a2d63..b9a725abc596 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -51,10 +51,9 @@ void arch_cpu_idle(void) * Some power_save functions return with * interrupts enabled, some don't. */ - if (irqs_disabled()) - raw_local_irq_enable(); + if (!irqs_disabled()) + raw_local_irq_disable(); } else { - raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 958e77a24f85..f128c7cf9c1d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -112,7 +112,6 @@ SECTIONS #endif NOINSTR_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 8955f2432c2d..774ffde386ab 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -39,7 +39,6 @@ extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { cpu_do_idle(); - raw_local_irq_enable(); } void __show_regs(struct pt_regs *regs) diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 75e0fa8a700a..eab9edc3b631 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -39,7 +39,6 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT ENTRY_TEXT diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 4e6c88aa4d87..643ab60e9efb 100644 --- 
a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -42,7 +42,6 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT ENTRY_TEXT diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 4bf1ee293f2b..b04fb418307c 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -12,9 +12,9 @@ #include <linux/notifier.h> #include <linux/init.h> #include <linux/cpu.h> -#include <linux/sched/cputime.h> #include <trace/events/power.h> #include <asm/cpu_mf.h> +#include <asm/cputime.h> #include <asm/nmi.h> #include <asm/smp.h> #include "entry.h" @@ -66,7 +66,6 @@ void arch_cpu_idle(void) idle->idle_count++; account_idle_time(cputime_to_nsecs(idle_time)); raw_write_seqcount_end(&idle->seqcount); - raw_local_irq_enable(); } static ssize_t show_idle_count(struct device *dev, diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index cbf9c1b0beda..20262e3c0cff 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -44,7 +44,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9436f3053b88..e0a88dcaf5cb 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -7,13 +7,13 @@ */ #include <linux/kernel_stat.h> -#include <linux/sched/cputime.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> #include <asm/alternative.h> +#include <asm/cputime.h> #include <asm/vtimer.h> #include <asm/vtime.h> #include <asm/cpu_mf.h> diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index f59814983bd5..3418c40f0099 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -25,6 +25,7 @@ void default_idle(void) raw_local_irq_enable(); /* Isn't this racy ? 
*/ cpu_sleep(); + raw_local_irq_disable(); clear_bl_bit(); } diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index b6276a3521d7..9644fe187a3f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -30,7 +30,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c index 396f46bca52e..6c00cbad7fb5 100644 --- a/arch/sparc/kernel/leon_pmc.c +++ b/arch/sparc/kernel/leon_pmc.c @@ -57,6 +57,8 @@ static void pmc_leon_idle_fixup(void) "lda [%0] %1, %%g0\n" : : "r"(address), "i"(ASI_LEON_BYPASS)); + + raw_local_irq_disable(); } /* @@ -70,6 +72,8 @@ static void pmc_leon_idle(void) /* For systems without power-down, this will be no-op */ __asm__ __volatile__ ("wr %g0, %asr19\n\t"); + + raw_local_irq_disable(); } /* Install LEON Power Down function */ diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 33b0215a4182..9c7c662cb565 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -71,7 +71,6 @@ void arch_cpu_idle(void) { if (sparc_idle) (*sparc_idle)(); - raw_local_irq_enable(); } /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. 
*/ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 6335b698a4b4..91c2b8124527 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -59,7 +59,6 @@ void arch_cpu_idle(void) { if (tlb_type != hypervisor) { touch_nmi_watchdog(); - raw_local_irq_enable(); } else { unsigned long pstate; @@ -90,6 +89,8 @@ void arch_cpu_idle(void) "wrpr %0, %%g0, %%pstate" : "=&r" (pstate) : "i" (PSTATE_IE)); + + raw_local_irq_disable(); } } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d55ae65a07ad..d317a843f7ea 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -50,7 +50,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 2b7fc5b54164..3385d653ebd0 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -74,7 +74,6 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 47830ade35ed..106b7da2f8d6 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -218,7 +218,6 @@ void arch_cpu_idle(void) { cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); um_idle_sleep(); - raw_local_irq_enable(); } int __cant_sleep(void) { diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 71a59b8adbdc..5c92d58a78e8 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -35,7 +35,6 @@ SECTIONS _stext = .; TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 112b2375d021..b22f34b8684a 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -34,6 +34,7 @@ SECTIONS _text = .; /* Text */ *(.text) *(.text.*) + 
*(.noinstr.text) _etext = . ; } .rodata : { diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index f9eb1134f22d..ad0d51f03cb4 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -31,6 +31,8 @@ TDX_R12 | TDX_R13 | \ TDX_R14 | TDX_R15 ) +.section .noinstr.text, "ax" + /* * __tdx_module_call() - Used by TDX guests to request services from * the TDX module (does not include VMM services) using TDCALL instruction. @@ -139,19 +141,6 @@ SYM_FUNC_START(__tdx_hypercall) movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx - /* - * For the idle loop STI needs to be called directly before the TDCALL - * that enters idle (EXIT_REASON_HLT case). STI instruction enables - * interrupts only one instruction later. If there is a window between - * STI and the instruction that emulates the HALT state, there is a - * chance for interrupts to happen in this window, which can delay the - * HLT operation indefinitely. Since this is the not the desired - * result, conditionally call STI before TDCALL. - */ - testq $TDX_HCALL_ISSUE_STI, %rsi - jz .Lskip_sti - sti -.Lskip_sti: tdcall /* diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 669d9e4f2901..3bd111d5e6a0 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -64,8 +64,9 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) } /* Called from __tdx_hypercall() for unrecoverable failure */ -void __tdx_hypercall_failed(void) +noinstr void __tdx_hypercall_failed(void) { + instrumentation_begin(); panic("TDVMCALL failed. TDX module bug?"); } @@ -75,7 +76,7 @@ void __tdx_hypercall_failed(void) * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and * guest sides of these calls. 
*/ -static u64 hcall_func(u64 exit_reason) +static __always_inline u64 hcall_func(u64 exit_reason) { return exit_reason; } @@ -220,7 +221,7 @@ static int ve_instr_len(struct ve_info *ve) } } -static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) +static u64 __cpuidle __halt(const bool irq_disabled) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, @@ -240,20 +241,14 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti) * can keep the vCPU in virtual HLT, even if an IRQ is * pending, without hanging/breaking the guest. */ - return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0); + return __tdx_hypercall(&args, 0); } static int handle_halt(struct ve_info *ve) { - /* - * Since non safe halt is mainly used in CPU offlining - * and the guest will always stay in the halt state, don't - * call the STI instruction (set do_sti as false). - */ const bool irq_disabled = irqs_disabled(); - const bool do_sti = false; - if (__halt(irq_disabled, do_sti)) + if (__halt(irq_disabled)) return -EIO; return ve_instr_len(ve); @@ -261,18 +256,12 @@ static int handle_halt(struct ve_info *ve) void __cpuidle tdx_safe_halt(void) { - /* - * For do_sti=true case, __tdx_hypercall() function enables - * interrupts using the STI instruction before the TDCALL. So - * set irq_disabled as false. - */ const bool irq_disabled = false; - const bool do_sti = true; /* * Use WARN_ONCE() to report the failure. 
*/ - if (__halt(irq_disabled, do_sti)) + if (__halt(irq_disabled)) WARN_ONCE(1, "HLT instruction emulation failed\n"); } diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index 58461fa18b6f..ed308719236c 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -41,18 +41,15 @@ static inline unsigned int brs_to(int idx) return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1; } -static inline void set_debug_extn_cfg(u64 val) +static __always_inline void set_debug_extn_cfg(u64 val) { /* bits[4:3] must always be set to 11b */ - wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3); + __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32); } -static inline u64 get_debug_extn_cfg(void) +static __always_inline u64 get_debug_extn_cfg(void) { - u64 val; - - rdmsrl(MSR_AMD_DBG_EXTN_CFG, val); - return val; + return __rdmsr(MSR_AMD_DBG_EXTN_CFG); } static bool __init amd_brs_detect(void) @@ -405,7 +402,7 @@ void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_i * called from ACPI processor_idle.c or acpi_pad.c * with interrupts disabled */ -void perf_amd_brs_lopwr_cb(bool lopwr_in) +void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); union amd_debug_extn_cfg cfg; diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 5efd01b548d1..808b4eece251 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -71,7 +71,7 @@ ATOMIC64_DECL(add_unless); * the old value. */ -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } @@ -85,7 +85,7 @@ static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) * Atomically xchgs the value of @v to @n and returns * the old value. 
*/ -static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { s64 o; unsigned high = (unsigned)(n >> 32); @@ -104,7 +104,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) * * Atomically sets the value of @v to @n. */ -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -119,7 +119,7 @@ static inline void arch_atomic64_set(atomic64_t *v, s64 i) * * Atomically reads the value of @v and returns it. */ -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { s64 r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); @@ -133,7 +133,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v) * * Atomically adds @i to @v and returns @i + *@v */ -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -145,7 +145,7 @@ static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -154,7 +154,7 @@ static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) } #define arch_atomic64_sub_return arch_atomic64_sub_return -static inline s64 arch_atomic64_inc_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { s64 a; alternative_atomic64(inc_return, "=&A" (a), @@ -163,7 +163,7 @@ static inline s64 arch_atomic64_inc_return(atomic64_t *v) } #define arch_atomic64_inc_return arch_atomic64_inc_return -static 
inline s64 arch_atomic64_dec_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { s64 a; alternative_atomic64(dec_return, "=&A" (a), @@ -179,7 +179,7 @@ static inline s64 arch_atomic64_dec_return(atomic64_t *v) * * Atomically adds @i to @v. */ -static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -194,7 +194,7 @@ static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -208,7 +208,7 @@ static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void arch_atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); @@ -221,7 +221,7 @@ static inline void arch_atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -static inline void arch_atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); @@ -237,7 +237,7 @@ static inline void arch_atomic64_dec(atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns non-zero if the add was done, zero otherwise. 
*/ -static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) +static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -248,7 +248,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) } #define arch_atomic64_add_unless arch_atomic64_add_unless -static inline int arch_atomic64_inc_not_zero(atomic64_t *v) +static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; alternative_atomic64(inc_not_zero, "=&a" (r), @@ -257,7 +257,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v) } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 r; alternative_atomic64(dec_if_positive, "=&A" (r), @@ -269,7 +269,7 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void arch_atomic64_and(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -277,7 +277,7 @@ static inline void arch_atomic64_and(s64 i, atomic64_t *v) c = old; } -static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -288,7 +288,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -296,7 +296,7 @@ static inline void arch_atomic64_or(s64 i, atomic64_t *v) c = old; } -static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -307,7 +307,7 @@ static 
inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -315,7 +315,7 @@ static inline void arch_atomic64_xor(s64 i, atomic64_t *v) c = old; } -static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -326,7 +326,7 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { s64 old, c = 0; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 7886d0578fc9..c496595bf601 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -17,7 +17,7 @@ * Atomically reads the value of @v. * Doesn't imply a read memory barrier. */ -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __READ_ONCE((v)->counter); } @@ -29,7 +29,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v) * * Atomically sets the value of @v to @i. */ -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __WRITE_ONCE(v->counter, i); } @@ -55,7 +55,7 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) * * Atomically subtracts @i from @v. 
*/ -static inline void arch_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) @@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(s64 i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } @@ -113,7 +113,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline bool arch_atomic64_dec_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } @@ -127,7 +127,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline bool arch_atomic64_inc_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } @@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) +static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } @@ -161,25 +161,25 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } #define arch_atomic64_sub_return arch_atomic64_sub_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return xadd(&v->counter, -i); } #define arch_atomic64_fetch_sub arch_atomic64_fetch_sub -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } @@ -191,13 +191,13 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } #define arch_atomic64_xchg arch_atomic64_xchg -static inline void arch_atomic64_and(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -205,7 +205,7 @@ static inline void arch_atomic64_and(s64 i, atomic64_t *v) : "memory"); } -static inline s64 arch_atomic64_fetch_and(s64 
i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -215,7 +215,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -223,7 +223,7 @@ static inline void arch_atomic64_or(s64 i, atomic64_t *v) : "memory"); } -static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -233,7 +233,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -241,7 +241,7 @@ static inline void arch_atomic64_xor(s64 i, atomic64_t *v) : "memory"); } -static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h index 9656a5bc6fea..9a710c060445 100644 --- a/arch/x86/include/asm/fpu/xcr.h +++ b/arch/x86/include/asm/fpu/xcr.h @@ -5,7 +5,7 @@ #define XCR_XFEATURE_ENABLED_MASK 0x00000000 #define XCR_XFEATURE_IN_USE_MASK 0x00000001 -static inline u64 xgetbv(u32 index) +static __always_inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -27,7 +27,7 @@ static inline void xsetbv(u32 index, u64 value) * * Callers should check X86_FEATURE_XGETBV1. 
*/ -static inline u64 xfeatures_in_use(void) +static __always_inline u64 xfeatures_in_use(void) { return xgetbv(XCR_XFEATURE_IN_USE_MASK); } diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 7793e52d6237..8c5ae649d2df 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -8,9 +8,6 @@ #include <asm/nospec-branch.h> -/* Provide __cpuidle; we can't safely include <linux/cpu.h> */ -#define __cpuidle __section(".cpuidle.text") - /* * Interrupt control: */ @@ -45,13 +42,13 @@ static __always_inline void native_irq_enable(void) asm volatile("sti": : :"memory"); } -static inline __cpuidle void native_safe_halt(void) +static __always_inline void native_safe_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } -static inline __cpuidle void native_halt(void) +static __always_inline void native_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); @@ -84,7 +81,7 @@ static __always_inline void arch_local_irq_enable(void) * Used in the idle loop; sti takes one instruction cycle * to complete: */ -static inline __cpuidle void arch_safe_halt(void) +static __always_inline void arch_safe_halt(void) { native_safe_halt(); } @@ -93,7 +90,7 @@ static inline __cpuidle void arch_safe_halt(void) * Used when interrupts are already enabled or to * shutdown the processor: */ -static inline __cpuidle void halt(void) +static __always_inline void halt(void) { native_halt(); } diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index 6c5765192102..511b35069187 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -8,7 +8,7 @@ extern struct clocksource kvm_clock; DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); -static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) { return &this_cpu_read(hv_clock_per_cpu)->pvti; 
} diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 3a8fdf881313..778df05f8539 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,7 +26,7 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -static inline void __monitor(const void *eax, unsigned long ecx, +static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { /* "monitor %eax, %ecx, %edx;" */ @@ -34,7 +34,7 @@ static inline void __monitor(const void *eax, unsigned long ecx, :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __monitorx(const void *eax, unsigned long ecx, +static __always_inline void __monitorx(const void *eax, unsigned long ecx, unsigned long edx) { /* "monitorx %eax, %ecx, %edx;" */ @@ -42,7 +42,7 @@ static inline void __monitorx(const void *eax, unsigned long ecx, :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __mwait(unsigned long eax, unsigned long ecx) { mds_idle_clear_cpu_buffers(); @@ -77,8 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) * EAX (logical) address to monitor * ECX #GP if not zero */ -static inline void __mwaitx(unsigned long eax, unsigned long ebx, - unsigned long ecx) +static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx, + unsigned long ecx) { /* No MDS buffer clear as this is AMD/HYGON only */ @@ -87,7 +87,7 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, :: "a" (eax), "b" (ebx), "c" (ecx)); } -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx) { mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ @@ -105,7 +105,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. 
*/ -static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 771b0a2b7a34..e04313e89f4f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -564,7 +564,7 @@ static __always_inline void mds_user_clear_cpu_buffers(void) * * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void mds_idle_clear_cpu_buffers(void) { if (static_branch_likely(&mds_idle_clear)) mds_clear_cpu_buffers(); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 73e9522db7c1..cf40e813b3d7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -26,7 +26,7 @@ DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); void paravirt_set_sched_clock(u64 (*func)(void)); -static inline u64 paravirt_sched_clock(void) +static __always_inline u64 paravirt_sched_clock(void) { return static_call(pv_sched_clock)(); } @@ -168,7 +168,7 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -static inline void arch_safe_halt(void) +static __always_inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); } @@ -178,7 +178,9 @@ static inline void halt(void) PVOP_VCALL0(irq.halt); } -static inline void wbinvd(void) +extern noinstr void pv_native_wbinvd(void); + +static __always_inline void wbinvd(void) { PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV)); } diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6496bdbcac98..8fc15ed5e60b 100644 --- a/arch/x86/include/asm/perf_event.h +++ 
b/arch/x86/include/asm/perf_event.h @@ -586,7 +586,7 @@ extern void perf_amd_brs_lopwr_cb(bool lopwr_in); DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); -static inline void perf_lopwr_cb(bool lopwr_in) +static __always_inline void perf_lopwr_cb(bool lopwr_in) { static_call_mod(perf_lopwr_cb)(lopwr_in); } diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 19b695ff2c68..0c92db84469d 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -7,6 +7,7 @@ /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); @@ -39,7 +40,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. 
*/ -static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) +static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) { u64 product; #ifdef __i386__ diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h index c0ef921c0586..8009d781c2f9 100644 --- a/arch/x86/include/asm/shared/io.h +++ b/arch/x86/include/asm/shared/io.h @@ -5,13 +5,13 @@ #include <linux/types.h> #define BUILDIO(bwl, bw, type) \ -static inline void __out##bwl(type value, u16 port) \ +static __always_inline void __out##bwl(type value, u16 port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ -static inline type __in##bwl(u16 port) \ +static __always_inline type __in##bwl(u16 port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index e53f26228fbb..559176887791 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -8,7 +8,6 @@ #define TDX_HYPERCALL_STANDARD 0 #define TDX_HCALL_HAS_OUTPUT BIT(0) -#define TDX_HCALL_ISSUE_STI BIT(1) #define TDX_CPUID_LEAF_ID 0x21 #define TDX_IDENT "IntelTDX " diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 35f709f619fb..f358a23f228d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru) } #endif -static inline void native_wbinvd(void) +static __always_inline void native_wbinvd(void) { asm volatile("wbinvd": : :"memory"); } @@ -179,7 +179,7 @@ static inline void __write_cr4(unsigned long x) native_write_cr4(x); } -static inline void wbinvd(void) +static __always_inline void wbinvd(void) { native_wbinvd(); } @@ -196,7 +196,7 @@ static inline void load_gs_index(unsigned int selector) #endif /* CONFIG_PARAVIRT_XXL */ -static inline void clflush(volatile void *__p) +static __always_inline void 
clflush(volatile void *__p) { asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } @@ -295,7 +295,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) return 0; } -static inline void tile_release(void) +static __always_inline void tile_release(void) { /* * Instruction opcode for TILERELEASE; supported in binutils diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index e5e0fe10c692..a2dd24947eb8 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -382,7 +382,7 @@ MULTI_stack_switch(struct multicall_entry *mcl, } #endif -static inline int +static __always_inline int HYPERVISOR_sched_op(int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bca0bd8f4846..85168740f76a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -86,7 +86,7 @@ void update_spec_ctrl_cond(u64 val) wrmsrl(MSR_IA32_SPEC_CTRL, val); } -u64 spec_ctrl_current(void) +noinstr u64 spec_ctrl_current(void) { return this_cpu_read(x86_spec_ctrl_current); } diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 02039ec3597d..11f83d07925e 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -143,7 +143,7 @@ static __init int parse_no_stealacc(char *arg) } early_param("no-steal-acc", parse_no_stealacc); -static unsigned long long notrace vmware_sched_clock(void) +static noinstr u64 vmware_sched_clock(void) { unsigned long long ns; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9baa89a8877d..dccce58201b7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -853,12 +853,12 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr) * Initialize register state that may prevent from entering low-power idle. * This function will be invoked from the cpuidle driver only when needed. 
*/ -void fpu_idle_fpregs(void) +noinstr void fpu_idle_fpregs(void) { /* Note: AMX_TILE being enabled implies XGETBV1 support */ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) && (xfeatures_in_use() & XFEATURE_MASK_XTILE)) { tile_release(); - fpregs_deactivate(&current->thread.fpu); + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); } } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 16333ba1904b..0f35d44c56fe 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -71,12 +71,12 @@ static int kvm_set_wallclock(const struct timespec64 *now) return -ENODEV; } -static u64 kvm_clock_read(void) +static noinstr u64 kvm_clock_read(void) { u64 ret; preempt_disable_notrace(); - ret = pvclock_clocksource_read(this_cpu_pvti()); + ret = pvclock_clocksource_read_nowd(this_cpu_pvti()); preempt_enable_notrace(); return ret; } @@ -86,7 +86,7 @@ static u64 kvm_clock_get_cycles(struct clocksource *cs) return kvm_clock_read(); } -static u64 kvm_sched_clock_read(void) +static noinstr u64 kvm_sched_clock_read(void) { return kvm_clock_read() - kvm_sched_clock_offset; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 327757afb027..5bf4f0b2f35d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -216,6 +216,11 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val) native_set_debugreg(regno, val); } +noinstr void pv_native_wbinvd(void) +{ + native_wbinvd(); +} + static noinstr void pv_native_irq_enable(void) { native_irq_enable(); @@ -225,6 +230,11 @@ static noinstr void pv_native_irq_disable(void) { native_irq_disable(); } + +static noinstr void pv_native_safe_halt(void) +{ + native_safe_halt(); +} #endif enum paravirt_lazy_mode paravirt_get_lazy_mode(void) @@ -256,7 +266,7 @@ struct paravirt_patch_template pv_ops = { .cpu.read_cr0 = native_read_cr0, .cpu.write_cr0 = native_write_cr0, .cpu.write_cr4 = native_write_cr4, - .cpu.wbinvd = native_wbinvd, + .cpu.wbinvd = pv_native_wbinvd,
.cpu.read_msr = native_read_msr, .cpu.write_msr = native_write_msr, .cpu.read_msr_safe = native_read_msr_safe, @@ -290,7 +300,7 @@ struct paravirt_patch_template pv_ops = { .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), - .irq.safe_halt = native_safe_halt, + .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, #endif /* CONFIG_PARAVIRT_XXL */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 40d156a31676..e57cd31bfec4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -24,6 +24,7 @@ #include <linux/cpuidle.h> #include <linux/acpi.h> #include <linux/elf-randomize.h> +#include <linux/static_call.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> @@ -694,7 +695,24 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -static void (*x86_idle)(void); +/* + * We use this if we don't have any better idle routine.. + */ +void __cpuidle default_idle(void) +{ + raw_safe_halt(); + raw_local_irq_disable(); +} +#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) +EXPORT_SYMBOL(default_idle); +#endif + +DEFINE_STATIC_CALL_NULL(x86_idle, default_idle); + +static bool x86_idle_set(void) +{ + return !!static_call_query(x86_idle); +} #ifndef CONFIG_SMP static inline void play_dead(void) @@ -717,28 +735,17 @@ void arch_cpu_idle_dead(void) /* * Called from the generic idle code. */ -void arch_cpu_idle(void) -{ - x86_idle(); -} - -/* - * We use this if we don't have any better idle routine.. 
- */ -void __cpuidle default_idle(void) +void __cpuidle arch_cpu_idle(void) { - raw_safe_halt(); + static_call(x86_idle)(); } -#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) -EXPORT_SYMBOL(default_idle); -#endif #ifdef CONFIG_XEN bool xen_set_default_idle(void) { - bool ret = !!x86_idle; + bool ret = x86_idle_set(); - x86_idle = default_idle; + static_call_update(x86_idle, default_idle); return ret; } @@ -800,13 +807,7 @@ static void amd_e400_idle(void) default_idle(); - /* - * The switch back from broadcast mode needs to be called with - * interrupts disabled. - */ - raw_local_irq_disable(); tick_broadcast_exit(); - raw_local_irq_enable(); } /* @@ -864,12 +865,10 @@ static __cpuidle void mwait_idle(void) } __monitor((void *)&current_thread_info()->flags, 0, 0); - if (!need_resched()) + if (!need_resched()) { __sti_mwait(0, 0); - else - raw_local_irq_enable(); - } else { - raw_local_irq_enable(); + raw_local_irq_disable(); + } } __current_clr_polling(); } @@ -880,20 +879,20 @@ void select_idle_routine(const struct cpuinfo_x86 *c) if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); #endif - if (x86_idle || boot_option_idle_override == IDLE_POLL) + if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) return; if (boot_cpu_has_bug(X86_BUG_AMD_E400)) { pr_info("using AMD E400 aware idle routine\n"); - x86_idle = amd_e400_idle; + static_call_update(x86_idle, amd_e400_idle); } else if (prefer_mwait_c1_over_halt(c)) { pr_info("using mwait in idle threads\n"); - x86_idle = mwait_idle; + static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - x86_idle = tdx_safe_halt; + static_call_update(x86_idle, tdx_safe_halt); } else - x86_idle = default_idle; + static_call_update(x86_idle, default_idle); } void amd_e400_c1e_apic_setup(void) @@ -946,7 +945,7 @@ static int
__init idle_setup(char *str) * To continue to load the CPU idle driver, don't touch * the boot_option_idle_override. */ - x86_idle = default_idle; + static_call_update(x86_idle, default_idle); boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index eda37df016f0..56acf53a782a 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -64,7 +64,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) return flags & valid_flags; } -u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +static __always_inline +u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd) { unsigned version; u64 ret; @@ -77,7 +78,7 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) flags = src->flags; } while (pvclock_read_retry(src, version)); - if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { + if (dowd && unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; pvclock_touch_watchdogs(); } @@ -100,16 +101,25 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) * updating at the same time, and one of them could be slightly behind, * making the assumption that last_value always go forward fail to hold. 
*/ - last = atomic64_read(&last_value); + last = arch_atomic64_read(&last_value); do { - if (ret < last) + if (ret <= last) return last; - last = atomic64_cmpxchg(&last_value, last, ret); - } while (unlikely(last != ret)); + } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret)); return ret; } +u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +{ + return __pvclock_clocksource_read(src, true); +} + +noinstr u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src) +{ + return __pvclock_clocksource_read(src, false); +} + void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, struct pvclock_vcpu_time_info *vcpu_time, struct timespec64 *ts) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a78e73da4a74..8c33936b017d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -215,7 +215,7 @@ static void __init cyc2ns_init_secondary_cpus(void) /* * Scheduler clock - returns current time in nanosec units. */ -u64 native_sched_clock(void) +noinstr u64 native_sched_clock(void) { if (static_branch_likely(&__use_tsc)) { u64 tsc_now = rdtsc(); @@ -248,7 +248,7 @@ u64 native_sched_clock_from_tsc(u64 tsc) /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT -unsigned long long sched_clock(void) +noinstr u64 sched_clock(void) { return paravirt_sched_clock(); } @@ -258,8 +258,7 @@ bool using_native_sched_clock(void) return static_call_query(pv_sched_clock) == native_sched_clock; } #else -unsigned long long -sched_clock(void) __attribute__((alias("native_sched_clock"))); +u64 sched_clock(void) __attribute__((alias("native_sched_clock"))); bool using_native_sched_clock(void) { return true; } #endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 2e0ee14229bf..25f155205770 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -129,7 +129,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - 
CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT SOFTIRQENTRY_TEXT diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index dd8cd8831251..a64017602010 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -8,7 +8,7 @@ #include <asm/alternative.h> #include <asm/export.h> -.pushsection .noinstr.text, "ax" +.section .noinstr.text, "ax" /* * We build a jump to memcpy_orig by default which gets NOPped out on @@ -43,7 +43,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +SYM_FUNC_ALIAS(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) /* @@ -184,4 +184,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig) RET SYM_FUNC_END(memcpy_orig) -.popsection diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 724bbf83eb5b..02661861e5dd 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -13,6 +13,8 @@ #undef memmove +.section .noinstr.text, "ax" + /* * Implement memmove(). This can handle overlap between src and dst. * @@ -213,5 +215,5 @@ SYM_FUNC_START(__memmove) SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) -SYM_FUNC_ALIAS_WEAK(memmove, __memmove) +SYM_FUNC_ALIAS(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index fc9ffd3ff3b2..6143b1a6fa2c 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -6,6 +6,8 @@ #include <asm/alternative.h> #include <asm/export.h> +.section .noinstr.text, "ax" + /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. 
The code is @@ -43,7 +45,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS_WEAK(memset, __memset) +SYM_FUNC_ALIAS(memset, __memset) EXPORT_SYMBOL(memset) /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5b1379662877..5cacd4890bab 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1068,7 +1068,7 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = { .write_cr4 = xen_write_cr4, - .wbinvd = native_wbinvd, + .wbinvd = pv_native_wbinvd, .read_msr = xen_read_msr, .write_msr = xen_write_msr, diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 06c3c2fb4b06..6092fea7d651 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -24,7 +24,7 @@ noinstr void xen_force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } -static void xen_safe_halt(void) +static noinstr void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). */ if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 9ef0a5cca96e..6b8836deb738 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -60,9 +60,17 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs) return xen_clocksource_read(); } -static u64 xen_sched_clock(void) +static noinstr u64 xen_sched_clock(void) { - return xen_clocksource_read() - xen_sched_clock_offset; + struct pvclock_vcpu_time_info *src; + u64 ret; + + preempt_disable_notrace(); + src = &__this_cpu_read(xen_vcpu)->time; + ret = pvclock_clocksource_read_nowd(src); + ret -= xen_sched_clock_offset; + preempt_enable_notrace(); + return ret; } static void xen_read_wallclock(struct timespec64 *ts) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 68e0e2f06d66..a815577d25fd 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -183,6 +183,7 @@ void coprocessor_flush_release_all(struct thread_info *ti) void 
arch_cpu_idle(void) { platform_idle(); + raw_local_irq_disable(); } /* diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 965a3952c47b..c14fd96f459d 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -125,7 +125,6 @@ SECTIONS ENTRY_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT *(.fixup) } |