diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-08-11 08:56:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-08-11 08:56:01 -0700 |
commit | 8001a975f955f532d8d0c27b224d58646e202f2a (patch) | |
tree | 1cbd45380d5de43c8f0cee10cdcf092c997fb84f | |
parent | b2dbdf2ca1d2803e9cdc46a94554c4a39ffb235a (diff) | |
parent | 96ea91e7b6ee2c406598d859e7348b4829404eea (diff) | |
download | lwn-8001a975f955f532d8d0c27b224d58646e202f2a.tar.gz lwn-8001a975f955f532d8d0c27b224d58646e202f2a.zip |
Merge tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Michael Ellerman:
"All fixes for code that went in this cycle.
- a revert of an optimisation to the syscall exit path, which could
lead to an oops on either older machines or machines with > 1TB of
memory
- disable some deep idle states if the firmware configuration for
them fails
- re-enable HARD/SOFT lockup detectors in defconfigs after a Kconfig
change
- six fairly small patches fixing bugs in our new watchdog code
Thanks to: Gautham R Shenoy, Nicholas Piggin"
* tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
powerpc/watchdog: add locking around init/exit functions
powerpc/watchdog: Fix marking of stuck CPUs
powerpc/watchdog: Fix final-check recovered case
powerpc/watchdog: Moderate touch_nmi_watchdog overhead
powerpc/watchdog: Improve watchdog lock primitive
powerpc: NMI IPI improve lock primitive
powerpc/configs: Re-enable HARD/SOFT lockup detectors
powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails
Revert "powerpc/64: Avoid restore_math call if possible in syscall exit"
-rw-r--r-- | arch/powerpc/configs/powernv_defconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/configs/ppc64_defconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/configs/pseries_defconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/entry_64.S | 60 | ||||
-rw-r--r-- | arch/powerpc/kernel/process.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kernel/watchdog.c | 49 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/idle.c | 41 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-powernv.c | 10 |
9 files changed, 111 insertions, 68 deletions
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 0695ce047d56..34fc9bbfca9e 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 5175028c56ce..c5246d29f385 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1a61aa20dfba..fd5d98a0b95c 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 49d8422767b4..e925c1c99c71 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -223,17 +223,27 @@ system_call_exit: andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- .Lsyscall_exit_work - /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */ - li r7,MSR_FP + andi. r0,r8,MSR_FP + beq 2f #ifdef CONFIG_ALTIVEC - oris r7,r7,MSR_VEC@h + andis. r0,r8,MSR_VEC@h + bne 3f #endif - and r0,r8,r7 - cmpd r0,r7 - bne .Lsyscall_restore_math -.Lsyscall_restore_math_cont: +2: addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI + mtmsrd r10,1 /* Restore RI */ +#endif + bl restore_math +#ifdef CONFIG_PPC_BOOK3S + li r11,0 + mtmsrd r11,1 +#endif + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO - cmpld r3,r11 +3: cmpld r3,r11 ld r5,_CCR(r1) bge- .Lsyscall_error .Lsyscall_error_cont: @@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r5,_CCR(r1) b .Lsyscall_error_cont -.Lsyscall_restore_math: - /* - * Some initial tests from restore_math to avoid the heavyweight - * C code entry and MSR manipulations. - */ - LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK) - and. r0,r0,r8 - bne 1f - - ld r7,PACACURRENT(r13) - lbz r0,THREAD+THREAD_LOAD_FP(r7) -#ifdef CONFIG_ALTIVEC - lbz r6,THREAD+THREAD_LOAD_VEC(r7) - add r0,r0,r6 -#endif - cmpdi r0,0 - beq .Lsyscall_restore_math_cont - -1: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S - li r10,MSR_RI - mtmsrd r10,1 /* Restore RI */ -#endif - bl restore_math -#ifdef CONFIG_PPC_BOOK3S - li r11,0 - mtmsrd r11,1 -#endif - /* Restore volatiles, reload MSR from updated one */ - ld r8,_MSR(r1) - ld r3,RESULT(r1) - li r11,-MAX_ERRNO - b .Lsyscall_restore_math_cont - /* Traced system call support */ .Lsyscall_dotrace: bl save_nvgprs diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c932dcc..ec480966f9bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - /* - * Syscall exit makes a similar initial check before branching - * to restore_math. Keep them in synch. - */ if (!msr_tm_active(regs->msr) && !current->thread.load_fp && !loadvec(current->thread)) return; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cf0e1245b8cc..8d3320562c70 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) hard_irq_disable(); while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) static void nmi_ipi_lock(void) { while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); } static void nmi_ipi_unlock(void) @@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) nmi_ipi_lock_start(&flags); while (nmi_ipi_busy_count) { nmi_ipi_unlock_end(&flags); - cpu_relax(); + spin_until_cond(nmi_ipi_busy_count == 0); nmi_ipi_lock_start(&flags); } diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b67f8b03a32d..34721a257a77 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags) * This may be called from low level interrupt handlers at some * point in future. */ - local_irq_save(*flags); - while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) - cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); /* Make it soft-NMI safe */ + while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) { + raw_local_irq_restore(*flags); + spin_until_cond(!test_bit(0, &__wd_smp_lock)); + raw_local_irq_save(*flags); + hard_irq_disable(); + } } static inline void wd_smp_unlock(unsigned long *flags) { clear_bit_unlock(0, &__wd_smp_lock); - local_irq_restore(*flags); + raw_local_irq_restore(*flags); } static void wd_lockup_ipi(struct pt_regs *regs) @@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs) nmi_panic(regs, "Hard LOCKUP"); } -static void set_cpu_stuck(int cpu, u64 tb) +static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { - cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); - cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); + cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb) &wd_smp_cpus_stuck); } } +static void set_cpu_stuck(int cpu, u64 tb) +{ + set_cpumask_stuck(cpumask_of(cpu), tb); +} static void watchdog_smp_panic(int cpu, u64 tb) { @@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb) } smp_flush_nmi_ipi(1000000); - /* Take the stuck CPU out of the watch group */ - for_each_cpu(c, &wd_smp_cpus_pending) - set_cpu_stuck(c, tb); + /* Take the stuck CPUs out of the watch group */ + set_cpumask_stuck(&wd_smp_cpus_pending, tb); -out: wd_smp_unlock(&flags); printk_safe_flush(); @@ -152,6 +159,11 @@ out: if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + + return; + +out: + wd_smp_unlock(&flags); } static void wd_smp_clear_cpu_pending(int cpu, u64 tb) @@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data) void arch_touch_nmi_watchdog(void) { + unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - watchdog_timer_interrupt(cpu); + if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks) + watchdog_timer_interrupt(cpu); } EXPORT_SYMBOL(arch_touch_nmi_watchdog); @@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu) static int start_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); return 0; @@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu) if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; + wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); if (cpumask_weight(&wd_cpus_enabled) == 1) { cpumask_set_cpu(cpu, &wd_smp_cpus_pending); wd_smp_last_reset_tb = get_tb(); } - smp_wmb(); + wd_smp_unlock(&flags); + start_watchdog_timer_on(cpu); return 0; @@ -310,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu) static int stop_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) return 0; /* Can happen in CPU unplug case */ stop_watchdog_timer_on(cpu); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); + wd_smp_unlock(&flags); + wd_smp_clear_cpu_pending(cpu, get_tb()); return 0; diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee070373f..a553aeea7af6 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE; */ static u64 pnv_deepest_stop_psscr_val; static u64 pnv_deepest_stop_psscr_mask; +static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; static int pnv_save_sprs_for_deep_states(void) @@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void) update_subcore_sibling_mask(); - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) - pnv_save_sprs_for_deep_states(); + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + int rc = pnv_save_sprs_for_deep_states(); + + if (likely(!rc)) + return; + + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. + */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = + pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = + pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } + } } u32 pnv_get_supported_cpuidle_states(void) @@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu) pnv_deepest_stop_psscr_val; srr1 = power9_idle_stop(psscr); - } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { + } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && + (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { srr1 = power7_idle_insn(PNV_THREAD_WINKLE); } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { @@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, max_residency_ns = residency_ns[i]; pnv_deepest_stop_psscr_val = psscr_val[i]; pnv_deepest_stop_psscr_mask = psscr_mask[i]; + pnv_deepest_stop_flag = flags[i]; deepest_stop_found = true; } diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 37b0698b7193..42896a67aeae 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, return -1; } +extern u32 pnv_get_supported_cpuidle_states(void); static int powernv_add_idle_states(void) { struct device_node *power_mgt; @@ -248,6 +249,8 @@ static int powernv_add_idle_states(void) const char *names[CPUIDLE_STATE_MAX]; u32 has_stop_states = 0; int i, rc; + u32 supported_flags = pnv_get_supported_cpuidle_states(); + /* Currently we have snooze statically defined */ @@ -362,6 +365,13 @@ static int powernv_add_idle_states(void) for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; bool stops_timebase = false; + + /* + * Skip the platform idle state whose flag isn't in + * the supported_cpuidle_states flag mask. + */ + if ((flags[i] & supported_flags) != flags[i]) + continue; /* * If an idle state has exit latency beyond * POWERNV_THRESHOLD_LATENCY_NS then don't use it |