diff options
author | Seiji Aguchi <seiji.aguchi@hds.com> | 2013-01-11 18:09:41 +0000 |
---|---|---|
committer | Ben Hutchings <ben@decadent.org.uk> | 2013-03-06 03:24:22 +0000 |
commit | 019c74a99a5278cbeea999779f998a326d1d40c7 (patch) | |
tree | e866ad0a36419161276022baeee15c1bef1e4e1c | |
parent | 56546c8c2a08d511b6499c7997fd95f22da5400c (diff) | |
download | lwn-019c74a99a5278cbeea999779f998a326d1d40c7.tar.gz lwn-019c74a99a5278cbeea999779f998a326d1d40c7.zip |
pstore: Avoid deadlock in panic and emergency-restart path
commit 9f244e9cfd70c7c0f82d3c92ce772ab2a92d9f64 upstream.
[Issue]
When pstore is in panic and emergency-restart paths, it may be blocked
in those paths because it simply takes spin_lock.
This is an example scenario which pstore may hang up in a panic path:
- cpuA grabs psinfo->buf_lock
- cpuB panics and calls smp_send_stop
- smp_send_stop sends IRQ to cpuA
- after 1 second, cpuB gives up on cpuA and sends an NMI instead
- cpuA is now in an NMI handler while still holding buf_lock
- cpuB is deadlocked
This case may happen if a firmware has a bug and
cpuA is stuck talking with it more than one second.
Also, this is a similar scenario in an emergency-restart path:
- cpuA grabs psinfo->buf_lock and stucks in a firmware
- cpuB kicks emergency-restart via either sysrq-b or hangcheck timer.
And then, cpuB is deadlocked by taking psinfo->buf_lock again.
[Solution]
This patch avoids the deadlocking issues in both panic and emergency_restart
paths by introducing a function, is_non_blocking_path(), to check if a cpu
can be blocked in current path.
With this patch, pstore is not blocked even if another cpu has
taken a spin_lock, in those paths by changing from spin_lock_irqsave
to spin_trylock_irqsave.
In addition, according to a comment of emergency_restart() in kernel/sys.c,
spin_lock shouldn't be taken in an emergency_restart path to avoid
deadlock. This patch fits the comment below.
<snip>
/**
* emergency_restart - reboot the system
*
* Without shutting down any hardware or taking any locks
* reboot the system. This is called when we know we are in
* trouble so this is our best effort to reboot. This is
* safe to call in interrupt context.
*/
void emergency_restart(void)
<snip>
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
[bwh: Backported to 3.2:
- Adjust context
- Add #include <linux/kmsg_dump.h>]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-rw-r--r-- | fs/pstore/platform.c | 35 | ||||
-rw-r--r-- | include/linux/pstore.h | 8 |
2 files changed, 37 insertions, 6 deletions
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 57bbf9078ac8..45d18d115bce 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -72,6 +72,27 @@ static char *reason_str[] = { "Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency" }; +bool pstore_cannot_block_path(enum kmsg_dump_reason reason) +{ + /* + * In case of NMI path, pstore shouldn't be blocked + * regardless of reason. + */ + if (in_nmi()) + return true; + + switch (reason) { + /* In panic case, other cpus are stopped by smp_send_stop(). */ + case KMSG_DUMP_PANIC: + /* Emergency restart shouldn't be blocked by spin lock. */ + case KMSG_DUMP_EMERG: + return true; + default: + return false; + } +} +EXPORT_SYMBOL_GPL(pstore_cannot_block_path); + /* * callback from kmsg_dump. (s2,l2) has the most recently * written bytes, older bytes are in (s1,l1). Save as much @@ -97,10 +118,12 @@ static void pstore_dump(struct kmsg_dumper *dumper, else why = "Unknown"; - if (in_nmi()) { - is_locked = spin_trylock(&psinfo->buf_lock); - if (!is_locked) - pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); + if (pstore_cannot_block_path(reason)) { + is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags); + if (!is_locked) { + pr_err("pstore dump routine blocked in %s path, may corrupt error record\n" + , in_nmi() ? "NMI" : why); + } } else spin_lock_irqsave(&psinfo->buf_lock, flags); oopscount++; @@ -131,9 +154,9 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += l1_cpy + l2_cpy; part++; } - if (in_nmi()) { + if (pstore_cannot_block_path(reason)) { if (is_locked) - spin_unlock(&psinfo->buf_lock); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); } else spin_unlock_irqrestore(&psinfo->buf_lock, flags); } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 2ca8cde5459d..9b169690013d 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -22,6 +22,8 @@ #ifndef _LINUX_PSTORE_H #define _LINUX_PSTORE_H +#include <linux/kmsg_dump.h> + /* types */ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, @@ -50,6 +52,7 @@ struct pstore_info { #ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); +extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason); extern int pstore_write(enum pstore_type_id type, char *buf, size_t size); #else static inline int @@ -57,6 +60,11 @@ pstore_register(struct pstore_info *psi) { return -ENODEV; } +static inline bool +pstore_cannot_block_path(enum kmsg_dump_reason reason) +{ + return false; +} static inline int pstore_write(enum pstore_type_id type, char *buf, size_t size) { |