diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-18 09:42:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-18 09:42:20 -0700 |
commit | 3d9944978e0bb6c98b901949cb7a22256e48b23d (patch) | |
tree | ea068a5eaca4d5ae5c02afcf903665a1f4cfb65b | |
parent | 42ea7d7f2a7356962022cdd124d9043c488ca5e2 (diff) | |
parent | dad1743e5993f19b3d7e7bd0fb35dc45b5326626 (diff) | |
download | lwn-3d9944978e0bb6c98b901949cb7a22256e48b23d.tar.gz lwn-3d9944978e0bb6c98b901949cb7a22256e48b23d.zip |
Merge tag 'linus-mce-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull a machine check recovery fix from Tony Luck.
I really don't like how the MCE code does some of the things it does,
but this does seem to be an improvement.
* tag 'linus-mce-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
x86/mce: Only restart instruction after machine check recovery if it is safe
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d086a09c087d..11c9166c3337 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -945,9 +945,10 @@ struct mce_info { atomic_t inuse; struct task_struct *t; __u64 paddr; + int restartable; } mce_info[MCE_INFO_MAX]; -static void mce_save_info(__u64 addr) +static void mce_save_info(__u64 addr, int c) { struct mce_info *mi; @@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr) if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { mi->t = current; mi->paddr = addr; + mi->restartable = c; return; } } @@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { /* schedule action before return to userland */ - mce_save_info(m.addr); + mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); set_thread_flag(TIF_MCE_NOTIFY); } else if (kill_it) { force_sig(SIGBUS, current); @@ -1179,7 +1181,13 @@ void mce_notify_process(void) pr_err("Uncorrected hardware memory error in user-access at %llx", mi->paddr); - if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || + mi->restartable == 0) { pr_err("Memory error not recovered"); force_sig(SIGBUS, current); } |