summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/s390/s390mach.c321
-rw-r--r--drivers/s390/s390mach.h35
2 files changed, 317 insertions, 39 deletions
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index ffa996c8a908..5bb255e02acc 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -31,14 +31,14 @@ extern void css_reiterate_subchannels(void);
extern struct workqueue_struct *slow_path_wq;
extern struct work_struct slow_path_work;
-static void
+static NORET_TYPE void
s390_handle_damage(char *msg)
{
- printk(KERN_EMERG "%s\n", msg);
#ifdef CONFIG_SMP
smp_send_stop();
#endif
disabled_wait((unsigned long) __builtin_return_address(0));
+ for(;;);
}
/*
@@ -122,40 +122,39 @@ repeat:
return 0;
}
+struct mcck_struct {
+ int kill_task;
+ int channel_report;
+ int warning;
+ unsigned long long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
/*
- * machine check handler.
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
*/
void
-s390_do_machine_check(void)
+s390_handle_mcck(void)
{
- struct mci *mci;
-
- mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ unsigned long flags;
+ struct mcck_struct mcck;
- if (mci->sd) /* system damage */
- s390_handle_damage("received system damage machine check\n");
+ /*
+ * Disable machine checks and get the current state of accumulated
+ * machine checks. Afterwards delete the old state and enable machine
+ * checks again.
+ */
+ local_irq_save(flags);
+ local_mcck_disable();
+ mcck = __get_cpu_var(cpu_mcck);
+ memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+ clear_thread_flag(TIF_MCCK_PENDING);
+ local_mcck_enable();
+ local_irq_restore(flags);
- if (mci->pd) /* instruction processing damage */
- s390_handle_damage("received instruction processing "
- "damage machine check\n");
-
- if (mci->se) /* storage error uncorrected */
- s390_handle_damage("received storage error uncorrected "
- "machine check\n");
-
- if (mci->sc) /* storage error corrected */
- printk(KERN_WARNING
- "received storage error corrected machine check\n");
-
- if (mci->ke) /* storage key-error uncorrected */
- s390_handle_damage("received storage key-error uncorrected "
- "machine check\n");
-
- if (mci->ds && mci->fa) /* storage degradation */
- s390_handle_damage("received storage degradation machine "
- "check\n");
-
- if (mci->cp) /* channel report word pending */
+ if (mcck.channel_report)
up(&m_sem);
#ifdef CONFIG_MACHCHK_WARNING
@@ -168,7 +167,7 @@ s390_do_machine_check(void)
* On VM we only get one interrupt per virtally presented machinecheck.
* Though one suffices, we may get one interrupt per (virtual) processor.
*/
- if (mci->w) { /* WARNING pending ? */
+ if (mcck.warning) { /* WARNING pending ? */
static int mchchk_wng_posted = 0;
/*
* Use single machine clear, as we cannot handle smp right now
@@ -178,6 +177,261 @@ s390_do_machine_check(void)
kill_proc(1, SIGPWR, 1);
}
#endif
+
+ if (mcck.kill_task) {
+ local_irq_enable();
+ printk(KERN_EMERG "mcck: Terminating task because of machine "
+ "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+ do_exit(SIGSEGV);
+ }
+}
+
+/*
+ * returns 0 if all registers could be validated
+ * returns 1 otherwise
+ */
+static int
+s390_revalidate_registers(struct mci *mci)
+{
+ int kill_task;
+ u64 tmpclock;
+ u64 zero;
+ void *fpt_save_area, *fpt_creg_save_area;
+
+ kill_task = 0;
+ zero = 0;
+ /* General purpose registers */
+ if (!mci->gr)
+ /*
+ * General purpose registers couldn't be restored and have
+ * unknown contents. Process needs to be terminated.
+ */
+ kill_task = 1;
+
+ /* Revalidate floating point registers */
+ if (!mci->fp)
+ /*
+ * Floating point registers can't be restored and
+ * therefore the process needs to be terminated.
+ */
+ kill_task = 1;
+
+#ifndef __s390x__
+ asm volatile("ld 0,0(%0)\n"
+ "ld 2,8(%0)\n"
+ "ld 4,16(%0)\n"
+ "ld 6,24(%0)"
+ : : "a" (&S390_lowcore.floating_pt_save_area));
+#endif
+
+ if (MACHINE_HAS_IEEE) {
+#ifdef __s390x__
+ fpt_save_area = &S390_lowcore.floating_pt_save_area;
+ fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+#else
+ fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
+ fpt_creg_save_area = fpt_save_area+128;
+#endif
+ /* Floating point control register */
+ if (!mci->fc) {
+ /*
+ * Floating point control register can't be restored.
+ * Task will be terminated.
+ */
+ asm volatile ("lfpc 0(%0)" : : "a" (&zero));
+ kill_task = 1;
+
+ }
+ else
+ asm volatile (
+ "lfpc 0(%0)"
+ : : "a" (fpt_creg_save_area));
+
+ asm volatile("ld 0,0(%0)\n"
+ "ld 1,8(%0)\n"
+ "ld 2,16(%0)\n"
+ "ld 3,24(%0)\n"
+ "ld 4,32(%0)\n"
+ "ld 5,40(%0)\n"
+ "ld 6,48(%0)\n"
+ "ld 7,56(%0)\n"
+ "ld 8,64(%0)\n"
+ "ld 9,72(%0)\n"
+ "ld 10,80(%0)\n"
+ "ld 11,88(%0)\n"
+ "ld 12,96(%0)\n"
+ "ld 13,104(%0)\n"
+ "ld 14,112(%0)\n"
+ "ld 15,120(%0)\n"
+ : : "a" (fpt_save_area));
+ }
+
+ /* Revalidate access registers */
+ asm volatile("lam 0,15,0(%0)"
+ : : "a" (&S390_lowcore.access_regs_save_area));
+ if (!mci->ar)
+ /*
+ * Access registers have unknown contents.
+ * Terminating task.
+ */
+ kill_task = 1;
+
+ /* Revalidate control registers */
+ if (!mci->cr)
+ /*
+ * Control registers have unknown contents.
+ * Can't recover and therefore stopping machine.
+ */
+ s390_handle_damage("invalid control registers.");
+ else
+#ifdef __s390x__
+ asm volatile("lctlg 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#else
+ asm volatile("lctl 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+#endif
+
+ /*
+ * We don't even try to revalidate the TOD register, since we simply
+ * can't write something sensible into that register.
+ */
+
+#ifdef __s390x__
+ /*
+ * See if we can revalidate the TOD programmable register with its
+ * old contents (should be zero) otherwise set it to zero.
+ */
+ if (!mci->pr)
+ asm volatile("sr 0,0\n"
+ "sckpf"
+ : : : "0", "cc");
+ else
+ asm volatile(
+ "l 0,0(%0)\n"
+ "sckpf"
+ : : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
+#endif
+
+ /* Revalidate clock comparator register */
+ asm volatile ("stck 0(%1)\n"
+ "sckc 0(%1)"
+ : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
+
+ /* Check if old PSW is valid */
+ if (!mci->wp)
+ /*
+ * Can't tell if we come from user or kernel mode
+ * -> stopping machine.
+ */
+ s390_handle_damage("old psw invalid.");
+
+ if (!mci->ms || !mci->pm || !mci->ia)
+ kill_task = 1;
+
+ return kill_task;
+}
+
+/*
+ * machine check handler.
+ */
+void
+s390_do_machine_check(struct pt_regs *regs)
+{
+ struct mci *mci;
+ struct mcck_struct *mcck;
+ int umode;
+
+ mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ mcck = &__get_cpu_var(cpu_mcck);
+ umode = user_mode(regs);
+
+ if (mci->sd)
+ /* System damage -> stopping machine */
+ s390_handle_damage("received system damage machine check.");
+
+ if (mci->pd) {
+ if (mci->b) {
+ /* Processing backup -> verify if we can survive this */
+ u64 z_mcic, o_mcic, t_mcic;
+#ifdef __s390x__
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+ 1ULL<<16);
+#else
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
+ 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
+#endif
+ t_mcic = *(u64 *)mci;
+
+ if (((t_mcic & z_mcic) != 0) ||
+ ((t_mcic & o_mcic) != o_mcic)) {
+ s390_handle_damage("processing backup machine "
+ "check with damage.");
+ }
+ if (!umode)
+ s390_handle_damage("processing backup machine "
+ "check in kernel mode.");
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ }
+ else {
+ /* Processing damage -> stopping machine */
+ s390_handle_damage("received instruction processing "
+ "damage machine check.");
+ }
+ }
+ if (s390_revalidate_registers(mci)) {
+ if (umode) {
+ /*
+ * Couldn't restore all register contents while in
+ * user mode -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
+ else
+ /*
+ * Couldn't restore all register contents while in
+ * kernel mode -> stopping machine.
+ */
+ s390_handle_damage("unable to revalidate registers.");
+ }
+
+ if (mci->se)
+ /* Storage error uncorrected */
+ s390_handle_damage("received storage error uncorrected "
+ "machine check.");
+
+ if (mci->ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage("received storage key-error uncorrected "
+ "machine check.");
+
+ if (mci->ds && mci->fa)
+ /* Storage degradation */
+ s390_handle_damage("received storage degradation machine "
+ "check.");
+
+ if (mci->cp) {
+ /* Channel report word pending */
+ mcck->channel_report = 1;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
+
+ if (mci->w) {
+ /* Warning pending */
+ mcck->warning = 1;
+ set_thread_flag(TIF_MCCK_PENDING);
+ }
}
/*
@@ -189,9 +443,8 @@ static int
machine_check_init(void)
{
init_MUTEX_LOCKED(&m_sem);
- ctl_clear_bit(14, 25); /* disable damage MCH */
- ctl_set_bit(14, 26); /* enable degradation MCH */
- ctl_set_bit(14, 27); /* enable system recovery MCH */
+ ctl_clear_bit(14, 25); /* disable external damage MCH */
+ ctl_set_bit(14, 27); /* enable system recovery MCH */
#ifdef CONFIG_MACHCHK_WARNING
ctl_set_bit(14, 24); /* enable warning MCH */
#endif
diff --git a/drivers/s390/s390mach.h b/drivers/s390/s390mach.h
index 7e26f0f1b0dc..4eaa70179182 100644
--- a/drivers/s390/s390mach.h
+++ b/drivers/s390/s390mach.h
@@ -16,20 +16,45 @@ struct mci {
__u32 sd : 1; /* 00 system damage */
__u32 pd : 1; /* 01 instruction-processing damage */
__u32 sr : 1; /* 02 system recovery */
- __u32 to_be_defined_1 : 4; /* 03-06 */
+ __u32 to_be_defined_1 : 1; /* 03 */
+ __u32 cd : 1; /* 04 timing-facility damage */
+ __u32 ed : 1; /* 05 external damage */
+ __u32 to_be_defined_2 : 1; /* 06 */
__u32 dg : 1; /* 07 degradation */
__u32 w : 1; /* 08 warning pending */
__u32 cp : 1; /* 09 channel-report pending */
- __u32 to_be_defined_2 : 6; /* 10-15 */
+ __u32 sp : 1; /* 10 service-processor damage */
+ __u32 ck : 1; /* 11 channel-subsystem damage */
+ __u32 to_be_defined_3 : 2; /* 12-13 */
+ __u32 b : 1; /* 14 backed up */
+ __u32 to_be_defined_4 : 1; /* 15 */
__u32 se : 1; /* 16 storage error uncorrected */
__u32 sc : 1; /* 17 storage error corrected */
__u32 ke : 1; /* 18 storage-key error uncorrected */
__u32 ds : 1; /* 19 storage degradation */
- __u32 to_be_defined_3 : 4; /* 20-23 */
+ __u32 wp : 1; /* 20 psw mwp validity */
+ __u32 ms : 1; /* 21 psw mask and key validity */
+ __u32 pm : 1; /* 22 psw program mask and cc validity */
+ __u32 ia : 1; /* 23 psw instruction address validity */
__u32 fa : 1; /* 24 failing storage address validity */
- __u32 to_be_defined_4 : 7; /* 25-31 */
+ __u32 to_be_defined_5 : 1; /* 25 */
+ __u32 ec : 1; /* 26 external damage code validity */
+ __u32 fp : 1; /* 27 floating point register validity */
+ __u32 gr : 1; /* 28 general register validity */
+ __u32 cr : 1; /* 29 control register validity */
+ __u32 to_be_defined_6 : 1; /* 30 */
+ __u32 st : 1; /* 31 storage logical validity */
__u32 ie : 1; /* 32 indirect storage error */
- __u32 to_be_defined_5 : 31; /* 33-63 */
+ __u32 ar : 1; /* 33 access register validity */
+ __u32 da : 1; /* 34 delayed access exception */
+ __u32 to_be_defined_7 : 7; /* 35-41 */
+ __u32 pr : 1; /* 42 tod programmable register validity */
+ __u32 fc : 1; /* 43 fp control register validity */
+ __u32 ap : 1; /* 44 ancillary report */
+ __u32 to_be_defined_8 : 1; /* 45 */
+ __u32 ct : 1; /* 46 cpu timer validity */
+ __u32 cc : 1; /* 47 clock comparator validity */
+ __u32 to_be_defined_9 : 16; /* 47-63 */
};
/*