diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2009-06-25 19:51:04 +0200 |
---|---|---|
committer | Borislav Petkov <borislav.petkov@amd.com> | 2009-09-14 18:58:25 +0200 |
commit | 5110dbdeab546268dda2e4c6a83448639b2fc5ae (patch) | |
tree | d504eb80a6b77a7511cd8259943d0d746758834d /drivers/edac/amd64_edac.c | |
parent | ef44cc4c2245d3c43f3c11c7bff6239852eef498 (diff) | |
download | lwn-5110dbdeab546268dda2e4c6a83448639b2fc5ae.tar.gz lwn-5110dbdeab546268dda2e4c6a83448639b2fc5ae.zip |
amd64_edac: cleanup/complete NB MCE decoding
* don't dump info which mcheck already does
* update to newest BKDG
* mv amd64_process_error_info -> amd64_decode_nb_mce
* shorten error struct names
* remove redundant info ptr in amd64_process_error_info
* remove unused ErrorCodeExt[19:16] (MCx_STATUS) defines
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- | drivers/edac/amd64_edac.c | 125 |
1 files changed, 46 insertions, 79 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c9b88d829701..5af87d44c80c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2355,62 +2355,47 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci, "Error Overflow set"); } -int amd64_process_error_info(struct mem_ctl_info *mci, - struct err_regs *regs, - int handle_errors) +void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs, + int handle_errors) { - struct amd64_pvt *pvt; - u32 err_code, ext_ec; - int gart_tlb_error = 0; - - pvt = mci->pvt_info; + struct amd64_pvt *pvt = mci->pvt_info; + int ecc; + u32 ec = ERROR_CODE(regs->nbsl); + u32 xec = EXT_ERROR_CODE(regs->nbsl); if (!handle_errors) - return 1; + return; - debugf1("NorthBridge ERROR: mci(0x%p)\n", mci); - debugf1(" MC node(%d) Error-Address(0x%.8x-%.8x)\n", - pvt->mc_node_id, regs->nbeah, regs->nbeal); - debugf1(" nbsh(0x%.8x) nbsl(0x%.8x)\n", - regs->nbsh, regs->nbsl); - debugf1(" Valid Error=%s Overflow=%s\n", - (regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False", - (regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False"); - debugf1(" Err Uncorrected=%s MCA Error Reporting=%s\n", - (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ? - "True" : "False", - (regs->nbsh & K8_NBSH_ERR_ENABLE) ? - "True" : "False"); - debugf1(" MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n", - (regs->nbsh & K8_NBSH_MISC_ERR_VALID) ? - "True" : "False", - (regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ? - "True" : "False", - (regs->nbsh & K8_NBSH_PCC) ? - "True" : "False"); - debugf1(" CECC=%s UECC=%s Found by Scruber=%s\n", - (regs->nbsh & K8_NBSH_CECC) ? - "True" : "False", - (regs->nbsh & K8_NBSH_UECC) ? - "True" : "False", - (regs->nbsh & K8_NBSH_ERR_SCRUBER) ? - "True" : "False"); - debugf1(" CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n", - (regs->nbsh & K8_NBSH_CORE0) ? "True" : "False", - (regs->nbsh & K8_NBSH_CORE1) ? "True" : "False", - (regs->nbsh & K8_NBSH_CORE2) ? "True" : "False", - (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False"); + pr_emerg(" Northbridge ERROR, mc node %d", pvt->mc_node_id); + /* + * F10h, revD can disable ErrCpu[3:0] so check that first and also the + * value encoding has changed so interpret those differently + */ + if ((boot_cpu_data.x86 == 0x10) && + (boot_cpu_data.x86_model > 8)) { + if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) + pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); + } else { + pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf))); + } - err_code = ERROR_CODE(regs->nbsl); + pr_emerg(" Error: %sorrected", + ((regs->nbsh & K8_NBSH_UC_ERR) ? "Unc" : "C")); + pr_cont(", Report Error: %s", + ((regs->nbsh & K8_NBSH_ERR_EN) ? "yes" : "no")); + pr_cont(", MiscV: %svalid, CPU context corrupt: %s", + ((regs->nbsh & K8_NBSH_MISCV) ? "" : "In"), + ((regs->nbsh & K8_NBSH_PCC) ? "yes" : "no")); - /* Determine which error type: - * 1) GART errors - non-fatal, developmental events - * 2) MEMORY errors - * 3) BUS errors - * 4) Unknown error - */ - if (TLB_ERROR(err_code)) { + /* do the two bits[14:13] together */ + ecc = regs->nbsh & (0x3 << 13); + if (ecc) + pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); + + pr_cont("\n"); + + if (TLB_ERROR(ec)) { /* * GART errors are intended to help graphics driver developers * to detect bad GART PTEs. It is recommended by AMD to disable @@ -2423,52 +2408,34 @@ int amd64_process_error_info(struct mem_ctl_info *mci, * [1] section 13.10.1 on BIOS and Kernel Developers Guide for * AMD NPT family 0Fh processors */ - if (report_gart_errors == 0) - return 1; - - /* - * Only if GART error reporting is requested should we generate - * any logs. - */ - gart_tlb_error = 1; + if (!report_gart_errors) + return; - debugf1("GART TLB error\n"); + pr_emerg("GART TLB error\n"); amd64_decode_gart_tlb_error(mci, regs); - } else if (MEM_ERROR(err_code)) { - debugf1("Memory/Cache error\n"); + } else if (MEM_ERROR(ec)) { + pr_emerg("Memory/Cache error\n"); amd64_decode_mem_cache_error(mci, regs); - } else if (BUS_ERROR(err_code)) { - debugf1("Bus (Link/DRAM) error\n"); + } else if (BUS_ERROR(ec)) { + pr_emerg("Bus (Link/DRAM) error\n"); amd64_decode_bus_error(mci, regs); } else { /* shouldn't reach here! */ amd64_mc_printk(mci, KERN_WARNING, "%s(): unknown MCE error 0x%x\n", __func__, - err_code); + ec); } - ext_ec = EXT_ERROR_CODE(regs->nbsl); - amd64_mc_printk(mci, KERN_ERR, - "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]); + pr_emerg("%s.\n", EXT_ERR_MSG(xec)); /* * Check the UE bit of the NB status high register, if set generate some * logs. If NOT a GART error, then process the event as a NO-INFO event. * If it was a GART error, skip that process. */ - if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) { - amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n"); - if (!gart_tlb_error) - edac_mc_handle_ue_no_info(mci, "UE bit is set\n"); - } - - if (regs->nbsh & K8_NBSH_PCC) - amd64_mc_printk(mci, KERN_CRIT, - "PCC (processor context corrupt) set\n"); - - return 1; + if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) + edac_mc_handle_ue_no_info(mci, "UE bit is set"); } -EXPORT_SYMBOL_GPL(amd64_process_error_info); /* * The main polling 'check' function, called FROM the edac core to perform the @@ -2479,7 +2446,7 @@ static void amd64_check(struct mem_ctl_info *mci) struct err_regs regs; if (amd64_get_error_info(mci, ®s)) - amd64_process_error_info(mci, ®s, 1); + amd64_decode_nb_mce(mci, ®s, 1); } /* |