summaryrefslogtreecommitdiff
path: root/drivers/edac/skx_common.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 08:10:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 08:10:03 -0800
commitd9de5ce8a5ec8f97c9468244fd85ff1a10363b60 (patch)
tree6de3a4ef226313aceae0ed920b1a3e28f42ac144 /drivers/edac/skx_common.c
parent0246725d7399d7d6acc8fd5a1a0a1ffce9a1eaa3 (diff)
parent28980db94742f9f2fb0f68ea35f2171b38007bae (diff)
downloadlwn-d9de5ce8a5ec8f97c9468244fd85ff1a10363b60.tar.gz
lwn-d9de5ce8a5ec8f97c9468244fd85ff1a10363b60.zip
Merge tag 'edac_updates_for_v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov: - Add a driver for the RAS functionality on Xilinx's on chip memory controller - Add support for decoding errors from the first and second level memory on SKL-based hardware - Add support for the memory controllers in Intel Granite Rapids and Emerald Rapids machines - First round of amd64_edac driver simplification and removal of unneeded functionality - The usual cleanups and fixes * tag 'edac_updates_for_v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/amd64: Shut up an -Werror,-Wsometimes-uninitialized clang false positive EDAC/amd64: Remove early_channel_count() EDAC/amd64: Remove PCI Function 0 EDAC/amd64: Remove PCI Function 6 EDAC/amd64: Remove scrub rate control for Family 17h and later EDAC/amd64: Don't set up EDAC PCI control on Family 17h+ EDAC/i10nm: Add driver decoder for Sapphire Rapids server EDAC/i10nm: Add Intel Granite Rapids server support EDAC/i10nm: Make more configurations CPU model specific EDAC/i10nm: Add Intel Emerald Rapids server support EDAC/skx_common: Delete duplicated and unreachable code EDAC/skx_common: Enable EDAC support for the "near" memory EDAC/qcom: Add platform_device_id table for module autoloading EDAC/zynqmp: Add EDAC support for Xilinx ZynqMP OCM dt-bindings: edac: Add bindings for Xilinx ZynqMP OCM
Diffstat (limited to 'drivers/edac/skx_common.c')
-rw-r--r--drivers/edac/skx_common.c76
1 files changed, 33 insertions, 43 deletions
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 806986f03177..ce3e0069e028 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -560,44 +560,28 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
tp_event = HW_EVENT_ERR_CORRECTED;
}
- /*
- * According to Intel Architecture spec vol 3B,
- * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
- * memory errors should fit one of these masks:
- * 000f 0000 1mmm cccc (binary)
- * 000f 0010 1mmm cccc (binary) [RAM used as cache]
- * where:
- * f = Correction Report Filtering Bit. If 1, subsequent errors
- * won't be shown
- * mmm = error type
- * cccc = channel
- * If the mask doesn't match, report an error to the parsing logic
- */
- if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) {
- optype = "Can't parse: it is not a mem";
- } else {
- switch (optypenum) {
- case 0:
- optype = "generic undef request error";
- break;
- case 1:
- optype = "memory read error";
- break;
- case 2:
- optype = "memory write error";
- break;
- case 3:
- optype = "addr/cmd error";
- break;
- case 4:
- optype = "memory scrubbing error";
- scrub_err = true;
- break;
- default:
- optype = "reserved";
- break;
- }
+ switch (optypenum) {
+ case 0:
+ optype = "generic undef request error";
+ break;
+ case 1:
+ optype = "memory read error";
+ break;
+ case 2:
+ optype = "memory write error";
+ break;
+ case 3:
+ optype = "addr/cmd error";
+ break;
+ case 4:
+ optype = "memory scrubbing error";
+ scrub_err = true;
+ break;
+ default:
+ optype = "reserved";
+ break;
}
+
if (res->decoded_by_adxl) {
len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
overflow ? " OVERFLOW" : "",
@@ -632,12 +616,18 @@ static bool skx_error_in_1st_level_mem(const struct mce *m)
if (!skx_mem_cfg_2lm)
return false;
- errcode = GET_BITFIELD(m->status, 0, 15);
+ errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
- if ((errcode & 0xef80) != 0x280)
- return false;
+ return errcode == MCACOD_EXT_MEM_ERR;
+}
- return true;
+static bool skx_error_in_mem(const struct mce *m)
+{
+ u32 errcode;
+
+ errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK;
+
+ return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR);
}
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@@ -651,8 +641,8 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if (mce->kflags & MCE_HANDLED_CEC)
return NOTIFY_DONE;
- /* ignore unless this is memory related with an address */
- if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+ /* Ignore unless this is memory related with an address */
+ if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
memset(&res, 0, sizeof(res));