summaryrefslogtreecommitdiff
path: root/drivers/edac/igen6_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/igen6_edac.c')
-rw-r--r--drivers/edac/igen6_edac.c374
1 files changed, 349 insertions, 25 deletions
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index 6be9986fc6bd..a07bbfd075d0 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -22,11 +22,12 @@
#include <linux/io.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
+#include <asm/mce.h>
#include "edac_mc.h"
#include "edac_module.h"
-#define IGEN6_REVISION "v2.4"
+#define IGEN6_REVISION "v2.5"
#define EDAC_MOD_STR "igen6_edac"
#define IGEN6_NMI_NAME "igen6_ibecc"
@@ -40,7 +41,7 @@
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
-#define NUM_IMC 1 /* Max memory controllers */
+#define NUM_IMC 2 /* Max memory controllers */
#define NUM_CHANNELS 2 /* Max channels */
#define NUM_DIMMS 2 /* Max DIMMs per channel */
@@ -54,6 +55,10 @@
#define CAPID_C_OFFSET 0xec
#define CAPID_C_IBECC BIT(15)
+/* Capability register E */
+#define CAPID_E_OFFSET 0xf0
+#define CAPID_E_IBECC BIT(12)
+
/* Error Status */
#define ERRSTS_OFFSET 0xc8
#define ERRSTS_CE BIT_ULL(6)
@@ -70,7 +75,7 @@
#define IBECC_ACTIVATE_EN BIT(0)
/* IBECC error log */
-#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + 0x170)
+#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE BIT_ULL(62)
#define ECC_ERROR_LOG_UE BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT 5
@@ -84,39 +89,54 @@
#define MCHBAR_SIZE 0x10000
/* Parameters for the channel decode stage */
-#define MAD_INTER_CHANNEL_OFFSET 0x5000
+#define IMC_BASE (res_cfg->imc_base)
+#define MAD_INTER_CHANNEL_OFFSET IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29)
/* Parameters for DRAM decode stage */
-#define MAD_INTRA_CH0_OFFSET 0x5004
+#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0)
/* DIMM characteristics */
-#define MAD_DIMM_CH0_OFFSET 0x500c
+#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25)
+/* Hash for memory controller selection */
+#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8)
+#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3)
+
/* Hash for channel selection */
-#define CHANNEL_HASH_OFFSET 0X5024
+#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
-#define CHANNEL_EHASH_OFFSET 0X5028
+#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28)
+/* Parameters for memory slice decode stage */
+#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
+#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
+
static struct res_config {
+ bool machine_check;
int num_imc;
+ u32 imc_base;
+ u32 cmf_base;
+ u32 cmf_size;
+ u32 ms_hash_offset;
u32 ibecc_base;
+ u32 ibecc_error_log_offset;
bool (*ibecc_available)(struct pci_dev *pdev);
/* Convert error address logged in IBECC to system physical address */
- u64 (*err_addr_to_sys_addr)(u64 eaddr);
+ u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
/* Convert error address logged in IBECC to integrated memory controller address */
- u64 (*err_addr_to_imc_addr)(u64 eaddr);
+ u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
struct igen6_imc {
@@ -125,6 +145,7 @@ struct igen6_imc {
struct pci_dev *pdev;
struct device dev;
void __iomem *window;
+ u64 size;
u64 ch_s_size;
int ch_l_map;
u64 dimm_s_size[NUM_CHANNELS];
@@ -134,6 +155,9 @@ struct igen6_imc {
static struct igen6_pvt {
struct igen6_imc imc[NUM_IMC];
+ u64 ms_hash;
+ u64 ms_s_size;
+ int ms_l_map;
} *igen6_pvt;
/* The top of low usable DRAM */
@@ -183,6 +207,21 @@ static struct work_struct ecclog_work;
#define DID_EHL_SKU14 0x4534
#define DID_EHL_SKU15 0x4536
+/* Compute die IDs for ICL-NNPI with IBECC */
+#define DID_ICL_SKU8 0x4581
+#define DID_ICL_SKU10 0x4585
+#define DID_ICL_SKU11 0x4589
+#define DID_ICL_SKU12 0x458d
+
+/* Compute die IDs for Tiger Lake with IBECC */
+#define DID_TGL_SKU 0x9a14
+
+/* Compute die IDs for Alder Lake with IBECC */
+#define DID_ADL_SKU1 0x4601
+#define DID_ADL_SKU2 0x4602
+#define DID_ADL_SKU3 0x4621
+#define DID_ADL_SKU4 0x4641
+
static bool ehl_ibecc_available(struct pci_dev *pdev)
{
u32 v;
@@ -193,12 +232,12 @@ static bool ehl_ibecc_available(struct pci_dev *pdev)
return !!(CAPID_C_IBECC & v);
}
-static u64 ehl_err_addr_to_sys_addr(u64 eaddr)
+static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
return eaddr;
}
-static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
+static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
if (eaddr < igen6_tolud)
return eaddr;
@@ -212,12 +251,156 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
return eaddr;
}
+static bool icl_ibecc_available(struct pci_dev *pdev)
+{
+ u32 v;
+
+ if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
+ return false;
+
+ return !(CAPID_C_IBECC & v) &&
+ (boot_cpu_data.x86_stepping >= 1);
+}
+
+static bool tgl_ibecc_available(struct pci_dev *pdev)
+{
+ u32 v;
+
+ if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
+ return false;
+
+ return !(CAPID_E_IBECC & v);
+}
+
+static u64 mem_addr_to_sys_addr(u64 maddr)
+{
+ if (maddr < igen6_tolud)
+ return maddr;
+
+ if (igen6_tom <= _4GB)
+ return maddr - igen6_tolud + _4GB;
+
+ if (maddr < _4GB)
+ return maddr - igen6_tolud + igen6_tom;
+
+ return maddr;
+}
+
+static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
+{
+ u64 hash_addr = addr & mask, hash = hash_init;
+ u64 intlv = (addr >> intlv_bit) & 1;
+ int i;
+
+ for (i = 6; i < 20; i++)
+ hash ^= (hash_addr >> i) & 1;
+
+ return hash ^ intlv;
+}
+
+static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
+{
+ u64 maddr, hash, mask, ms_s_size;
+ int intlv_bit;
+ u32 ms_hash;
+
+ ms_s_size = igen6_pvt->ms_s_size;
+ if (eaddr >= ms_s_size)
+ return eaddr + ms_s_size;
+
+ ms_hash = igen6_pvt->ms_hash;
+
+ mask = MEM_SLICE_HASH_MASK(ms_hash);
+ intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;
+
+ maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
+ GET_BITFIELD(eaddr, 0, intlv_bit - 1);
+
+ hash = mem_slice_hash(maddr, mask, mc, intlv_bit);
+
+ return maddr | (hash << intlv_bit);
+}
+
+static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+ u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);
+
+ return mem_addr_to_sys_addr(maddr);
+}
+
+static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
+{
+ return eaddr;
+}
+
+static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
+{
+ return mem_addr_to_sys_addr(eaddr);
+}
+
+static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
+{
+ u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
+ struct igen6_imc *imc = &igen6_pvt->imc[mc];
+ int intlv_bit;
+ u32 mc_hash;
+
+ if (eaddr >= 2 * ms_s_size)
+ return eaddr - ms_s_size;
+
+ mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);
+
+ intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;
+
+ imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
+ GET_BITFIELD(eaddr, 0, intlv_bit - 1);
+
+ return imc_addr;
+}
+
static struct res_config ehl_cfg = {
- .num_imc = 1,
- .ibecc_base = 0xdc00,
- .ibecc_available = ehl_ibecc_available,
- .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
- .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+ .num_imc = 1,
+ .imc_base = 0x5000,
+ .ibecc_base = 0xdc00,
+ .ibecc_available = ehl_ibecc_available,
+ .ibecc_error_log_offset = 0x170,
+ .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+};
+
+static struct res_config icl_cfg = {
+ .num_imc = 1,
+ .imc_base = 0x5000,
+ .ibecc_base = 0xd800,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = icl_ibecc_available,
+ .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
+};
+
+static struct res_config tgl_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0x5000,
+ .cmf_base = 0x11000,
+ .cmf_size = 0x800,
+ .ms_hash_offset = 0xac,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x170,
+ .ibecc_available = tgl_ibecc_available,
+ .err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
+};
+
+static struct res_config adl_cfg = {
+ .machine_check = true,
+ .num_imc = 2,
+ .imc_base = 0xd800,
+ .ibecc_base = 0xd400,
+ .ibecc_error_log_offset = 0x68,
+ .ibecc_available = tgl_ibecc_available,
+ .err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
+ .err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
static const struct pci_device_id igen6_pci_tbl[] = {
@@ -232,6 +415,15 @@ static const struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
+ { PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
+ { PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
@@ -490,8 +682,8 @@ static void ecclog_work_cb(struct work_struct *work)
eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
ECC_ERROR_LOG_ADDR_SHIFT;
res.mc = node->mc;
- res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr);
- res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
+ res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
+ res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
mci = igen6_pvt->imc[res.mc].mci;
@@ -540,6 +732,57 @@ static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
return NMI_HANDLED;
}
+static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ char *type;
+
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
+ /*
+ * Ignore unless this is a memory related error.
+ * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
+ * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
+ */
+ if ((mce->status & 0xefff) >> 7 != 1)
+ return NOTIFY_DONE;
+
+ if (mce->mcgstatus & MCG_STATUS_MCIP)
+ type = "Exception";
+ else
+ type = "Event";
+
+ edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
+ mce->extcpu, type, mce->mcgstatus,
+ mce->bank, mce->status);
+ edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
+ edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
+ edac_dbg(0, "MISC 0x%llx\n", mce->misc);
+ edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
+ mce->cpuvendor, mce->cpuid, mce->time,
+ mce->socketid, mce->apicid);
+ /*
+ * We just use the Machine Check for the memory error notification.
+ * Each memory controller is associated with an IBECC instance.
+ * Directly read and clear the error information(error address and
+ * error type) on all the IBECC instances so that we know on which
+ * memory controller the memory error(s) occurred.
+ */
+ if (!ecclog_handler())
+ return NOTIFY_DONE;
+
+ mce->kflags |= MCE_HANDLED_EDAC;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ecclog_mce_dec = {
+ .notifier_call = ecclog_mce_handler,
+ .priority = MCE_PRIO_EDAC,
+};
+
static bool igen6_check_ecc(struct igen6_imc *imc)
{
u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
@@ -573,6 +816,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
+ imc->size += imc->dimm_s_size[i];
+ imc->size += imc->dimm_l_size[i];
ndimms = 0;
for (j = 0; j < NUM_DIMMS; j++) {
@@ -608,6 +853,8 @@ static int igen6_get_dimm_config(struct mem_ctl_info *mci)
}
}
+ edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
+
return 0;
}
@@ -857,6 +1104,80 @@ static void igen6_unregister_mcis(void)
}
}
+static int igen6_mem_slice_setup(u64 mchbar)
+{
+ struct igen6_imc *imc = &igen6_pvt->imc[0];
+ u64 base = mchbar + res_cfg->cmf_base;
+ u32 offset = res_cfg->ms_hash_offset;
+ u32 size = res_cfg->cmf_size;
+ u64 ms_s_size, ms_hash;
+ void __iomem *cmf;
+ int ms_l_map;
+
+ edac_dbg(2, "\n");
+
+ if (imc[0].size < imc[1].size) {
+ ms_s_size = imc[0].size;
+ ms_l_map = 1;
+ } else {
+ ms_s_size = imc[1].size;
+ ms_l_map = 0;
+ }
+
+ igen6_pvt->ms_s_size = ms_s_size;
+ igen6_pvt->ms_l_map = ms_l_map;
+
+ edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
+ ms_s_size >> 20, ms_l_map);
+
+ if (!size)
+ return 0;
+
+ cmf = ioremap(base, size);
+ if (!cmf) {
+ igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);
+ return -ENODEV;
+ }
+
+ ms_hash = readq(cmf + offset);
+ igen6_pvt->ms_hash = ms_hash;
+
+ edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);
+
+ iounmap(cmf);
+
+ return 0;
+}
+
+static int register_err_handler(void)
+{
+ int rc;
+
+ if (res_cfg->machine_check) {
+ mce_register_decode_chain(&ecclog_mce_dec);
+ return 0;
+ }
+
+ rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
+ 0, IGEN6_NMI_NAME);
+ if (rc) {
+ igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static void unregister_err_handler(void)
+{
+ if (res_cfg->machine_check) {
+ mce_unregister_decode_chain(&ecclog_mce_dec);
+ return;
+ }
+
+ unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
+}
+
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
@@ -880,6 +1201,12 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto fail2;
}
+ if (res_cfg->num_imc > 1) {
+ rc = igen6_mem_slice_setup(mchbar);
+ if (rc)
+ goto fail2;
+ }
+
ecclog_pool = ecclog_gen_pool_create();
if (!ecclog_pool) {
rc = -ENOMEM;
@@ -892,12 +1219,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Check if any pending errors before registering the NMI handler */
ecclog_handler();
- rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
- 0, IGEN6_NMI_NAME);
- if (rc) {
- igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
+ rc = register_err_handler();
+ if (rc)
goto fail3;
- }
/* Enable error reporting */
rc = errcmd_enable_error_reporting(true);
@@ -925,7 +1249,7 @@ static void igen6_remove(struct pci_dev *pdev)
igen6_debug_teardown();
errcmd_enable_error_reporting(false);
- unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
+ unregister_err_handler();
irq_work_sync(&ecclog_irq_work);
flush_work(&ecclog_work);
gen_pool_destroy(ecclog_pool);