diff options
Diffstat (limited to 'drivers/edac/ie31200_edac.c')
-rw-r--r-- | drivers/edac/ie31200_edac.c | 641 |
1 files changed, 387 insertions, 254 deletions
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 4fc16922dc1a..204834149579 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -51,6 +51,7 @@ #include <linux/edac.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <asm/mce.h> #include "edac_module.h" #define EDAC_MOD_STR "ie31200_edac" @@ -84,44 +85,23 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca -/* Test if HB is for Skylake or later. */ -#define DEVICE_ID_SKYLAKE_OR_LATER(did) \ - (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \ - ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \ - (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \ - PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK)) - -#define IE31200_DIMMS 4 -#define IE31200_RANKS 8 -#define IE31200_RANKS_PER_CHANNEL 4 +/* Raptor Lake-S */ +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640 +#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630 + +#define IE31200_RANKS_PER_CHANNEL 8 #define IE31200_DIMMS_PER_CHANNEL 2 #define IE31200_CHANNELS 2 +#define IE31200_IMC_NUM 2 /* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */ #define IE31200_MCHBAR_LOW 0x48 #define IE31200_MCHBAR_HIGH 0x4c -#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15) -#define IE31200_MMR_WINDOW_SIZE BIT(15) /* * Error Status Register (16b) * - * 15 reserved - * 14 Isochronous TBWRR Run Behind FIFO Full - * (ITCV) - * 13 Isochronous TBWRR Run Behind FIFO Put - * (ITSTV) - * 12 reserved - * 11 MCH Thermal Sensor Event - * for SMI/SCI/SERR (GTSE) - * 10 reserved - * 9 LOCK to non-DRAM Memory Flag (LCKF) - * 8 reserved - * 7 DRAM Throttle Flag (DTF) - * 6:2 reserved * 1 Multi-bit DRAM ECC Error Flag (DMERR) * 0 Single-bit DRAM ECC Error Flag (DSERR) */ @@ -130,68 +110,60 @@ #define IE31200_ERRSTS_CE BIT(0) #define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE) -/* - * Channel 0 ECC Error Log (64b) - * - * 63:48 Error Column Address (ERRCOL) - * 47:32 Error Row Address (ERRROW) - * 31:29 Error Bank Address (ERRBANK) - * 28:27 Error Rank Address (ERRRANK) - * 26:24 reserved - * 23:16 Error Syndrome (ERRSYND) - * 15: 2 reserved - * 1 Multiple Bit Error Status (MERRSTS) - * 0 Correctable Error Status (CERRSTS) - */ - -#define IE31200_C0ECCERRLOG 0x40c8 -#define IE31200_C1ECCERRLOG 0x44c8 -#define IE31200_C0ECCERRLOG_SKL 0x4048 -#define IE31200_C1ECCERRLOG_SKL 0x4448 -#define IE31200_ECCERRLOG_CE BIT(0) -#define IE31200_ECCERRLOG_UE BIT(1) -#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) -#define IE31200_ECCERRLOG_RANK_SHIFT 27 -#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16) -#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16 - -#define IE31200_ECCERRLOG_SYNDROME(log) \ - ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \ - IE31200_ECCERRLOG_SYNDROME_SHIFT) - #define IE31200_CAPID0 0xe4 #define IE31200_CAPID0_PDCD BIT(4) #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 -#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) -#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) -#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 -#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) -#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 - -/* Skylake reports 1GB increments, everything else is 256MB */ -#define IE31200_PAGES(n, skl) \ - (n << (28 + (2 * skl) - PAGE_SHIFT)) +/* Non-constant mask variant of FIELD_GET() */ +#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) static int nr_channels; static struct pci_dev *mci_pdev; static int ie31200_registered = 1; +struct res_config { + enum mem_type mtype; + bool cmci; + int imc_num; + /* Host MMIO configuration register */ + u64 reg_mchbar_mask; + u64 reg_mchbar_window_size; + /* ECC error log register */ + u64 reg_eccerrlog_offset[IE31200_CHANNELS]; + u64 reg_eccerrlog_ce_mask; + u64 reg_eccerrlog_ce_ovfl_mask; + u64 reg_eccerrlog_ue_mask; + u64 reg_eccerrlog_ue_ovfl_mask; + u64 reg_eccerrlog_rank_mask; + u64 reg_eccerrlog_syndrome_mask; + /* MSR to clear ECC error log register */ + u32 msr_clear_eccerrlog_offset; + /* DIMM characteristics register */ + u64 reg_mad_dimm_size_granularity; + u64 reg_mad_dimm_offset[IE31200_CHANNELS]; + u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL]; + u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL]; +}; + struct ie31200_priv { void __iomem *window; void __iomem *c0errlog; void __iomem *c1errlog; + struct res_config *cfg; + struct mem_ctl_info *mci; + struct pci_dev *pdev; + struct device dev; }; +static struct ie31200_pvt { + struct ie31200_priv *priv[IE31200_IMC_NUM]; +} ie31200_pvt; + enum ie31200_chips { IE31200 = 0, + IE31200_1 = 1, }; struct ie31200_dev_info { @@ -202,18 +174,22 @@ struct ie31200_error_info { u16 errsts; u16 errsts2; u64 eccerrlog[IE31200_CHANNELS]; + u64 erraddr; }; static const struct ie31200_dev_info ie31200_devs[] = { [IE31200] = { .ctl_name = "IE31200" }, + [IE31200_1] = { + .ctl_name = "IE31200_1" + }, }; struct dimm_data { - u8 size; /* in multiples of 256MB, except Skylake is 1GB */ - u8 dual_rank : 1, - x16_width : 2; /* 0 means x8 width */ + u64 size; /* in bytes */ + u8 ranks; + enum dev_type dtype; }; static int how_many_channels(struct pci_dev *pdev) @@ -251,29 +227,54 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(u64 log) -{ - return ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); -} +#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev) static void ie31200_clear_error_info(struct mem_ctl_info *mci) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; + + /* + * The PCI ERRSTS register is deprecated. Write the MSR to clear + * the ECC error log registers in all memory controllers. + */ + if (cfg->msr_clear_eccerrlog_offset) { + if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset, + cfg->reg_eccerrlog_ce_mask | + cfg->reg_eccerrlog_ce_ovfl_mask | + cfg->reg_eccerrlog_ue_mask | + cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0) + ie31200_printk(KERN_ERR, "Failed to wrmsr.\n"); + + return; + } + /* * Clear any error bits. * (Yes, we really clear bits by writing 1 to them.) */ - pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS, + pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS, IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS); } static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { - struct pci_dev *pdev; + struct pci_dev *pdev = mci_to_pci_dev(mci); struct ie31200_priv *priv = mci->pvt_info; - pdev = to_pci_dev(mci->pdev); + /* + * The PCI ERRSTS register is deprecated, directly read the + * MMIO-mapped ECC error log registers. + */ + if (priv->cfg->msr_clear_eccerrlog_offset) { + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); + if (nr_channels == 2) + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); + + ie31200_clear_error_info(mci); + return; + } /* * This is a mess because there is no atomic way to read all the @@ -309,46 +310,56 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, static void ie31200_process_error_info(struct mem_ctl_info *mci, struct ie31200_error_info *info) { + struct ie31200_priv *priv = mci->pvt_info; + struct res_config *cfg = priv->cfg; int channel; u64 log; - if (!(info->errsts & IE31200_ERRSTS_BITS)) - return; + if (!cfg->msr_clear_eccerrlog_offset) { + if (!(info->errsts & IE31200_ERRSTS_BITS)) + return; - if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - -1, -1, -1, "UE overwrote CE", ""); - info->errsts = info->errsts2; + if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, "UE overwrote CE", ""); + info->errsts = info->errsts2; + } } for (channel = 0; channel < nr_channels; channel++) { log = info->eccerrlog[channel]; - if (log & IE31200_ECCERRLOG_UE) { + if (log & cfg->reg_eccerrlog_ue_mask) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - 0, 0, 0, - eccerrlog_row(log), + info->erraddr >> PAGE_SHIFT, 0, 0, + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 UE", ""); - } else if (log & IE31200_ECCERRLOG_CE) { + } else if (log & cfg->reg_eccerrlog_ce_mask) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - 0, 0, - IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(log), + info->erraddr >> PAGE_SHIFT, 0, + field_get(cfg->reg_eccerrlog_syndrome_mask, log), + field_get(cfg->reg_eccerrlog_rank_mask, log), channel, -1, "ie31200 CE", ""); } } } -static void ie31200_check(struct mem_ctl_info *mci) +static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce) { struct ie31200_error_info info; + info.erraddr = mce ? mce->addr : 0; ie31200_get_and_clear_error_info(mci, &info); ie31200_process_error_info(mci, &info); } -static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) +static void ie31200_check(struct mem_ctl_info *mci) +{ + __ie31200_check(mci, NULL); +} + +static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc) { union { u64 mchbar; @@ -361,7 +372,8 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low); pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high); - u.mchbar &= IE31200_MCHBAR_MASK; + u.mchbar &= cfg->reg_mchbar_mask; + u.mchbar += cfg->reg_mchbar_window_size * mc; if (u.mchbar != (resource_size_t)u.mchbar) { ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n", @@ -369,7 +381,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return NULL; } - window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE); + window = ioremap(u.mchbar, cfg->reg_mchbar_window_size); if (!window) ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n", (unsigned long long)u.mchbar); @@ -377,155 +389,108 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } -static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm, + struct res_config *cfg) { - dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; - dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> - (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); + dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity; + dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1; + dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8; } -static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, - int chan) +static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window, + struct res_config *cfg, int mc) { - dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; - dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; - dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0; -} + struct dimm_data dimm_info; + struct dimm_info *dimm; + unsigned long nr_pages; + u32 addr_decode; + int i, j, k; -static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, - bool skl) -{ - if (skl) - __skl_populate_dimm_info(dd, addr_decode, chan); - else - __populate_dimm_info(dd, addr_decode, chan); -} + for (i = 0; i < IE31200_CHANNELS; i++) { + addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]); + edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); + for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { + populate_dimm_info(&dimm_info, addr_decode, j, cfg); + edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n", + mc, i, j, dimm_info.size >> 20, + dimm_info.ranks, + dimm_info.dtype); -static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) + nr_pages = MiB_TO_PAGES(dimm_info.size >> 20); + if (nr_pages == 0) + continue; + + nr_pages = nr_pages / dimm_info.ranks; + for (k = 0; k < dimm_info.ranks; k++) { + dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0); + dimm->nr_pages = nr_pages; + edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); + dimm->grain = 8; /* just a guess */ + dimm->mtype = cfg->mtype; + dimm->dtype = dimm_info.dtype; + dimm->edac_mode = EDAC_UNKNOWN; + } + } + } +} + +static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc) { - int i, j, ret; - struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; - struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; - void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode, mad_offset; - - /* - * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit - * this logic when adding new CPU support. - */ - bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device); - - edac_dbg(0, "MC:\n"); - - if (!ecc_capable(pdev)) { - ie31200_printk(KERN_INFO, "No ECC support\n"); - return -ENODEV; - } + struct mem_ctl_info *mci; + void __iomem *window; + int ret; nr_channels = how_many_channels(pdev); layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = IE31200_DIMMS; + layers[0].size = IE31200_RANKS_PER_CHANNEL; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; layers[1].size = nr_channels; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, sizeof(struct ie31200_priv)); if (!mci) return -ENOMEM; - window = ie31200_map_mchbar(pdev); + window = ie31200_map_mchbar(pdev, cfg, mc); if (!window) { ret = -ENODEV; goto fail_free; } edac_dbg(3, "MC: init mci\n"); - mci->pdev = &pdev->dev; - if (skl) - mci->mtype_cap = MEM_FLAG_DDR4; - else - mci->mtype_cap = MEM_FLAG_DDR3; + mci->mtype_cap = BIT(cfg->mtype); mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = ie31200_devs[dev_idx].ctl_name; + mci->ctl_name = ie31200_devs[mc].ctl_name; mci->dev_name = pci_name(pdev); - mci->edac_check = ie31200_check; + mci->edac_check = cfg->cmci ? NULL : ie31200_check; mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; - if (skl) { - priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; - priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; - mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; - } else { - priv->c0errlog = window + IE31200_C0ECCERRLOG; - priv->c1errlog = window + IE31200_C1ECCERRLOG; - mad_offset = IE31200_MAD_DIMM_0_OFFSET; - } - - /* populate DIMM info */ - for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + mad_offset + - (i * 4)); - edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); - for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - populate_dimm_info(&dimm_info[i][j], addr_decode, j, - skl); - edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", - dimm_info[i][j].size, - dimm_info[i][j].dual_rank, - dimm_info[i][j].x16_width); - } - } - + priv->c0errlog = window + cfg->reg_eccerrlog_offset[0]; + priv->c1errlog = window + cfg->reg_eccerrlog_offset[1]; + priv->cfg = cfg; + priv->mci = mci; + priv->pdev = pdev; + device_initialize(&priv->dev); /* - * The dram rank boundary (DRB) reg values are boundary addresses - * for each DRAM rank with a granularity of 64MB. DRB regs are - * cumulative; the last one will contain the total memory - * contained in all ranks. + * The EDAC core uses mci->pdev (pointer to the structure device) + * as the memory controller ID. The SoCs attach one or more memory + * controllers to a single pci_dev (a single pci_dev->dev can + * correspond to multiple memory controllers). + * + * To make mci->pdev unique, assign pci_dev->dev to mci->pdev + * for the first memory controller and assign a unique priv->dev + * to mci->pdev for each additional memory controller. */ - for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) { - for (j = 0; j < IE31200_CHANNELS; j++) { - struct dimm_info *dimm; - unsigned long nr_pages; - - nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); - if (nr_pages == 0) - continue; - - if (dimm_info[j][i].dual_rank) { - nr_pages = nr_pages / 2; - dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* just a guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; - } - dimm = edac_get_dimm(mci, i * 2, j, 0); - dimm->nr_pages = nr_pages; - edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); - dimm->grain = 8; /* same guess */ - if (skl) - dimm->mtype = MEM_DDR4; - else - dimm->mtype = MEM_DDR3; - dimm->dtype = DEV_UNKNOWN; - dimm->edac_mode = EDAC_UNKNOWN; - } - } + mci->pdev = mc ? &priv->dev : &pdev->dev; + ie31200_get_dimm_config(mci, window, cfg, mc); ie31200_clear_error_info(mci); if (edac_mc_add_mc(mci)) { @@ -534,16 +499,115 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) goto fail_unmap; } - /* get this far and it's successful */ - edac_dbg(3, "MC: success\n"); + ie31200_pvt.priv[mc] = priv; return 0; - fail_unmap: iounmap(window); - fail_free: edac_mc_free(mci); + return ret; +} + +static void mce_check(struct mce *mce) +{ + struct ie31200_priv *priv; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + + __ie31200_check(priv->mci, mce); + } +} + +static int mce_handler(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + char *type; + + if (mce->kflags & MCE_HANDLED_CEC) + return NOTIFY_DONE; + + /* + * Ignore unless this is a memory related error. + * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event"; + + edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n", + mce->extcpu, type, mce->mcgstatus, + mce->bank, mce->status); + edac_dbg(0, "TSC 0x%llx\n", mce->tsc); + edac_dbg(0, "ADDR 0x%llx\n", mce->addr); + edac_dbg(0, "MISC 0x%llx\n", mce->misc); + edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n", + mce->cpuvendor, mce->cpuid, mce->time, + mce->socketid, mce->apicid); + + mce_check(mce); + mce->kflags |= MCE_HANDLED_EDAC; + + return NOTIFY_DONE; +} + +static struct notifier_block ie31200_mce_dec = { + .notifier_call = mce_handler, + .priority = MCE_PRIO_EDAC, +}; + +static void ie31200_unregister_mcis(void) +{ + struct ie31200_priv *priv; + struct mem_ctl_info *mci; + int i; + + for (i = 0; i < IE31200_IMC_NUM; i++) { + priv = ie31200_pvt.priv[i]; + if (!priv) + continue; + mci = priv->mci; + edac_mc_del_mc(mci->pdev); + iounmap(priv->window); + edac_mc_free(mci); + } +} + +static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg) +{ + int i, ret; + + edac_dbg(0, "MC:\n"); + + if (!ecc_capable(pdev)) { + ie31200_printk(KERN_INFO, "No ECC support\n"); + return -ENODEV; + } + + for (i = 0; i < cfg->imc_num; i++) { + ret = ie31200_register_mci(pdev, cfg, i); + if (ret) + goto fail_register; + } + + if (cfg->cmci) { + mce_register_decode_chain(&ie31200_mce_dec); + edac_op_state = EDAC_OPSTATE_INT; + } else { + edac_op_state = EDAC_OPSTATE_POLL; + } + + /* get this far and it's successful. */ + edac_dbg(3, "MC: success\n"); + return 0; + +fail_register: + ie31200_unregister_mcis(); return ret; } @@ -555,7 +619,7 @@ static int ie31200_init_one(struct pci_dev *pdev, edac_dbg(0, "MC:\n"); if (pci_enable_device(pdev) < 0) return -EIO; - rc = ie31200_probe1(pdev, ent->driver_data); + rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data); if (rc == 0 && !mci_pdev) mci_pdev = pci_dev_get(pdev); @@ -564,43 +628,112 @@ static int ie31200_init_one(struct pci_dev *pdev, static void ie31200_remove_one(struct pci_dev *pdev) { - struct mem_ctl_info *mci; - struct ie31200_priv *priv; + struct ie31200_priv *priv = ie31200_pvt.priv[0]; edac_dbg(0, "\n"); pci_dev_put(mci_pdev); mci_pdev = NULL; - mci = edac_mc_del_mc(&pdev->dev); - if (!mci) - return; - priv = mci->pvt_info; - iounmap(priv->window); - edac_mc_free(mci); + if (priv->cfg->cmci) + mce_unregister_decode_chain(&ie31200_mce_dec); + ie31200_unregister_mcis(); } +static struct res_config snb_cfg = { + .mtype = MEM_DDR3, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x40c8, + .reg_eccerrlog_offset[1] = 0x44c8, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(28), + .reg_mad_dimm_offset[0] = 0x5004, + .reg_mad_dimm_offset[1] = 0x5008, + .reg_mad_dimm_size_mask[0] = GENMASK(7, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(15, 8), + .reg_mad_dimm_rank_mask[0] = BIT(17), + .reg_mad_dimm_rank_mask[1] = BIT(18), + .reg_mad_dimm_width_mask[0] = BIT(19), + .reg_mad_dimm_width_mask[1] = BIT(20), +}; + +static struct res_config skl_cfg = { + .mtype = MEM_DDR4, + .imc_num = 1, + .reg_mchbar_mask = GENMASK_ULL(38, 15), + .reg_mchbar_window_size = BIT_ULL(15), + .reg_eccerrlog_offset[0] = 0x4048, + .reg_eccerrlog_offset[1] = 0x4448, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ue_mask = BIT_ULL(1), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .reg_mad_dimm_size_granularity = BIT_ULL(30), + .reg_mad_dimm_offset[0] = 0x500c, + .reg_mad_dimm_offset[1] = 0x5010, + .reg_mad_dimm_size_mask[0] = GENMASK(5, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(21, 16), + .reg_mad_dimm_rank_mask[0] = BIT(10), + .reg_mad_dimm_rank_mask[1] = BIT(26), + .reg_mad_dimm_width_mask[0] = GENMASK(9, 8), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + +struct res_config rpl_s_cfg = { + .mtype = MEM_DDR5, + .cmci = true, + .imc_num = 2, + .reg_mchbar_mask = GENMASK_ULL(41, 17), + .reg_mchbar_window_size = BIT_ULL(16), + .reg_eccerrlog_offset[0] = 0xe048, + .reg_eccerrlog_offset[1] = 0xe848, + .reg_eccerrlog_ce_mask = BIT_ULL(0), + .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1), + .reg_eccerrlog_ue_mask = BIT_ULL(2), + .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3), + .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27), + .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16), + .msr_clear_eccerrlog_offset = 0x791, + .reg_mad_dimm_offset[0] = 0xd80c, + .reg_mad_dimm_offset[1] = 0xd810, + .reg_mad_dimm_size_granularity = BIT_ULL(29), + .reg_mad_dimm_size_mask[0] = GENMASK(6, 0), + .reg_mad_dimm_size_mask[1] = GENMASK(22, 16), + .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9), + .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26), + .reg_mad_dimm_width_mask[0] = GENMASK(8, 7), + .reg_mad_dimm_width_mask[1] = GENMASK(25, 24), +}; + static const struct pci_device_id ie31200_pci_tbl[] = { - { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, - { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg}, { 0, } /* 0 terminated list. */ }; MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl); @@ -617,12 +750,10 @@ static int __init ie31200_init(void) int pci_rc, i; edac_dbg(3, "MC:\n"); - /* Ensure that the OPSTATE is set correctly for POLL or NMI */ - opstate_init(); pci_rc = pci_register_driver(&ie31200_driver); if (pci_rc < 0) - goto fail0; + return pci_rc; if (!mci_pdev) { ie31200_registered = 0; @@ -633,11 +764,13 @@ static int __init ie31200_init(void) if (mci_pdev) break; } + if (!mci_pdev) { edac_dbg(0, "ie31200 pci_get_device fail\n"); pci_rc = -ENODEV; - goto fail1; + goto fail0; } + pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]); if (pci_rc < 0) { edac_dbg(0, "ie31200 init fail\n"); @@ -645,12 +778,12 @@ static int __init ie31200_init(void) goto fail1; } } - return 0; + return 0; fail1: - pci_unregister_driver(&ie31200_driver); -fail0: pci_dev_put(mci_pdev); +fail0: + pci_unregister_driver(&ie31200_driver); return pci_rc; } |