diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 06:40:00 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-25 06:40:00 -0700 |
commit | 36168d7123311d52e085c116f6c66e16f0b84615 (patch) | |
tree | 4e4c4f80a6edf160f7dd511c7a78788ca61a82cd /drivers/edac | |
parent | 6078e07dcf5348075713124508d03786dc9ffa8b (diff) | |
parent | 4cf841e398503990df640f7a7c5b2ea56f11c08c (diff) | |
download | lwn-36168d7123311d52e085c116f6c66e16f0b84615.tar.gz lwn-36168d7123311d52e085c116f6c66e16f0b84615.zip |
Merge tag 'edac_for_4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov:
"The EDAC tree was busier than usual this cycle as the shortlog below
shows.
Also, this pull request is carrying an ACPI DSM driver which is used
to ask the platform to supply the DIMM location of a reported hardware
error and thus simplify all the EDAC logic when trying to map the
error address to the respective DIMM.
Core EDAC updates:
- amd64_edac: AMD family 0x17, models 0x10-0x2f support (Michael Jin);
Hygon Dhyana support (Pu Wen)
- sb_edac: New maintainer + fixes (Tony Luck); error reporting
improvements and fixes (Qiuxu Zhuo)
- ghes_edac: SMBIOS handle type 17 for DIMM locating and per-DIMM
error accounting (Fan Wu)
- altera_edac: Stratix10 support and refactoring (Thor Thayer)
Out of tree addition:
- acpi_adxl: Address Translation interface using an ACPI DSM (Tony
Luck)
- the usual amount of other misc fixes and cleanups all over"
* tag 'edac_for_4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (22 commits)
ACPI/ADXL: Add address translation interface using an ACPI DSM
EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr()
EDAC, skx_edac: Fix logical channel intermediate decoding
EDAC, {i7core,sb,skx}_edac: Fix uncorrected error counting
EDAC, altera: Work around int-to-pointer-cast warnings
EDAC, amd64: Add Hygon Dhyana support
EDAC: Raise the maximum number of memory controllers
arm64: dts: stratix10: Add peripheral EDAC nodes
EDAC, altera: Add Stratix10 peripheral support
EDAC, altera: Merge Stratix10 into the Arria10 SDRAM probe routine
arm64: dts: stratix10: Add SDRAM node
EDAC, altera: Combine Stratix10 and Arria10 probe functions
arm64: dts: stratix10: Additions to EDAC System Manager
EDAC, i7core: Remove set but not used variable pvt
EDAC, ghes: Use CPER module handles to locate DIMMs
EDAC: Correct DIMM capacity unit symbol
EDAC, sb_edac: Fix signedness bugs in *_get_ha() functions
EDAC, sb_edac: Fix reporting for patrol scrubber errors
EDAC, sb_edac: Return early on ADDRV bit and address type test
MAINTAINERS: Update maintainer for drivers/edac/sb_edac.c
...
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/altera_edac.c | 667 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 73 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 24 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 3 | ||||
-rw-r--r-- | drivers/edac/ghes_edac.c | 23 | ||||
-rw-r--r-- | drivers/edac/i3200_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 5 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 204 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/thunderx_edac.c | 4 |
11 files changed, 462 insertions, 554 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index ab7c5a937ab0..c89d82aa2776 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -69,25 +69,6 @@ static const struct altr_sdram_prv_data a10_data = { .ue_set_mask = A10_DIAGINT_TDERRA_MASK, }; -static const struct altr_sdram_prv_data s10_data = { - .ecc_ctrl_offset = S10_ECCCTRL1_OFST, - .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN, - .ecc_stat_offset = S10_INTSTAT_OFST, - .ecc_stat_ce_mask = A10_INTSTAT_SBEERR, - .ecc_stat_ue_mask = A10_INTSTAT_DBEERR, - .ecc_saddr_offset = S10_SERRADDR_OFST, - .ecc_daddr_offset = S10_DERRADDR_OFST, - .ecc_irq_en_offset = S10_ERRINTEN_OFST, - .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK, - .ecc_irq_clr_offset = S10_INTSTAT_OFST, - .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR), - .ecc_cnt_rst_offset = S10_ECCCTRL1_OFST, - .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK, - .ce_ue_trgr_offset = S10_DIAGINTTEST_OFST, - .ce_set_mask = A10_DIAGINT_TSERRA_MASK, - .ue_set_mask = A10_DIAGINT_TDERRA_MASK, -}; - /*********************** EDAC Memory Controller Functions ****************/ /* The SDRAM controller uses the EDAC Memory Controller framework. 
*/ @@ -239,7 +220,7 @@ static unsigned long get_total_mem(void) static const struct of_device_id altr_sdram_ctrl_of_match[] = { { .compatible = "altr,sdram-edac", .data = &c5_data}, { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, - { .compatible = "altr,sdram-edac-s10", .data = &s10_data}, + { .compatible = "altr,sdram-edac-s10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -293,6 +274,7 @@ release: return ret; } +static int socfpga_is_a10(void); static int altr_sdram_probe(struct platform_device *pdev) { const struct of_device_id *id; @@ -416,7 +398,7 @@ static int altr_sdram_probe(struct platform_device *pdev) goto err; /* Only the Arria10 has separate IRQs */ - if (irq2 > 0) { + if (socfpga_is_a10()) { /* Arria10 specific initialization */ res = a10_init(mc_vbase); if (res < 0) @@ -502,8 +484,9 @@ static int s10_protected_reg_write(void *context, unsigned int reg, unsigned int val) { struct arm_smccc_res result; + unsigned long offset = (unsigned long)context; - arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, reg, val, 0, 0, + arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0, 0, 0, 0, &result); return (int)result.a0; @@ -523,8 +506,9 @@ static int s10_protected_reg_read(void *context, unsigned int reg, unsigned int *val) { struct arm_smccc_res result; + unsigned long offset = (unsigned long)context; - arm_smccc_smc(INTEL_SIP_SMC_REG_READ, reg, 0, 0, 0, + arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0, 0, 0, 0, &result); *val = (unsigned int)result.a1; @@ -532,246 +516,18 @@ static int s10_protected_reg_read(void *context, unsigned int reg, return (int)result.a0; } -static bool s10_sdram_writeable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case 
S10_SYSMGR_ECC_INTMASK_CLR_OFST: - return true; - } - return false; -} - -static bool s10_sdram_readable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DERRADDR_OFST: - case S10_SERRADDR_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - case S10_SYSMGR_ECC_INTSTAT_SERR_OFST: - case S10_SYSMGR_ECC_INTSTAT_DERR_OFST: - return true; - } - return false; -} - -static bool s10_sdram_volatile_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DERRADDR_OFST: - case S10_SERRADDR_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - case S10_SYSMGR_ECC_INTSTAT_SERR_OFST: - case S10_SYSMGR_ECC_INTSTAT_DERR_OFST: - return true; - } - return false; -} - static const struct regmap_config s10_sdram_regmap_cfg = { .name = "s10_ddr", .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0xffffffff, - .writeable_reg = s10_sdram_writeable_reg, - .readable_reg = s10_sdram_readable_reg, - .volatile_reg = s10_sdram_volatile_reg, + .max_register = 0xffd12228, .reg_read = s10_protected_reg_read, .reg_write = s10_protected_reg_write, .use_single_read = true, .use_single_write = true, }; -static int altr_s10_sdram_probe(struct platform_device *pdev) -{ - const struct of_device_id *id; - struct edac_mc_layer layers[2]; - struct mem_ctl_info *mci; - struct altr_sdram_mc_data *drvdata; - const struct altr_sdram_prv_data *priv; - struct regmap *regmap; - struct dimm_info *dimm; - u32 read_reg; - int irq, ret = 0; - unsigned long mem_size; - - id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev); - if (!id) - return 
-ENODEV; - - /* Grab specific offsets and masks for Stratix10 */ - priv = of_match_node(altr_sdram_ctrl_of_match, - pdev->dev.of_node)->data; - - regmap = devm_regmap_init(&pdev->dev, NULL, (void *)priv, - &s10_sdram_regmap_cfg); - if (IS_ERR(regmap)) - return PTR_ERR(regmap); - - /* Validate the SDRAM controller has ECC enabled */ - if (regmap_read(regmap, priv->ecc_ctrl_offset, &read_reg) || - ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) { - edac_printk(KERN_ERR, EDAC_MC, - "No ECC/ECC disabled [0x%08X]\n", read_reg); - return -ENODEV; - } - - /* Grab memory size from device tree. */ - mem_size = get_total_mem(); - if (!mem_size) { - edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n"); - return -ENODEV; - } - - /* Ensure the SDRAM Interrupt is disabled */ - if (regmap_update_bits(regmap, priv->ecc_irq_en_offset, - priv->ecc_irq_en_mask, 0)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error disabling SDRAM ECC IRQ\n"); - return -ENODEV; - } - - /* Toggle to clear the SDRAM Error count */ - if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset, - priv->ecc_cnt_rst_mask, - priv->ecc_cnt_rst_mask)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - return -ENODEV; - } - - if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset, - priv->ecc_cnt_rst_mask, 0)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - return -ENODEV; - } - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - edac_printk(KERN_ERR, EDAC_MC, - "No irq %d in DT\n", irq); - return -ENODEV; - } - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 1; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = 1; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, - sizeof(struct altr_sdram_mc_data)); - if (!mci) - return -ENOMEM; - - mci->pdev = &pdev->dev; - drvdata = mci->pvt_info; - drvdata->mc_vbase = regmap; - drvdata->data = priv; - 
platform_set_drvdata(pdev, mci); - - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { - edac_printk(KERN_ERR, EDAC_MC, - "Unable to get managed device resource\n"); - ret = -ENOMEM; - goto free; - } - - mci->mtype_cap = MEM_FLAG_DDR3; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = dev_name(&pdev->dev); - mci->scrub_mode = SCRUB_SW_SRC; - mci->dev_name = dev_name(&pdev->dev); - - dimm = *mci->dimms; - dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1; - dimm->grain = 8; - dimm->dtype = DEV_X8; - dimm->mtype = MEM_DDR3; - dimm->edac_mode = EDAC_SECDED; - - ret = edac_mc_add_mc(mci); - if (ret < 0) - goto err; - - ret = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler, - IRQF_SHARED, dev_name(&pdev->dev), mci); - if (ret < 0) { - edac_mc_printk(mci, KERN_ERR, - "Unable to request irq %d\n", irq); - ret = -ENODEV; - goto err2; - } - - if (regmap_write(regmap, S10_SYSMGR_ECC_INTMASK_CLR_OFST, - S10_DDR0_IRQ_MASK)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - ret = -ENODEV; - goto err2; - } - - if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset, - priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) { - edac_mc_printk(mci, KERN_ERR, - "Error enabling SDRAM ECC IRQ\n"); - ret = -ENODEV; - goto err2; - } - - altr_sdr_mc_create_debugfs_nodes(mci); - - devres_close_group(&pdev->dev, NULL); - - return 0; - -err2: - edac_mc_del_mc(&pdev->dev); -err: - devres_release_group(&pdev->dev, NULL); -free: - edac_mc_free(mci); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC Probe Failed; Error %d\n", ret); - - return ret; -} - -static int altr_s10_sdram_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = platform_get_drvdata(pdev); - - edac_mc_del_mc(&pdev->dev); - edac_mc_free(mci); - platform_set_drvdata(pdev, NULL); - - return 0; -} - /************** </Stratix10 EDAC Memory Controller Functions> ***********/ /* @@ 
-805,20 +561,6 @@ static struct platform_driver altr_sdram_edac_driver = { module_platform_driver(altr_sdram_edac_driver); -static struct platform_driver altr_s10_sdram_edac_driver = { - .probe = altr_s10_sdram_probe, - .remove = altr_s10_sdram_remove, - .driver = { - .name = "altr_s10_sdram_edac", -#ifdef CONFIG_PM - .pm = &altr_sdram_pm_ops, -#endif - .of_match_table = altr_sdram_ctrl_of_match, - }, -}; - -module_platform_driver(altr_s10_sdram_edac_driver); - /************************* EDAC Parent Probe *************************/ static const struct of_device_id altr_edac_device_of_match[]; @@ -972,6 +714,16 @@ static const struct file_operations altr_edac_a10_device_inject_fops = { .llseek = generic_file_llseek, }; +static ssize_t altr_edac_a10_device_trig2(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject2_fops = { + .open = simple_open, + .write = altr_edac_a10_device_trig2, + .llseek = generic_file_llseek, +}; + static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, const struct edac_device_prv_data *priv) { @@ -1253,6 +1005,16 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + +static int socfpga_is_s10(void) +{ + return of_machine_is_compatible("altr,socfpga-stratix10"); +} + static __init int __maybe_unused altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, u32 ecc_ctrl_en_mask, bool dual_port) @@ -1267,8 +1029,32 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, /* Get the ECC Manager - parent of the device EDACs */ np_eccmgr = of_get_parent(np); - ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, - "altr,sysmgr-syscon"); + + if (socfpga_is_a10()) { + ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); + } else { + struct 
device_node *sysmgr_np; + struct resource res; + uintptr_t base; + + sysmgr_np = of_parse_phandle(np_eccmgr, + "altr,sysmgr-syscon", 0); + if (!sysmgr_np) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to find altr,sysmgr-syscon\n"); + return -ENODEV; + } + + if (of_address_to_resource(sysmgr_np, 0, &res)) + return -ENOMEM; + + /* Need physical address for SMCC call */ + base = res.start; + + ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, + &s10_sdram_regmap_cfg); + } of_node_put(np_eccmgr); if (IS_ERR(ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -1326,11 +1112,6 @@ out: return ret; } -static int socfpga_is_a10(void) -{ - return of_machine_is_compatible("altr,socfpga-arria10"); -} - static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) @@ -1338,7 +1119,7 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) int irq; struct device_node *child, *np; - if (!socfpga_is_a10()) + if (!socfpga_is_a10() && !socfpga_is_s10()) return -ENODEV; np = of_find_compatible_node(NULL, NULL, @@ -1584,7 +1365,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject_fops, + .inject_fops = &altr_edac_a10_device_inject2_fops, }; static int __init socfpga_init_ethernet_ecc(void) @@ -1662,7 +1443,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject_fops, + .inject_fops = &altr_edac_a10_device_inject2_fops, }; static int __init socfpga_init_usb_ecc(void) @@ -1860,7 +1641,7 @@ static int __init socfpga_init_sdmmc_ecc(void) int rc = -ENODEV; struct 
device_node *child; - if (!socfpga_is_a10()) + if (!socfpga_is_a10() && !socfpga_is_s10()) return -ENODEV; child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); @@ -1944,6 +1725,74 @@ static ssize_t altr_edac_a10_device_trig(struct file *file, writel(priv->ue_set_mask, set_addr); else writel(priv->ce_set_mask, set_addr); + + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +/* + * The Stratix10 EDAC Error Injection Functions differ from Arria10 + * slightly. A few Arria10 peripherals can use this injection function. + * Inject the error into the memory and then readback to trigger the IRQ. + */ +static ssize_t altr_edac_a10_device_trig2(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) { + writel(priv->ue_set_mask, set_addr); + } else { + /* Setup write of 0 to first 4 bytes */ + writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + /* Setup write of 4 bytes */ + writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); + /* Setup Address to 0 */ + writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Setup accctrl to write & data override */ + writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. 
*/ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup accctrl to read & ecc override */ + writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup write for single bit change */ + writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + /* Copy Read ECC to Write ECC */ + writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), + drvdata->base + ECC_BLK_WECC0_OFST); + writel(readl(drvdata->base + ECC_BLK_RECC1_OFST), + drvdata->base + ECC_BLK_WECC1_OFST); + /* Setup accctrl to write & ecc override & data override */ + writel(ECC_WRITE_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup accctrl to read & ecc overwrite & data overwrite */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + } + /* Ensure the interrupt test bits are set */ wmb(); local_irq_restore(flags); @@ -2147,6 +1996,35 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = { .xlate = irq_domain_xlate_twocell, }; +/************** Stratix 10 EDAC Double Bit Error Handler ************/ +#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) + +/* + * The double bit error is handled through SError which is fatal. This is + * called as a panic notifier to printout ECC error info as part of the panic. 
+ */ +static int s10_edac_dberr_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct altr_arria10_edac *edac = to_a10edac(this, panic_notifier); + int err_addr, dberror; + + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, + &dberror); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); + if (dberror & S10_DDR0_IRQ_MASK) { + regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + err_addr); + edac_printk(KERN_ERR, EDAC_MC, + "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", + err_addr); + } + + return NOTIFY_DONE; +} + +/****************** Arria 10 EDAC Probe Function *********************/ static int altr_edac_a10_probe(struct platform_device *pdev) { struct altr_arria10_edac *edac; @@ -2160,8 +2038,34 @@ static int altr_edac_a10_probe(struct platform_device *pdev) platform_set_drvdata(pdev, edac); INIT_LIST_HEAD(&edac->a10_ecc_devices); - edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + if (socfpga_is_a10()) { + edac->ecc_mgr_map = + syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "altr,sysmgr-syscon"); + } else { + struct device_node *sysmgr_np; + struct resource res; + uintptr_t base; + + sysmgr_np = of_parse_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon", 0); + if (!sysmgr_np) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to find altr,sysmgr-syscon\n"); + return -ENODEV; + } + + if (of_address_to_resource(sysmgr_np, 0, &res)) + return -ENOMEM; + + /* Need physical address for SMCC call */ + base = res.start; + + edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, + (void *)base, + &s10_sdram_regmap_cfg); + } + if (IS_ERR(edac->ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, "Unable to get syscon altr,sysmgr-syscon\n"); @@ -2188,14 +2092,38 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_irq_handler, edac); - edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 
0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); - return edac->db_irq; + if (socfpga_is_a10()) { + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; + } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, + edac); + } else { + int dberror, err_addr; + + edac->panic_notifier.notifier_call = s10_edac_dberr_handler; + atomic_notifier_chain_register(&panic_notifier_list, + &edac->panic_notifier); + + /* Printout a message if uncorrectable error previously. */ + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, + &dberror); + if (dberror) { + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + &err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "Previous Boot UE detected[0x%X] @ 0x%X\n", + dberror, err_addr); + /* Reset the sticky registers */ + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_VAL_OFST, 0); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, 0); + } } - irq_set_chained_handler_and_data(edac->db_irq, - altr_edac_a10_irq_handler, - edac); for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) @@ -2212,7 +2140,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_device_add(edac, child); - else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) + else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || + (of_device_is_compatible(child, "altr,sdram-edac-s10"))) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); @@ -2223,6 +2152,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev) static const struct of_device_id altr_edac_a10_of_match[] = { { .compatible = "altr,socfpga-a10-ecc-manager" }, + { .compatible = "altr,socfpga-s10-ecc-manager" }, {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match); @@ -2236,171 +2166,6 @@ static struct platform_driver altr_edac_a10_driver = { }; 
module_platform_driver(altr_edac_a10_driver); -/************** Stratix 10 EDAC Device Controller Functions> ************/ - -#define to_s10edac(p, m) container_of(p, struct altr_stratix10_edac, m) - -/* - * The double bit error is handled through SError which is fatal. This is - * called as a panic notifier to printout ECC error info as part of the panic. - */ -static int s10_edac_dberr_handler(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct altr_stratix10_edac *edac = to_s10edac(this, panic_notifier); - int err_addr, dberror; - - s10_protected_reg_read(edac, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, - &dberror); - /* Remember the UE Errors for a reboot */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, dberror); - if (dberror & S10_DDR0_IRQ_MASK) { - s10_protected_reg_read(edac, S10_DERRADDR_OFST, &err_addr); - /* Remember the UE Error address */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, - err_addr); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", - err_addr); - } - - return NOTIFY_DONE; -} - -static void altr_edac_s10_irq_handler(struct irq_desc *desc) -{ - struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_chip(desc); - int irq = irq_desc_get_irq(desc); - int bit, sm_offset, irq_status; - - sm_offset = S10_SYSMGR_ECC_INTSTAT_SERR_OFST; - - chained_irq_enter(chip, desc); - - s10_protected_reg_read(NULL, sm_offset, &irq_status); - - for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { - irq = irq_linear_revmap(edac->domain, bit); - if (irq) - generic_handle_irq(irq); - } - - chained_irq_exit(chip, desc); -} - -static void s10_eccmgr_irq_mask(struct irq_data *d) -{ - struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d); - - s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_SET_OFST, - BIT(d->hwirq)); -} - -static void s10_eccmgr_irq_unmask(struct irq_data *d) -{ - struct altr_stratix10_edac *edac = 
irq_data_get_irq_chip_data(d); - - s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_CLR_OFST, - BIT(d->hwirq)); -} - -static int s10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq) -{ - struct altr_stratix10_edac *edac = d->host_data; - - irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq); - irq_set_chip_data(irq, edac); - irq_set_noprobe(irq); - - return 0; -} - -static const struct irq_domain_ops s10_eccmgr_ic_ops = { - .map = s10_eccmgr_irqdomain_map, - .xlate = irq_domain_xlate_twocell, -}; - -static int altr_edac_s10_probe(struct platform_device *pdev) -{ - struct altr_stratix10_edac *edac; - struct device_node *child; - int dberror, err_addr; - - edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); - if (!edac) - return -ENOMEM; - - edac->dev = &pdev->dev; - platform_set_drvdata(pdev, edac); - INIT_LIST_HEAD(&edac->s10_ecc_devices); - - edac->irq_chip.name = pdev->dev.of_node->name; - edac->irq_chip.irq_mask = s10_eccmgr_irq_mask; - edac->irq_chip.irq_unmask = s10_eccmgr_irq_unmask; - edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64, - &s10_eccmgr_ic_ops, edac); - if (!edac->domain) { - dev_err(&pdev->dev, "Error adding IRQ domain\n"); - return -ENOMEM; - } - - edac->sb_irq = platform_get_irq(pdev, 0); - if (edac->sb_irq < 0) { - dev_err(&pdev->dev, "No SBERR IRQ resource\n"); - return edac->sb_irq; - } - - irq_set_chained_handler_and_data(edac->sb_irq, - altr_edac_s10_irq_handler, - edac); - - edac->panic_notifier.notifier_call = s10_edac_dberr_handler; - atomic_notifier_chain_register(&panic_notifier_list, - &edac->panic_notifier); - - /* Printout a message if uncorrectable error previously. 
*/ - s10_protected_reg_read(edac, S10_SYSMGR_UE_VAL_OFST, &dberror); - if (dberror) { - s10_protected_reg_read(edac, S10_SYSMGR_UE_ADDR_OFST, - &err_addr); - edac_printk(KERN_ERR, EDAC_DEVICE, - "Previous Boot UE detected[0x%X] @ 0x%X\n", - dberror, err_addr); - /* Reset the sticky registers */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, 0); - s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, 0); - } - - for_each_child_of_node(pdev->dev.of_node, child) { - if (!of_device_is_available(child)) - continue; - - if (of_device_is_compatible(child, "altr,sdram-edac-s10")) - of_platform_populate(pdev->dev.of_node, - altr_sdram_ctrl_of_match, - NULL, &pdev->dev); - } - - return 0; -} - -static const struct of_device_id altr_edac_s10_of_match[] = { - { .compatible = "altr,socfpga-s10-ecc-manager" }, - {}, -}; -MODULE_DEVICE_TABLE(of, altr_edac_s10_of_match); - -static struct platform_driver altr_edac_s10_driver = { - .probe = altr_edac_s10_probe, - .driver = { - .name = "socfpga_s10_ecc_manager", - .of_match_table = altr_edac_s10_of_match, - }, -}; -module_platform_driver(altr_edac_s10_driver); - MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 81f0554e09de..4213cb0bb2a7 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -156,34 +156,6 @@ #define A10_INTMASK_CLR_OFST 0x10 #define A10_DDR0_IRQ_MASK BIT(17) -/************* Stratix10 Defines **************/ - -/* SDRAM Controller EccCtrl Register */ -#define S10_ECCCTRL1_OFST 0xF8011100 - -/* SDRAM Controller DRAM IRQ Register */ -#define S10_ERRINTEN_OFST 0xF8011110 - -/* SDRAM Interrupt Mode Register */ -#define S10_INTMODE_OFST 0xF801111C - -/* SDRAM Controller Error Status Register */ -#define S10_INTSTAT_OFST 0xF8011120 - -/* SDRAM Controller ECC Error Address Register */ -#define S10_DERRADDR_OFST 0xF801112C -#define S10_SERRADDR_OFST 0xF8011130 
- -/* SDRAM Controller ECC Diagnostic Register */ -#define S10_DIAGINTTEST_OFST 0xF8011124 - -/* SDRAM Single Bit Error Count Compare Set Register */ -#define S10_SERRCNTREG_OFST 0xF801113C - -/* Sticky registers for Uncorrected Errors */ -#define S10_SYSMGR_UE_VAL_OFST 0xFFD12220 -#define S10_SYSMGR_UE_ADDR_OFST 0xFFD12224 - struct altr_sdram_prv_data { int ecc_ctrl_offset; int ecc_ctl_en_mask; @@ -319,15 +291,40 @@ struct altr_sdram_mc_data { /************* Stratix10 Defines **************/ /* Stratix10 ECC Manager Defines */ -#define S10_SYSMGR_ECC_INTMASK_VAL_OFST 0xFFD12090 -#define S10_SYSMGR_ECC_INTMASK_SET_OFST 0xFFD12094 -#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0xFFD12098 +#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 -#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C -#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0 +/* Sticky registers for Uncorrected Errors */ +#define S10_SYSMGR_UE_VAL_OFST 0x120 +#define S10_SYSMGR_UE_ADDR_OFST 0x124 #define S10_DDR0_IRQ_MASK BIT(16) +/* Define ECC Block Offsets for peripherals */ +#define ECC_BLK_ADDRESS_OFST 0x40 +#define ECC_BLK_RDATA0_OFST 0x44 +#define ECC_BLK_RDATA1_OFST 0x48 +#define ECC_BLK_RDATA2_OFST 0x4C +#define ECC_BLK_RDATA3_OFST 0x50 +#define ECC_BLK_WDATA0_OFST 0x54 +#define ECC_BLK_WDATA1_OFST 0x58 +#define ECC_BLK_WDATA2_OFST 0x5C +#define ECC_BLK_WDATA3_OFST 0x60 +#define ECC_BLK_RECC0_OFST 0x64 +#define ECC_BLK_RECC1_OFST 0x68 +#define ECC_BLK_WECC0_OFST 0x6C +#define ECC_BLK_WECC1_OFST 0x70 +#define ECC_BLK_DBYTECTRL_OFST 0x74 +#define ECC_BLK_ACCCTRL_OFST 0x78 +#define ECC_BLK_STARTACC_OFST 0x7C + +#define ECC_XACT_KICK 0x10000 +#define ECC_WORD_WRITE 0xF +#define ECC_WRITE_DOVR 0x101 +#define ECC_WRITE_EDOVR 0x103 +#define ECC_READ_EOVR 0x2 +#define ECC_READ_EDOVR 0x3 + struct altr_edac_device_dev; struct edac_device_prv_data { @@ -370,6 +367,7 @@ struct altr_arria10_edac { struct irq_domain *domain; struct irq_chip irq_chip; struct list_head 
a10_ecc_devices; + struct notifier_block panic_notifier; }; /* @@ -437,13 +435,4 @@ struct altr_arria10_edac { #define INTEL_SIP_SMC_REG_WRITE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) -struct altr_stratix10_edac { - struct device *dev; - int sb_irq; - struct irq_domain *domain; - struct irq_chip irq_chip; - struct list_head s10_ecc_devices; - struct notifier_block panic_notifier; -}; - #endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18aeabb1d5ee..6ea98575a402 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -211,7 +211,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->fam == 0x17) { + if (pvt->fam == 0x17 || pvt->fam == 0x18) { __f17h_set_scrubval(pvt, scrubval); } else if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); @@ -264,6 +264,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) break; case 0x17: + case 0x18: amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); if (scrubval & BIT(0)) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); @@ -1044,6 +1045,7 @@ static void determine_memory_type(struct amd64_pvt *pvt) goto ddr3; case 0x17: + case 0x18: if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) pvt->dram_type = MEM_LRDDR4; else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) @@ -2200,6 +2202,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M10H_CPUS] = { + .ctl_name = "F17h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3188,8 +3199,18 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x17: + if (pvt->model >= 0x10 && pvt->model <= 0x2f) { + 
fam_type = &family_types[F17_M10H_CPUS]; + pvt->ops = &family_types[F17_M10H_CPUS].ops; + break; + } + /* fall through */ + case 0x18: fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; + + if (pvt->fam == 0x18) + family_types[F17_CPUS].ctl_name = "F18h"; break; default: @@ -3428,6 +3449,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1d4b74e9a037..4242f8e39c18 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -115,6 +115,8 @@ #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 #define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee /* * Function 1 - Address Map @@ -281,6 +283,7 @@ enum amd_families { F16_CPUS, F16_M30H_CPUS, F17_CPUS, + F17_M10H_CPUS, NUM_FAMILIES, }; diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 473aeec4b1da..49396bf6ad88 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -81,6 +81,18 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) (*num_dimm)++; } +static int get_dimm_smbios_index(u16 handle) +{ + struct mem_ctl_info *mci = ghes_pvt->mci; + int i; + + for (i = 0; i < mci->tot_dimms; i++) { + if (mci->dimms[i]->smbios_handle == handle) + return i; + } + return -1; +} + static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) { struct ghes_edac_dimm_fill *dimm_fill = arg; @@ -177,6 +189,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) entry->total_width, entry->data_width); } + 
dimm->smbios_handle = entry->handle; + dimm_fill->count++; } } @@ -327,12 +341,21 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { const char *bank = NULL, *device = NULL; + int index = -1; + dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); if (bank != NULL && device != NULL) p += sprintf(p, "DIMM location:%s %s ", bank, device); else p += sprintf(p, "DIMM DMI handle: 0x%.4x ", mem_err->mem_dev_handle); + + index = get_dimm_smbios_index(mem_err->mem_dev_handle); + if (index >= 0) { + e->top_layer = index; + e->enable_per_layer_report = true; + } + } if (p > e->location) *(p - 1) = '\0'; diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index d92d56cee101..299b441647cd 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -399,7 +399,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) if (nr_pages == 0) continue; - edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j, stacked ? 
" (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 8e120bf60624..9ef448fef12f 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -597,7 +597,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) /* DDR3 has 8 I/O banks */ size = (rows * cols * banks * ranks) >> (20 - 3); - edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", j, size, RANKOFFSET(dimm_dod[j]), banks, ranks, rows, cols); @@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) tp_event = HW_EVENT_ERR_FATAL; else @@ -1815,14 +1816,12 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; struct i7core_dev *i7_dev; struct mem_ctl_info *mci; - struct i7core_pvt *pvt; i7_dev = get_i7core_dev(mce->socketid); if (!i7_dev) return NOTIFY_DONE; mci = i7_dev->mci; - pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 2ab4d61ee47e..c605089d899f 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1059,7 +1059,8 @@ static int __init mce_amd_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_AMD) + if (c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) return -ENODEV; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); @@ -1113,6 +1114,7 @@ static int __init mce_amd_init(void) break; case 0x17: + case 0x18: xec_mask = 0x3f; if (!boot_cpu_has(X86_FEATURE_SMCA)) { printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 
07726fb00321..9353c3fc7c05 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -326,6 +326,7 @@ struct sbridge_info { const struct interleave_pkg *interleave_pkg; u8 max_sad; u8 (*get_node_id)(struct sbridge_pvt *pvt); + u8 (*get_ha)(u8 bank); enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt); enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr); struct pci_dev *pci_vtd; @@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt) return GET_BITFIELD(reg, 0, 2); } +/* + * Use the reporting bank number to determine which memory + * controller (also known as "ha" for "home agent"). Sandy + * Bridge only has one memory controller per socket, so the + * answer is always zero. + */ +static u8 sbridge_get_ha(u8 bank) +{ + return 0; +} + +/* + * On Ivy Bridge, Haswell and Broadwell the error may be in a + * home agent bank (7, 8), or one of the per-channel memory + * controller banks (9 .. 16). + */ +static u8 ibridge_get_ha(u8 bank) +{ + switch (bank) { + case 7 ... 8: + return bank - 7; + case 9 ... 
16: + return (bank - 9) / 4; + default: + return 0xff; + } +} + +/* Not used, but included for safety/symmetry */ +static u8 knl_get_ha(u8 bank) +{ + return 0xff; +} static u64 haswell_get_tolm(struct sbridge_pvt *pvt) { @@ -1622,7 +1656,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j, size, npages, banks, ranks, rows, cols); @@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci, return 0; } +static int get_memory_error_data_from_mce(struct mem_ctl_info *mci, + const struct mce *m, u8 *socket, + u8 *ha, long *channel_mask, + char *msg) +{ + u32 reg, channel = GET_BITFIELD(m->status, 0, 3); + struct mem_ctl_info *new_mci; + struct sbridge_pvt *pvt; + struct pci_dev *pci_ha; + bool tad0; + + if (channel >= NUM_CHANNELS) { + sprintf(msg, "Invalid channel 0x%x", channel); + return -EINVAL; + } + + pvt = mci->pvt_info; + if (!pvt->info.get_ha) { + sprintf(msg, "No get_ha()"); + return -EINVAL; + } + *ha = pvt->info.get_ha(m->bank); + if (*ha != 0 && *ha != 1) { + sprintf(msg, "Impossible bank %d", m->bank); + return -EINVAL; + } + + *socket = m->socketid; + new_mci = get_mci_for_node_id(*socket, *ha); + if (!new_mci) { + strcpy(msg, "mci socket got corrupted!"); + return -EINVAL; + } + + pvt = new_mci->pvt_info; + pci_ha = pvt->pci_ha; + pci_read_config_dword(pci_ha, tad_dram_rule[0], &reg); + tad0 = m->addr <= TAD_LIMIT(reg); + + *channel_mask = 1 << channel; + if (pvt->mirror_mode == FULL_MIRRORING || + (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) { + *channel_mask |= 1 << ((channel + 2) % 4); + pvt->is_cur_addr_mirrored = true; + } else { + pvt->is_cur_addr_mirrored = false; 
+ } + + if (pvt->is_lockstep) + *channel_mask |= 1 << ((channel + 1) % 4); + + return 0; +} + /**************************************************************************** Device initialization routines: put/get, init/exit ****************************************************************************/ @@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, u32 errcode = GET_BITFIELD(m->status, 0, 15); u32 channel = GET_BITFIELD(m->status, 0, 3); u32 optypenum = GET_BITFIELD(m->status, 4, 6); + /* + * Bits 5-0 of MCi_MISC give the least significant bit that is valid. + * A value 6 is for cache line aligned address, a value 12 is for page + * aligned address reported by patrol scrubber. + */ + u32 lsb = GET_BITFIELD(m->misc, 0, 5); long channel_mask, first_channel; - u8 rank, socket, ha; + u8 rank = 0xff, socket, ha; int rc, dimm; - char *area_type = NULL; + char *area_type = "DRAM"; if (pvt->info.type != SANDY_BRIDGE) recoverable = true; @@ -2888,6 +2982,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; @@ -2911,35 +3006,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * cccc = channel * If the mask doesn't match, report an error to the parsing logic */ - if (! 
((errcode & 0xef80) == 0x80)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; } - /* Only decode errors with an valid address (ADDRV) */ - if (!GET_BITFIELD(m->status, 58, 58)) - return; - if (pvt->info.type == KNIGHTS_LANDING) { if (channel == 14) { edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", @@ -2972,9 +3059,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, optype, msg); } return; - } else { + } else if (lsb < 12) { rc = get_memory_error_data(mci, m->addr, &socket, &ha, - &channel_mask, &rank, &area_type, msg); + &channel_mask, &rank, + &area_type, msg); + } else { + rc = get_memory_error_data_from_mce(mci, m, &socket, &ha, + &channel_mask, msg); } if (rc < 0) @@ -2989,14 +3080,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); - if (rank < 4) + if (rank == 0xff) + dimm = -1; + else if (rank < 4) dimm = 0; else if (rank < 8) dimm = 1; else dimm = 2; - /* * FIXME: On some memory configurations (mirror, lockstep), the * Memory Controller can't point the error to a single DIMM. 
The @@ -3045,17 +3137,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, { struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; - struct sbridge_pvt *pvt; char *type; if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; - mci = get_mci_for_node_id(mce->socketid, IMC0); - if (!mci) - return NOTIFY_DONE; - pvt = mci->pvt_info; - /* * Just let mcelog handle it if the error is * outside the memory controller. A memory error @@ -3065,6 +3151,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; + /* Check ADDRV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 58, 58)) + return NOTIFY_DONE; + + /* Check MISCV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 59, 59)) + return NOTIFY_DONE; + + /* Check address type in MISC (physical address only) */ + if (GET_BITFIELD(mce->misc, 6, 8) != 2) + return NOTIFY_DONE; + + mci = get_mci_for_node_id(mce->socketid, IMC0); + if (!mci) + return NOTIFY_DONE; + if (mce->mcgstatus & MCG_STATUS_MCIP) type = "Exception"; else @@ -3173,6 +3275,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3197,6 +3300,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = sbridge_dram_rule; pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = sbridge_get_ha; pvt->info.rir_limit = rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3221,6 +3325,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) 
pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = haswell_rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3245,6 +3350,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = haswell_rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3269,6 +3375,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = knl_dram_rule; pvt->info.get_memory_type = knl_get_memory_type; pvt->info.get_node_id = knl_get_node_id; + pvt->info.get_ha = knl_get_ha; pvt->info.rir_limit = NULL; pvt->info.sad_limit = knl_sad_limit; pvt->info.interleave_mode = knl_interleave_mode; @@ -3320,17 +3427,14 @@ fail0: return rc; } -#define ICPU(model, table) \ - { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } - static const struct x86_cpu_id sbridge_cpuids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table), - ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table), - ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table), - ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table), - ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table), - ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table), + INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table), + INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table), + INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table), + INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table), + INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table), + 
INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table), + INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table), { } }; MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index fae095162c01..dd209e0dd9ab 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -364,7 +364,7 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", imc->mc, chan, dimmno, size, npages, banks, 1 << ranks, rows, cols); @@ -424,7 +424,7 @@ unknown_size: dimm->mtype = MEM_NVDIMM; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ - edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu Mb (%u pages)\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n", imc->mc, chan, dimmno, size >> 20, dimm->nr_pages); snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", @@ -668,7 +668,7 @@ sad_found: break; case 2: lchan = (addr >> shift) % 2; - lchan = (lchan << 1) | ~lchan; + lchan = (lchan << 1) | !lchan; break; case 3: lchan = ((addr >> shift) % 2) << 1; @@ -959,6 +959,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index c009d94f40c5..34be60fe6892 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1884,7 +1884,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) default: dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", l2c->pdev->device); - return IRQ_NONE; + goto err_free; } while 
(CIRC_CNT(l2c->ring_head, l2c->ring_tail, @@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) l2c->ring_tail++; } - return IRQ_HANDLED; + ret = IRQ_HANDLED; err_free: kfree(other); |