summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/altera_edac.c262
-rw-r--r--drivers/edac/altera_edac.h69
-rw-r--r--drivers/edac/amd64_edac.c135
-rw-r--r--drivers/edac/amd64_edac.h11
-rw-r--r--drivers/edac/i10nm_base.c52
-rw-r--r--drivers/edac/skx_base.c50
-rw-r--r--drivers/edac/skx_common.c57
-rw-r--r--drivers/edac/skx_common.h8
8 files changed, 373 insertions, 271 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 1bcf9aea0cdf..8816f74a22b4 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -9,6 +9,7 @@
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/edac.h>
+#include <linux/firmware/intel/stratix10-smc.h>
#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/irqchip/chained_irq.h>
@@ -1361,8 +1362,19 @@ static const struct edac_device_prv_data a10_l2ecc_data = {
#ifdef CONFIG_EDAC_ALTERA_ETHERNET
+static int __init socfpga_init_ethernet_ecc(struct altr_edac_device_dev *dev)
+{
+ int ret;
+
+ ret = altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc");
+ if (ret)
+ return ret;
+
+ return altr_check_ecc_deps(dev);
+}
+
static const struct edac_device_prv_data a10_enetecc_data = {
- .setup = altr_check_ecc_deps,
+ .setup = socfpga_init_ethernet_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1374,21 +1386,25 @@ static const struct edac_device_prv_data a10_enetecc_data = {
.inject_fops = &altr_edac_a10_device_inject2_fops,
};
-static int __init socfpga_init_ethernet_ecc(void)
-{
- return altr_init_a10_ecc_device_type("altr,socfpga-eth-mac-ecc");
-}
-
-early_initcall(socfpga_init_ethernet_ecc);
-
#endif /* CONFIG_EDAC_ALTERA_ETHERNET */
/********************** NAND Device Functions **********************/
#ifdef CONFIG_EDAC_ALTERA_NAND
+static int __init socfpga_init_nand_ecc(struct altr_edac_device_dev *device)
+{
+ int ret;
+
+ ret = altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc");
+ if (ret)
+ return ret;
+
+ return altr_check_ecc_deps(device);
+}
+
static const struct edac_device_prv_data a10_nandecc_data = {
- .setup = altr_check_ecc_deps,
+ .setup = socfpga_init_nand_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1400,21 +1416,25 @@ static const struct edac_device_prv_data a10_nandecc_data = {
.inject_fops = &altr_edac_a10_device_inject_fops,
};
-static int __init socfpga_init_nand_ecc(void)
-{
- return altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc");
-}
-
-early_initcall(socfpga_init_nand_ecc);
-
#endif /* CONFIG_EDAC_ALTERA_NAND */
/********************** DMA Device Functions **********************/
#ifdef CONFIG_EDAC_ALTERA_DMA
+static int __init socfpga_init_dma_ecc(struct altr_edac_device_dev *device)
+{
+ int ret;
+
+ ret = altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc");
+ if (ret)
+ return ret;
+
+ return altr_check_ecc_deps(device);
+}
+
static const struct edac_device_prv_data a10_dmaecc_data = {
- .setup = altr_check_ecc_deps,
+ .setup = socfpga_init_dma_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1426,21 +1446,25 @@ static const struct edac_device_prv_data a10_dmaecc_data = {
.inject_fops = &altr_edac_a10_device_inject_fops,
};
-static int __init socfpga_init_dma_ecc(void)
-{
- return altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc");
-}
-
-early_initcall(socfpga_init_dma_ecc);
-
#endif /* CONFIG_EDAC_ALTERA_DMA */
/********************** USB Device Functions **********************/
#ifdef CONFIG_EDAC_ALTERA_USB
+static int __init socfpga_init_usb_ecc(struct altr_edac_device_dev *device)
+{
+ int ret;
+
+ ret = altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc");
+ if (ret)
+ return ret;
+
+ return altr_check_ecc_deps(device);
+}
+
static const struct edac_device_prv_data a10_usbecc_data = {
- .setup = altr_check_ecc_deps,
+ .setup = socfpga_init_usb_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1452,21 +1476,25 @@ static const struct edac_device_prv_data a10_usbecc_data = {
.inject_fops = &altr_edac_a10_device_inject2_fops,
};
-static int __init socfpga_init_usb_ecc(void)
-{
- return altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc");
-}
-
-early_initcall(socfpga_init_usb_ecc);
-
#endif /* CONFIG_EDAC_ALTERA_USB */
/********************** QSPI Device Functions **********************/
#ifdef CONFIG_EDAC_ALTERA_QSPI
+static int __init socfpga_init_qspi_ecc(struct altr_edac_device_dev *device)
+{
+ int ret;
+
+ ret = altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc");
+ if (ret)
+ return ret;
+
+ return altr_check_ecc_deps(device);
+}
+
static const struct edac_device_prv_data a10_qspiecc_data = {
- .setup = altr_check_ecc_deps,
+ .setup = socfpga_init_qspi_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1478,13 +1506,6 @@ static const struct edac_device_prv_data a10_qspiecc_data = {
.inject_fops = &altr_edac_a10_device_inject_fops,
};
-static int __init socfpga_init_qspi_ecc(void)
-{
- return altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc");
-}
-
-early_initcall(socfpga_init_qspi_ecc);
-
#endif /* CONFIG_EDAC_ALTERA_QSPI */
/********************* SDMMC Device Functions **********************/
@@ -1593,6 +1614,35 @@ err_release_group_1:
return rc;
}
+static int __init socfpga_init_sdmmc_ecc(struct altr_edac_device_dev *device)
+{
+ int rc = -ENODEV;
+ struct device_node *child;
+
+ child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
+ if (!child)
+ return -ENODEV;
+
+ if (!of_device_is_available(child))
+ goto exit;
+
+ if (validate_parent_available(child))
+ goto exit;
+
+ /* Init portB */
+ rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK,
+ a10_sdmmceccb_data.ecc_enable_mask, 1);
+ if (rc)
+ goto exit;
+
+ /* Setup portB */
+ return altr_portb_setup(device);
+
+exit:
+ of_node_put(child);
+ return rc;
+}
+
static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id)
{
struct altr_edac_device_dev *ad = dev_id;
@@ -1617,7 +1667,7 @@ static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id)
}
static const struct edac_device_prv_data a10_sdmmcecca_data = {
- .setup = altr_portb_setup,
+ .setup = socfpga_init_sdmmc_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1630,7 +1680,7 @@ static const struct edac_device_prv_data a10_sdmmcecca_data = {
};
static const struct edac_device_prv_data a10_sdmmceccb_data = {
- .setup = altr_portb_setup,
+ .setup = socfpga_init_sdmmc_ecc,
.ce_clear_mask = ALTR_A10_ECC_SERRPENB,
.ue_clear_mask = ALTR_A10_ECC_DERRPENB,
.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
@@ -1642,35 +1692,6 @@ static const struct edac_device_prv_data a10_sdmmceccb_data = {
.inject_fops = &altr_edac_a10_device_inject_fops,
};
-static int __init socfpga_init_sdmmc_ecc(void)
-{
- int rc = -ENODEV;
- struct device_node *child;
-
- if (!socfpga_is_a10() && !socfpga_is_s10())
- return -ENODEV;
-
- child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
- if (!child) {
- edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n");
- return -ENODEV;
- }
-
- if (!of_device_is_available(child))
- goto exit;
-
- if (validate_parent_available(child))
- goto exit;
-
- rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK,
- a10_sdmmcecca_data.ecc_enable_mask, 1);
-exit:
- of_node_put(child);
- return rc;
-}
-
-early_initcall(socfpga_init_sdmmc_ecc);
-
#endif /* CONFIG_EDAC_ALTERA_SDMMC */
/********************* Arria10 EDAC Device Functions *************************/
@@ -1762,28 +1783,24 @@ static ssize_t altr_edac_a10_device_trig2(struct file *file,
if (trig_type == ALTR_UE_TRIGGER_CHAR) {
writel(priv->ue_set_mask, set_addr);
} else {
- /* Setup write of 0 to first 4 bytes */
- writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST);
- writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST);
- writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST);
- writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST);
- /* Setup write of 4 bytes */
+ /* Setup read/write of 4 bytes */
writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST);
/* Setup Address to 0 */
- writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST);
- /* Setup accctrl to write & data override */
- writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
- /* Kick it. */
- writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST);
- /* Setup accctrl to read & ecc override */
- writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
+ writel(0, drvdata->base + ECC_BLK_ADDRESS_OFST);
+ /* Setup accctrl to read & ecc & data override */
+ writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST);
/* Kick it. */
writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST);
/* Setup write for single bit change */
- writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST);
- writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST);
- writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST);
- writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST);
+ writel(readl(drvdata->base + ECC_BLK_RDATA0_OFST) ^ 0x1,
+ drvdata->base + ECC_BLK_WDATA0_OFST);
+ writel(readl(drvdata->base + ECC_BLK_RDATA1_OFST),
+ drvdata->base + ECC_BLK_WDATA1_OFST);
+ writel(readl(drvdata->base + ECC_BLK_RDATA2_OFST),
+ drvdata->base + ECC_BLK_WDATA2_OFST);
+ writel(readl(drvdata->base + ECC_BLK_RDATA3_OFST),
+ drvdata->base + ECC_BLK_WDATA3_OFST);
+
/* Copy Read ECC to Write ECC */
writel(readl(drvdata->base + ECC_BLK_RECC0_OFST),
drvdata->base + ECC_BLK_WECC0_OFST);
@@ -1930,6 +1947,15 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
goto err_release_group1;
}
+#ifdef CONFIG_ARCH_STRATIX10
+ /* Use IRQ to determine SError origin instead of assigning IRQ */
+ rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq);
+ if (rc) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to parse DB IRQ index\n");
+ goto err_release_group1;
+ }
+#else
altdev->db_irq = irq_of_parse_and_map(np, 1);
if (!altdev->db_irq) {
edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n");
@@ -1943,6 +1969,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
goto err_release_group1;
}
+#endif
rc = edac_device_add_device(dci);
if (rc) {
@@ -2005,6 +2032,10 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = {
/************** Stratix 10 EDAC Double Bit Error Handler ************/
#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m)
+#ifdef CONFIG_ARCH_STRATIX10
+/* panic routine issues reboot on non-zero panic_timeout */
+extern int panic_timeout;
+
/*
* The double bit error is handled through SError which is fatal. This is
* called as a panic notifier to printout ECC error info as part of the panic.
@@ -2018,17 +2049,37 @@ static int s10_edac_dberr_handler(struct notifier_block *this,
regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
&dberror);
regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror);
- if (dberror & S10_DDR0_IRQ_MASK) {
- regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr);
- regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
- err_addr);
- edac_printk(KERN_ERR, EDAC_MC,
- "EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
- err_addr);
+ if (dberror & S10_DBE_IRQ_MASK) {
+ struct list_head *position;
+ struct altr_edac_device_dev *ed;
+ struct arm_smccc_res result;
+
+ /* Find the matching DBE in the list of devices */
+ list_for_each(position, &edac->a10_ecc_devices) {
+ ed = list_entry(position, struct altr_edac_device_dev,
+ next);
+ if (!(BIT(ed->db_irq) & dberror))
+ continue;
+
+ writel(ALTR_A10_ECC_DERRPENA,
+ ed->base + ALTR_A10_ECC_INTSTAT_OFST);
+ err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST);
+ regmap_write(edac->ecc_mgr_map,
+ S10_SYSMGR_UE_ADDR_OFST, err_addr);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "EDAC: [Fatal DBE on %s @ 0x%08X]\n",
+ ed->edac_dev_name, err_addr);
+ break;
+ }
+ /* Notify the System through SMC. Reboot delay = 1 second */
+ panic_timeout = 1;
+ arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0,
+ 0, 0, &result);
}
return NOTIFY_DONE;
}
+#endif
/****************** Arria 10 EDAC Probe Function *********************/
static int altr_edac_a10_probe(struct platform_device *pdev)
@@ -2098,16 +2149,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_irq_handler,
edac);
- if (socfpga_is_a10()) {
- edac->db_irq = platform_get_irq(pdev, 1);
- if (edac->db_irq < 0) {
- dev_err(&pdev->dev, "No DBERR IRQ resource\n");
- return edac->db_irq;
- }
- irq_set_chained_handler_and_data(edac->db_irq,
- altr_edac_a10_irq_handler,
- edac);
- } else {
+#ifdef CONFIG_ARCH_STRATIX10
+ {
int dberror, err_addr;
edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
@@ -2130,6 +2173,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
S10_SYSMGR_UE_ADDR_OFST, 0);
}
}
+#else
+ edac->db_irq = platform_get_irq(pdev, 1);
+ if (edac->db_irq < 0) {
+ dev_err(&pdev->dev, "No DBERR IRQ resource\n");
+ return edac->db_irq;
+ }
+ irq_set_chained_handler_and_data(edac->db_irq,
+ altr_edac_a10_irq_handler, edac);
+#endif
for_each_child_of_node(pdev->dev.of_node, child) {
if (!of_device_is_available(child))
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index f8664bac9fa8..55654cc4bcdf 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -289,6 +289,7 @@ struct altr_sdram_mc_data {
#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000
/************* Stratix10 Defines **************/
+#define ALTR_S10_DERR_ADDRA_OFST 0x2C
/* Stratix10 ECC Manager Defines */
#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
@@ -299,6 +300,7 @@ struct altr_sdram_mc_data {
#define S10_SYSMGR_UE_ADDR_OFST 0x224
#define S10_DDR0_IRQ_MASK BIT(16)
+#define S10_DBE_IRQ_MASK 0x3FE
/* Define ECC Block Offsets for peripherals */
#define ECC_BLK_ADDRESS_OFST 0x40
@@ -319,7 +321,7 @@ struct altr_sdram_mc_data {
#define ECC_BLK_STARTACC_OFST 0x7C
#define ECC_XACT_KICK 0x10000
-#define ECC_WORD_WRITE 0xF
+#define ECC_WORD_WRITE 0xFF
#define ECC_WRITE_DOVR 0x101
#define ECC_WRITE_EDOVR 0x103
#define ECC_READ_EOVR 0x2
@@ -370,69 +372,4 @@ struct altr_arria10_edac {
struct notifier_block panic_notifier;
};
-/*
- * Functions specified by ARM SMC Calling convention:
- *
- * FAST call executes atomic operations, returns when the requested operation
- * has completed.
- * STD call starts a operation which can be preempted by a non-secure
- * interrupt. The call can return before the requested operation has
- * completed.
- *
- * a0..a7 is used as register names in the descriptions below, on arm32
- * that translates to r0..r7 and on arm64 to w0..w7.
- */
-
-#define INTEL_SIP_SMC_STD_CALL_VAL(func_num) \
- ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_64, \
- ARM_SMCCC_OWNER_SIP, (func_num))
-
-#define INTEL_SIP_SMC_FAST_CALL_VAL(func_num) \
- ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, \
- ARM_SMCCC_OWNER_SIP, (func_num))
-
-#define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION 0xFFFFFFFF
-#define INTEL_SIP_SMC_STATUS_OK 0x0
-#define INTEL_SIP_SMC_REG_ERROR 0x5
-
-/*
- * Request INTEL_SIP_SMC_REG_READ
- *
- * Read a protected register using SMCCC
- *
- * Call register usage:
- * a0: INTEL_SIP_SMC_REG_READ.
- * a1: register address.
- * a2-7: not used.
- *
- * Return status:
- * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or
- * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION
- * a1: Value in the register
- * a2-3: not used.
- */
-#define INTEL_SIP_SMC_FUNCID_REG_READ 7
-#define INTEL_SIP_SMC_REG_READ \
- INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ)
-
-/*
- * Request INTEL_SIP_SMC_REG_WRITE
- *
- * Write a protected register using SMCCC
- *
- * Call register usage:
- * a0: INTEL_SIP_SMC_REG_WRITE.
- * a1: register address
- * a2: value to program into register.
- * a3-7: not used.
- *
- * Return status:
- * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or
- * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION
- * a1-3: not used.
- */
-#define INTEL_SIP_SMC_FUNCID_REG_WRITE 8
-#define INTEL_SIP_SMC_REG_WRITE \
- INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE)
-
#endif /* #ifndef _ALTERA_EDAC_H */
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6ea98575a402..e2a99466faaa 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -18,6 +18,9 @@ static struct msr __percpu *msrs;
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
+/* Number of Unified Memory Controllers */
+static u8 num_umcs;
+
/*
* Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
* bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
@@ -449,6 +452,9 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
#define for_each_chip_select_mask(i, dct, pvt) \
for (i = 0; i < pvt->csels[dct].m_cnt; i++)
+#define for_each_umc(i) \
+ for (i = 0; i < num_umcs; i++)
+
/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
* csrow that input_addr maps to, or -1 on failure (no csrow claims input_addr).
@@ -722,7 +728,7 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt)
if (pvt->umc) {
u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT))
continue;
@@ -781,6 +787,22 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
(dclr & BIT(15)) ? "yes" : "no");
}
+/*
+ * The Address Mask should be a contiguous set of bits in the non-interleaved
+ * case. So to check for CS interleaving, find the most- and least-significant
+ * bits of the mask, generate a contiguous bitmask, and compare the two.
+ */
+static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs)
+{
+ u32 mask = pvt->csels[ctrl].csmasks[cs >> 1];
+ u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1;
+ u32 test_mask = GENMASK(msb, lsb);
+
+ edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask);
+
+ return mask ^ test_mask;
+}
+
static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
{
int dimm, size0, size1, cs0, cs1;
@@ -797,8 +819,19 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
size1 = 0;
cs1 = dimm * 2 + 1;
- if (csrow_enabled(cs1, ctrl, pvt))
- size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+ if (csrow_enabled(cs1, ctrl, pvt)) {
+ /*
+ * CS interleaving is only supported if both CSes have
+ * the same amount of memory. Because they are
+ * interleaved, it will look like both CSes have the
+ * full amount of memory. Save the size for both as
+ * half the amount we found on CS0, if interleaved.
+ */
+ if (f17_cs_interleaved(pvt, ctrl, cs1))
+ size1 = size0 = (size0 >> 1);
+ else
+ size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
+ }
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
cs0, size0,
@@ -811,7 +844,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
struct amd64_umc *umc;
u32 i, tmp, umc_base;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -894,8 +927,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
- amd64_info("using %s syndromes.\n",
- ((pvt->ecc_sym_sz == 8) ? "x8" : "x4"));
+ amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
}
/*
@@ -1388,7 +1420,7 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
int i, channels = 0;
/* SDP Control bit 31 (SdpInit) is clear for unused UMC channels */
- for (i = 0; i < NUM_UMCS; i++)
+ for_each_umc(i)
channels += !!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT);
amd64_info("MCT channel count: %d\n", channels);
@@ -2211,6 +2243,15 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_base_addr_to_cs_size,
}
},
+ [F17_M30H_CPUS] = {
+ .ctl_name = "F17h_M30h",
+ .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_base_addr_to_cs_size,
+ }
+ },
};
/*
@@ -2464,18 +2505,14 @@ static inline void decode_bus_error(int node_id, struct mce *m)
* To find the UMC channel represented by this bank we need to match on its
* instance_id. The instance_id of a bank is held in the lower 32 bits of its
* IPID.
+ *
+ * Currently, we can derive the channel number by looking at the 6th nibble in
+ * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
+ * number.
*/
-static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+static int find_umc_channel(struct mce *m)
{
- u32 umc_instance_id[] = {0x50f00, 0x150f00};
- u32 instance_id = m->ipid & GENMASK(31, 0);
- int i, channel = -1;
-
- for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
- if (umc_instance_id[i] == instance_id)
- channel = i;
-
- return channel;
+ return (m->ipid & GENMASK(31, 0)) >> 20;
}
static void decode_umc_error(int node_id, struct mce *m)
@@ -2497,11 +2534,7 @@ static void decode_umc_error(int node_id, struct mce *m)
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;
- err.channel = find_umc_channel(pvt, m);
- if (err.channel < 0) {
- err.err_code = ERR_CHANNEL;
- goto log_error;
- }
+ err.channel = find_umc_channel(m);
if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
@@ -2603,19 +2636,19 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt)
if (pvt->umc) {
u8 i;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
/* Check enabled channels only: */
- if ((pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) &&
- (pvt->umc[i].ecc_ctrl & BIT(7))) {
- pvt->ecc_sym_sz = 8;
- break;
+ if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
+ if (pvt->umc[i].ecc_ctrl & BIT(9)) {
+ pvt->ecc_sym_sz = 16;
+ return;
+ } else if (pvt->umc[i].ecc_ctrl & BIT(7)) {
+ pvt->ecc_sym_sz = 8;
+ return;
+ }
}
}
-
- return;
- }
-
- if (pvt->fam >= 0x10) {
+ } else if (pvt->fam >= 0x10) {
u32 tmp;
amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
@@ -2639,7 +2672,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
u32 i, umc_base;
/* Read registers from each UMC */
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
umc_base = get_umc_base(i);
umc = &pvt->umc[i];
@@ -3052,7 +3085,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
if (boot_cpu_data.x86 >= 0x17) {
u8 umc_en_mask = 0, ecc_en_mask = 0;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
u32 base = get_umc_base(i);
/* Only check enabled UMCs. */
@@ -3105,7 +3138,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
{
u8 i, ecc_en = 1, cpk_en = 1;
- for (i = 0; i < NUM_UMCS; i++) {
+ for_each_umc(i) {
if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
@@ -3203,6 +3236,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
fam_type = &family_types[F17_M10H_CPUS];
pvt->ops = &family_types[F17_M10H_CPUS].ops;
break;
+ } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
+ fam_type = &family_types[F17_M30H_CPUS];
+ pvt->ops = &family_types[F17_M30H_CPUS].ops;
+ break;
}
/* fall through */
case 0x18:
@@ -3236,6 +3273,22 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
NULL
};
+/* Set the number of Unified Memory Controllers in the system. */
+static void compute_num_umcs(void)
+{
+ u8 model = boot_cpu_data.x86_model;
+
+ if (boot_cpu_data.x86 < 0x17)
+ return;
+
+ if (model >= 0x30 && model <= 0x3f)
+ num_umcs = 8;
+ else
+ num_umcs = 2;
+
+ edac_dbg(1, "Number of UMCs: %x", num_umcs);
+}
+
static int init_one_instance(unsigned int nid)
{
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
@@ -3260,7 +3313,7 @@ static int init_one_instance(unsigned int nid)
goto err_free;
if (pvt->fam >= 0x17) {
- pvt->umc = kcalloc(NUM_UMCS, sizeof(struct amd64_umc), GFP_KERNEL);
+ pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
if (!pvt->umc) {
ret = -ENOMEM;
goto err_free;
@@ -3299,8 +3352,14 @@ static int init_one_instance(unsigned int nid)
* Always allocate two channels since we can have setups with DIMMs on
* only one channel. Also, this simplifies handling later for the price
* of a couple of KBs tops.
+ *
+ * On Fam17h+, the number of controllers may be greater than two. So set
+ * the size equal to the maximum number of UMCs.
*/
- layers[1].size = 2;
+ if (pvt->fam >= 0x17)
+ layers[1].size = num_umcs;
+ else
+ layers[1].size = 2;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
@@ -3481,6 +3540,8 @@ static int __init amd64_edac_init(void)
if (!msrs)
goto err_free;
+ compute_num_umcs();
+
for (i = 0; i < amd_nb_num(); i++) {
err = probe_one_instance(i);
if (err) {
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 4242f8e39c18..8f66472f7adc 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -117,6 +117,8 @@
#define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
+#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490
+#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
/*
* Function 1 - Address Map
@@ -272,8 +274,6 @@
#define UMC_SDP_INIT BIT(31)
-#define NUM_UMCS 2
-
enum amd_families {
K8_CPUS = 0,
F10_CPUS,
@@ -284,6 +284,7 @@ enum amd_families {
F16_M30H_CPUS,
F17_CPUS,
F17_M10H_CPUS,
+ F17_M30H_CPUS,
NUM_FAMILIES,
};
@@ -363,7 +364,7 @@ struct amd64_pvt {
u32 dct_sel_hi; /* DRAM Controller Select High */
u32 online_spare; /* On-Line spare Reg */
- /* x4 or x8 syndromes in use */
+ /* x4, x8, or x16 syndromes in use */
u8 ecc_sym_sz;
/* place to store error injection parameters prior to issue */
@@ -396,8 +397,8 @@ struct err_info {
static inline u32 get_umc_base(u8 channel)
{
- /* ch0: 0x50000, ch1: 0x150000 */
- return 0x50000 + (!!channel << 20);
+ /* chY: 0xY50000 */
+ return 0x50000 + (channel << 20);
}
static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index c334fb7c63df..6f06aec4877c 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -181,6 +181,54 @@ static struct notifier_block i10nm_mce_dec = {
.priority = MCE_PRIO_EDAC,
};
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature.
+ * Exercise the address decode logic by writing an address to
+ * /sys/kernel/debug/edac/i10nm_test/addr.
+ */
+static struct dentry *i10nm_test;
+
+static int debugfs_u64_set(void *data, u64 val)
+{
+ struct mce m;
+
+ pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
+
+ memset(&m, 0, sizeof(m));
+ /* ADDRV + MemRd + Unknown channel */
+ m.status = MCI_STATUS_ADDRV + 0x90;
+ /* One corrected error */
+ m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
+ m.addr = val;
+ skx_mce_check_error(NULL, 0, &m);
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+static void setup_i10nm_debug(void)
+{
+ i10nm_test = edac_debugfs_create_dir("i10nm_test");
+ if (!i10nm_test)
+ return;
+
+ if (!edac_debugfs_create_file("addr", 0200, i10nm_test,
+ NULL, &fops_u64_wo)) {
+ debugfs_remove(i10nm_test);
+ i10nm_test = NULL;
+ }
+}
+
+static void teardown_i10nm_debug(void)
+{
+ debugfs_remove_recursive(i10nm_test);
+}
+#else
+static inline void setup_i10nm_debug(void) {}
+static inline void teardown_i10nm_debug(void) {}
+#endif /*CONFIG_EDAC_DEBUG*/
+
static int __init i10nm_init(void)
{
u8 mc = 0, src_id = 0, node_id = 0;
@@ -249,7 +297,7 @@ static int __init i10nm_init(void)
opstate_init();
mce_register_decode_chain(&i10nm_mce_dec);
- setup_skx_debug("i10nm_test");
+ setup_i10nm_debug();
i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
@@ -262,7 +310,7 @@ fail:
static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");
- teardown_skx_debug();
+ teardown_i10nm_debug();
mce_unregister_decode_chain(&i10nm_mce_dec);
skx_adxl_put();
skx_remove();
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index adae4c848ca1..a5c8fa3a249a 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -540,6 +540,54 @@ static struct notifier_block skx_mce_dec = {
.priority = MCE_PRIO_EDAC,
};
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature.
+ * Exercise the address decode logic by writing an address to
+ * /sys/kernel/debug/edac/skx_test/addr.
+ */
+static struct dentry *skx_test;
+
+static int debugfs_u64_set(void *data, u64 val)
+{
+ struct mce m;
+
+ pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
+
+ memset(&m, 0, sizeof(m));
+ /* ADDRV + MemRd + Unknown channel */
+ m.status = MCI_STATUS_ADDRV + 0x90;
+ /* One corrected error */
+ m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
+ m.addr = val;
+ skx_mce_check_error(NULL, 0, &m);
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+static void setup_skx_debug(void)
+{
+ skx_test = edac_debugfs_create_dir("skx_test");
+ if (!skx_test)
+ return;
+
+ if (!edac_debugfs_create_file("addr", 0200, skx_test,
+ NULL, &fops_u64_wo)) {
+ debugfs_remove(skx_test);
+ skx_test = NULL;
+ }
+}
+
+static void teardown_skx_debug(void)
+{
+ debugfs_remove_recursive(skx_test);
+}
+#else
+static inline void setup_skx_debug(void) {}
+static inline void teardown_skx_debug(void) {}
+#endif /*CONFIG_EDAC_DEBUG*/
+
/*
* skx_init:
* make sure we are running on the correct cpu model
@@ -619,7 +667,7 @@ static int __init skx_init(void)
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
- setup_skx_debug("skx_test");
+ setup_skx_debug();
mce_register_decode_chain(&skx_mce_dec);
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 0e96e7b5b0a7..b0dddcfa9baa 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -1,7 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver.
- * Originally split out from the skx_edac driver.
+ *
+ * Shared code by both skx_edac and i10nm_edac. Originally split out
+ * from the skx_edac driver.
+ *
+ * This file is linked into both skx_edac and i10nm_edac drivers. In
+ * order to avoid link errors, this file must be like a pure library
+ * without including symbols and defines which would otherwise conflict,
+ * when linked once into a module and into a built-in object, at the
+ * same time. For example, __this_module symbol references when that
+ * file is being linked into a built-in object.
*
* Copyright (c) 2018, Intel Corporation.
*/
@@ -644,48 +652,3 @@ void skx_remove(void)
kfree(d);
}
}
-
-#ifdef CONFIG_EDAC_DEBUG
-/*
- * Debug feature.
- * Exercise the address decode logic by writing an address to
- * /sys/kernel/debug/edac/dirname/addr.
- */
-static struct dentry *skx_test;
-
-static int debugfs_u64_set(void *data, u64 val)
-{
- struct mce m;
-
- pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
-
- memset(&m, 0, sizeof(m));
- /* ADDRV + MemRd + Unknown channel */
- m.status = MCI_STATUS_ADDRV + 0x90;
- /* One corrected error */
- m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
- m.addr = val;
- skx_mce_check_error(NULL, 0, &m);
-
- return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
-
-void setup_skx_debug(const char *dirname)
-{
- skx_test = edac_debugfs_create_dir(dirname);
- if (!skx_test)
- return;
-
- if (!edac_debugfs_create_file("addr", 0200, skx_test,
- NULL, &fops_u64_wo)) {
- debugfs_remove(skx_test);
- skx_test = NULL;
- }
-}
-
-void teardown_skx_debug(void)
-{
- debugfs_remove_recursive(skx_test);
-}
-#endif /*CONFIG_EDAC_DEBUG*/
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index d25374e34d4f..d18fa98669af 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -141,12 +141,4 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void skx_remove(void);
-#ifdef CONFIG_EDAC_DEBUG
-void setup_skx_debug(const char *dirname);
-void teardown_skx_debug(void);
-#else
-static inline void setup_skx_debug(const char *dirname) {}
-static inline void teardown_skx_debug(void) {}
-#endif /*CONFIG_EDAC_DEBUG*/
-
#endif /* _SKX_COMM_EDAC_H */