summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig7
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/armada_xp_edac.c26
-rw-r--r--drivers/edac/dmc520_edac.c656
-rw-r--r--drivers/edac/edac_mc.c511
-rw-r--r--drivers/edac/edac_mc.h6
-rw-r--r--drivers/edac/edac_mc_sysfs.c110
-rw-r--r--drivers/edac/edac_module.h1
-rw-r--r--drivers/edac/ghes_edac.c16
-rw-r--r--drivers/edac/mce_amd.c2
10 files changed, 973 insertions, 363 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index b3c99bb5fe77..fe2eb892a1bd 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -523,4 +523,11 @@ config EDAC_BLUEFIELD
Support for error detection and correction on the
Mellanox BlueField SoCs.
+config EDAC_DMC520
+ tristate "ARM DMC-520 ECC"
+ depends on ARM64
+ help
+ Support for error detection and correction on the
+ SoCs with ARM DMC-520 DRAM controller.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index d77200c9680b..269e15118cea 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -87,3 +87,4 @@ obj-$(CONFIG_EDAC_TI) += ti_edac.o
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
+obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c
index 7f227bdcbc84..a7502ebe9bdc 100644
--- a/drivers/edac/armada_xp_edac.c
+++ b/drivers/edac/armada_xp_edac.c
@@ -429,26 +429,26 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
src = (attr_cap & AURORA_ERR_ATTR_SRC_MSK) >> AURORA_ERR_ATTR_SRC_OFF;
if (src <= 3)
- len += snprintf(msg+len, size-len, "src=CPU%d ", src);
+ len += scnprintf(msg+len, size-len, "src=CPU%d ", src);
else
- len += snprintf(msg+len, size-len, "src=IO ");
+ len += scnprintf(msg+len, size-len, "src=IO ");
txn = (attr_cap & AURORA_ERR_ATTR_TXN_MSK) >> AURORA_ERR_ATTR_TXN_OFF;
switch (txn) {
case 0:
- len += snprintf(msg+len, size-len, "txn=Data-Read ");
+ len += scnprintf(msg+len, size-len, "txn=Data-Read ");
break;
case 1:
- len += snprintf(msg+len, size-len, "txn=Isn-Read ");
+ len += scnprintf(msg+len, size-len, "txn=Isn-Read ");
break;
case 2:
- len += snprintf(msg+len, size-len, "txn=Clean-Flush ");
+ len += scnprintf(msg+len, size-len, "txn=Clean-Flush ");
break;
case 3:
- len += snprintf(msg+len, size-len, "txn=Eviction ");
+ len += scnprintf(msg+len, size-len, "txn=Eviction ");
break;
case 4:
- len += snprintf(msg+len, size-len,
+ len += scnprintf(msg+len, size-len,
"txn=Read-Modify-Write ");
break;
}
@@ -456,19 +456,19 @@ static void aurora_l2_check(struct edac_device_ctl_info *dci)
err = (attr_cap & AURORA_ERR_ATTR_ERR_MSK) >> AURORA_ERR_ATTR_ERR_OFF;
switch (err) {
case 0:
- len += snprintf(msg+len, size-len, "err=CorrECC ");
+ len += scnprintf(msg+len, size-len, "err=CorrECC ");
break;
case 1:
- len += snprintf(msg+len, size-len, "err=UnCorrECC ");
+ len += scnprintf(msg+len, size-len, "err=UnCorrECC ");
break;
case 2:
- len += snprintf(msg+len, size-len, "err=TagParity ");
+ len += scnprintf(msg+len, size-len, "err=TagParity ");
break;
}
- len += snprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
- len += snprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
- len += snprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
+ len += scnprintf(msg+len, size-len, "addr=0x%x ", addr_cap & AURORA_ERR_ADDR_CAP_ADDR_MASK);
+ len += scnprintf(msg+len, size-len, "index=0x%x ", (way_cap & AURORA_ERR_WAY_IDX_MSK) >> AURORA_ERR_WAY_IDX_OFF);
+ len += scnprintf(msg+len, size-len, "way=0x%x", (way_cap & AURORA_ERR_WAY_CAP_WAY_MASK) >> AURORA_ERR_WAY_CAP_WAY_OFFSET);
/* clear error capture registers */
writel(AURORA_ERR_ATTR_CAP_VALID, drvdata->base + AURORA_ERR_ATTR_CAP_REG);
diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c
new file mode 100644
index 000000000000..fc1153ab1ebb
--- /dev/null
+++ b/drivers/edac/dmc520_edac.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * EDAC driver for DMC-520 memory controller.
+ *
+ * The driver supports 10 interrupt lines,
+ * though only dram_ecc_errc and dram_ecc_errd are currently handled.
+ *
+ * Authors: Rui Zhao <ruizhao@microsoft.com>
+ * Lei Wang <lewan@microsoft.com>
+ * Shiping Ji <shji@microsoft.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include "edac_mc.h"
+
+/* DMC-520 registers */
+#define REG_OFFSET_FEATURE_CONFIG 0x130
+#define REG_OFFSET_ECC_ERRC_COUNT_31_00 0x158
+#define REG_OFFSET_ECC_ERRC_COUNT_63_32 0x15C
+#define REG_OFFSET_ECC_ERRD_COUNT_31_00 0x160
+#define REG_OFFSET_ECC_ERRD_COUNT_63_32 0x164
+#define REG_OFFSET_INTERRUPT_CONTROL 0x500
+#define REG_OFFSET_INTERRUPT_CLR 0x508
+#define REG_OFFSET_INTERRUPT_STATUS 0x510
+#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 0x528
+#define REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 0x52C
+#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00 0x530
+#define REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32 0x534
+#define REG_OFFSET_ADDRESS_CONTROL_NOW 0x1010
+#define REG_OFFSET_MEMORY_TYPE_NOW 0x1128
+#define REG_OFFSET_SCRUB_CONTROL0_NOW 0x1170
+#define REG_OFFSET_FORMAT_CONTROL 0x18
+
+/* DMC-520 types, masks and bitfields */
+#define RAM_ECC_INT_CE_BIT BIT(0)
+#define RAM_ECC_INT_UE_BIT BIT(1)
+#define DRAM_ECC_INT_CE_BIT BIT(2)
+#define DRAM_ECC_INT_UE_BIT BIT(3)
+#define FAILED_ACCESS_INT_BIT BIT(4)
+#define FAILED_PROG_INT_BIT BIT(5)
+#define LINK_ERR_INT_BIT BIT(6)
+#define TEMPERATURE_EVENT_INT_BIT BIT(7)
+#define ARCH_FSM_INT_BIT BIT(8)
+#define PHY_REQUEST_INT_BIT BIT(9)
+#define MEMORY_WIDTH_MASK GENMASK(1, 0)
+#define SCRUB_TRIGGER0_NEXT_MASK GENMASK(1, 0)
+#define REG_FIELD_DRAM_ECC_ENABLED GENMASK(1, 0)
+#define REG_FIELD_MEMORY_TYPE GENMASK(2, 0)
+#define REG_FIELD_DEVICE_WIDTH GENMASK(9, 8)
+#define REG_FIELD_ADDRESS_CONTROL_COL GENMASK(2, 0)
+#define REG_FIELD_ADDRESS_CONTROL_ROW GENMASK(10, 8)
+#define REG_FIELD_ADDRESS_CONTROL_BANK GENMASK(18, 16)
+#define REG_FIELD_ADDRESS_CONTROL_RANK GENMASK(25, 24)
+#define REG_FIELD_ERR_INFO_LOW_VALID BIT(0)
+#define REG_FIELD_ERR_INFO_LOW_COL GENMASK(10, 1)
+#define REG_FIELD_ERR_INFO_LOW_ROW GENMASK(28, 11)
+#define REG_FIELD_ERR_INFO_LOW_RANK GENMASK(31, 29)
+#define REG_FIELD_ERR_INFO_HIGH_BANK GENMASK(3, 0)
+#define REG_FIELD_ERR_INFO_HIGH_VALID BIT(31)
+
+#define DRAM_ADDRESS_CONTROL_MIN_COL_BITS 8
+#define DRAM_ADDRESS_CONTROL_MIN_ROW_BITS 11
+
+#define DMC520_SCRUB_TRIGGER_ERR_DETECT 2
+#define DMC520_SCRUB_TRIGGER_IDLE 3
+
+/* Driver settings */
+/*
+ * The max-length message would be: "rank:7 bank:15 row:262143 col:1023".
+ * Max length is 34. Using a 40-size buffer is enough.
+ */
+#define DMC520_MSG_BUF_SIZE 40
+#define EDAC_MOD_NAME "dmc520-edac"
+#define EDAC_CTL_NAME "dmc520"
+
+/* the data bus width for the attached memory chips. */
+enum dmc520_mem_width {
+ MEM_WIDTH_X32 = 2,
+ MEM_WIDTH_X64 = 3
+};
+
+/* memory type */
+enum dmc520_mem_type {
+ MEM_TYPE_DDR3 = 1,
+ MEM_TYPE_DDR4 = 2
+};
+
+/* memory device width */
+enum dmc520_dev_width {
+ DEV_WIDTH_X4 = 0,
+ DEV_WIDTH_X8 = 1,
+ DEV_WIDTH_X16 = 2
+};
+
+struct ecc_error_info {
+ u32 col;
+ u32 row;
+ u32 bank;
+ u32 rank;
+};
+
+/* The interrupt config */
+struct dmc520_irq_config {
+ char *name;
+ int mask;
+};
+
+/* The interrupt mappings */
+static struct dmc520_irq_config dmc520_irq_configs[] = {
+ {
+ .name = "ram_ecc_errc",
+ .mask = RAM_ECC_INT_CE_BIT
+ },
+ {
+ .name = "ram_ecc_errd",
+ .mask = RAM_ECC_INT_UE_BIT
+ },
+ {
+ .name = "dram_ecc_errc",
+ .mask = DRAM_ECC_INT_CE_BIT
+ },
+ {
+ .name = "dram_ecc_errd",
+ .mask = DRAM_ECC_INT_UE_BIT
+ },
+ {
+ .name = "failed_access",
+ .mask = FAILED_ACCESS_INT_BIT
+ },
+ {
+ .name = "failed_prog",
+ .mask = FAILED_PROG_INT_BIT
+ },
+ {
+ .name = "link_err",
+ .mask = LINK_ERR_INT_BIT
+ },
+ {
+ .name = "temperature_event",
+ .mask = TEMPERATURE_EVENT_INT_BIT
+ },
+ {
+ .name = "arch_fsm",
+ .mask = ARCH_FSM_INT_BIT
+ },
+ {
+ .name = "phy_request",
+ .mask = PHY_REQUEST_INT_BIT
+ }
+};
+
+#define NUMBER_OF_IRQS ARRAY_SIZE(dmc520_irq_configs)
+
+/*
+ * The EDAC driver private data.
+ * error_lock is to protect concurrent writes to the mci->error_desc through
+ * edac_mc_handle_error().
+ */
+struct dmc520_edac {
+ void __iomem *reg_base;
+ spinlock_t error_lock;
+ u32 mem_width_in_bytes;
+ int irqs[NUMBER_OF_IRQS];
+ int masks[NUMBER_OF_IRQS];
+};
+
+static int dmc520_mc_idx;
+
+static u32 dmc520_read_reg(struct dmc520_edac *pvt, u32 offset)
+{
+ return readl(pvt->reg_base + offset);
+}
+
+static void dmc520_write_reg(struct dmc520_edac *pvt, u32 val, u32 offset)
+{
+ writel(val, pvt->reg_base + offset);
+}
+
+static u32 dmc520_calc_dram_ecc_error(u32 value)
+{
+ u32 total = 0;
+
+ /* Each rank's error counter takes one byte. */
+ while (value > 0) {
+ total += (value & 0xFF);
+ value >>= 8;
+ }
+ return total;
+}
+
+static u32 dmc520_get_dram_ecc_error_count(struct dmc520_edac *pvt,
+ bool is_ce)
+{
+ u32 reg_offset_low, reg_offset_high;
+ u32 err_low, err_high;
+ u32 err_count;
+
+ reg_offset_low = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_31_00 :
+ REG_OFFSET_ECC_ERRD_COUNT_31_00;
+ reg_offset_high = is_ce ? REG_OFFSET_ECC_ERRC_COUNT_63_32 :
+ REG_OFFSET_ECC_ERRD_COUNT_63_32;
+
+ err_low = dmc520_read_reg(pvt, reg_offset_low);
+ err_high = dmc520_read_reg(pvt, reg_offset_high);
+ /* Reset error counters */
+ dmc520_write_reg(pvt, 0, reg_offset_low);
+ dmc520_write_reg(pvt, 0, reg_offset_high);
+
+ err_count = dmc520_calc_dram_ecc_error(err_low) +
+ dmc520_calc_dram_ecc_error(err_high);
+
+ return err_count;
+}
+
+static void dmc520_get_dram_ecc_error_info(struct dmc520_edac *pvt,
+ bool is_ce,
+ struct ecc_error_info *info)
+{
+ u32 reg_offset_low, reg_offset_high;
+ u32 reg_val_low, reg_val_high;
+ bool valid;
+
+ reg_offset_low = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_31_00 :
+ REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_31_00;
+ reg_offset_high = is_ce ? REG_OFFSET_DRAM_ECC_ERRC_INT_INFO_63_32 :
+ REG_OFFSET_DRAM_ECC_ERRD_INT_INFO_63_32;
+
+ reg_val_low = dmc520_read_reg(pvt, reg_offset_low);
+ reg_val_high = dmc520_read_reg(pvt, reg_offset_high);
+
+ valid = (FIELD_GET(REG_FIELD_ERR_INFO_LOW_VALID, reg_val_low) != 0) &&
+ (FIELD_GET(REG_FIELD_ERR_INFO_HIGH_VALID, reg_val_high) != 0);
+
+ if (valid) {
+ info->col = FIELD_GET(REG_FIELD_ERR_INFO_LOW_COL, reg_val_low);
+ info->row = FIELD_GET(REG_FIELD_ERR_INFO_LOW_ROW, reg_val_low);
+ info->rank = FIELD_GET(REG_FIELD_ERR_INFO_LOW_RANK, reg_val_low);
+ info->bank = FIELD_GET(REG_FIELD_ERR_INFO_HIGH_BANK, reg_val_high);
+ } else {
+ memset(info, 0, sizeof(*info));
+ }
+}
+
+static bool dmc520_is_ecc_enabled(void __iomem *reg_base)
+{
+ u32 reg_val = readl(reg_base + REG_OFFSET_FEATURE_CONFIG);
+
+ return FIELD_GET(REG_FIELD_DRAM_ECC_ENABLED, reg_val);
+}
+
+static enum scrub_type dmc520_get_scrub_type(struct dmc520_edac *pvt)
+{
+ enum scrub_type type = SCRUB_NONE;
+ u32 reg_val, scrub_cfg;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_SCRUB_CONTROL0_NOW);
+ scrub_cfg = FIELD_GET(SCRUB_TRIGGER0_NEXT_MASK, reg_val);
+
+ if (scrub_cfg == DMC520_SCRUB_TRIGGER_ERR_DETECT ||
+ scrub_cfg == DMC520_SCRUB_TRIGGER_IDLE)
+ type = SCRUB_HW_PROG;
+
+ return type;
+}
+
+/* Get the memory data bus width, in number of bytes. */
+static u32 dmc520_get_memory_width(struct dmc520_edac *pvt)
+{
+ enum dmc520_mem_width mem_width_field;
+ u32 mem_width_in_bytes = 0;
+ u32 reg_val;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_FORMAT_CONTROL);
+ mem_width_field = FIELD_GET(MEMORY_WIDTH_MASK, reg_val);
+
+ if (mem_width_field == MEM_WIDTH_X32)
+ mem_width_in_bytes = 4;
+ else if (mem_width_field == MEM_WIDTH_X64)
+ mem_width_in_bytes = 8;
+ return mem_width_in_bytes;
+}
+
+static enum mem_type dmc520_get_mtype(struct dmc520_edac *pvt)
+{
+ enum mem_type mt = MEM_UNKNOWN;
+ enum dmc520_mem_type type;
+ u32 reg_val;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
+ type = FIELD_GET(REG_FIELD_MEMORY_TYPE, reg_val);
+
+ switch (type) {
+ case MEM_TYPE_DDR3:
+ mt = MEM_DDR3;
+ break;
+
+ case MEM_TYPE_DDR4:
+ mt = MEM_DDR4;
+ break;
+ }
+
+ return mt;
+}
+
+static enum dev_type dmc520_get_dtype(struct dmc520_edac *pvt)
+{
+ enum dmc520_dev_width device_width;
+ enum dev_type dt = DEV_UNKNOWN;
+ u32 reg_val;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_MEMORY_TYPE_NOW);
+ device_width = FIELD_GET(REG_FIELD_DEVICE_WIDTH, reg_val);
+
+ switch (device_width) {
+ case DEV_WIDTH_X4:
+ dt = DEV_X4;
+ break;
+
+ case DEV_WIDTH_X8:
+ dt = DEV_X8;
+ break;
+
+ case DEV_WIDTH_X16:
+ dt = DEV_X16;
+ break;
+ }
+
+ return dt;
+}
+
+static u32 dmc520_get_rank_count(void __iomem *reg_base)
+{
+ u32 reg_val, rank_bits;
+
+ reg_val = readl(reg_base + REG_OFFSET_ADDRESS_CONTROL_NOW);
+ rank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_RANK, reg_val);
+
+ return BIT(rank_bits);
+}
+
+static u64 dmc520_get_rank_size(struct dmc520_edac *pvt)
+{
+ u32 reg_val, col_bits, row_bits, bank_bits;
+
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_ADDRESS_CONTROL_NOW);
+
+ col_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_COL, reg_val) +
+ DRAM_ADDRESS_CONTROL_MIN_COL_BITS;
+ row_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_ROW, reg_val) +
+ DRAM_ADDRESS_CONTROL_MIN_ROW_BITS;
+ bank_bits = FIELD_GET(REG_FIELD_ADDRESS_CONTROL_BANK, reg_val);
+
+ return (u64)pvt->mem_width_in_bytes << (col_bits + row_bits + bank_bits);
+}
+
+static void dmc520_handle_dram_ecc_errors(struct mem_ctl_info *mci,
+ bool is_ce)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ char message[DMC520_MSG_BUF_SIZE];
+ struct ecc_error_info info;
+ u32 cnt;
+
+ dmc520_get_dram_ecc_error_info(pvt, is_ce, &info);
+
+ cnt = dmc520_get_dram_ecc_error_count(pvt, is_ce);
+ if (!cnt)
+ return;
+
+ snprintf(message, ARRAY_SIZE(message),
+ "rank:%d bank:%d row:%d col:%d",
+ info.rank, info.bank,
+ info.row, info.col);
+
+ spin_lock(&pvt->error_lock);
+ edac_mc_handle_error((is_ce ? HW_EVENT_ERR_CORRECTED :
+ HW_EVENT_ERR_UNCORRECTED),
+ mci, cnt, 0, 0, 0, info.rank, -1, -1,
+ message, "");
+ spin_unlock(&pvt->error_lock);
+}
+
+static irqreturn_t dmc520_edac_dram_ecc_isr(int irq, struct mem_ctl_info *mci,
+ bool is_ce)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ u32 i_mask;
+
+ i_mask = is_ce ? DRAM_ECC_INT_CE_BIT : DRAM_ECC_INT_UE_BIT;
+
+ dmc520_handle_dram_ecc_errors(mci, is_ce);
+
+ dmc520_write_reg(pvt, i_mask, REG_OFFSET_INTERRUPT_CLR);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t dmc520_edac_dram_all_isr(int irq, struct mem_ctl_info *mci,
+ u32 irq_mask)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ irqreturn_t irq_ret = IRQ_NONE;
+ u32 status;
+
+ status = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_STATUS);
+
+ if ((irq_mask & DRAM_ECC_INT_CE_BIT) &&
+ (status & DRAM_ECC_INT_CE_BIT))
+ irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, true);
+
+ if ((irq_mask & DRAM_ECC_INT_UE_BIT) &&
+ (status & DRAM_ECC_INT_UE_BIT))
+ irq_ret = dmc520_edac_dram_ecc_isr(irq, mci, false);
+
+ return irq_ret;
+}
+
+static irqreturn_t dmc520_isr(int irq, void *data)
+{
+ struct mem_ctl_info *mci = data;
+ struct dmc520_edac *pvt = mci->pvt_info;
+ u32 mask = 0;
+ int idx;
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ if (pvt->irqs[idx] == irq) {
+ mask = pvt->masks[idx];
+ break;
+ }
+ }
+ return dmc520_edac_dram_all_isr(irq, mci, mask);
+}
+
+static void dmc520_init_csrow(struct mem_ctl_info *mci)
+{
+ struct dmc520_edac *pvt = mci->pvt_info;
+ struct csrow_info *csi;
+ struct dimm_info *dimm;
+ u32 pages_per_rank;
+ enum dev_type dt;
+ enum mem_type mt;
+ int row, ch;
+ u64 rs;
+
+ dt = dmc520_get_dtype(pvt);
+ mt = dmc520_get_mtype(pvt);
+ rs = dmc520_get_rank_size(pvt);
+ pages_per_rank = rs >> PAGE_SHIFT;
+
+ for (row = 0; row < mci->nr_csrows; row++) {
+ csi = mci->csrows[row];
+
+ for (ch = 0; ch < csi->nr_channels; ch++) {
+ dimm = csi->channels[ch]->dimm;
+ dimm->grain = pvt->mem_width_in_bytes;
+ dimm->dtype = dt;
+ dimm->mtype = mt;
+ dimm->edac_mode = EDAC_FLAG_SECDED;
+ dimm->nr_pages = pages_per_rank / csi->nr_channels;
+ }
+ }
+}
+
+static int dmc520_edac_probe(struct platform_device *pdev)
+{
+ bool registered[NUMBER_OF_IRQS] = { false };
+ int irqs[NUMBER_OF_IRQS] = { -ENXIO };
+ int masks[NUMBER_OF_IRQS] = { 0 };
+ struct edac_mc_layer layers[1];
+ struct dmc520_edac *pvt = NULL;
+ struct mem_ctl_info *mci;
+ void __iomem *reg_base;
+ u32 irq_mask_all = 0;
+ struct resource *res;
+ struct device *dev;
+ int ret, idx, irq;
+ u32 reg_val;
+
+ /* Parse the device node */
+ dev = &pdev->dev;
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name);
+ irqs[idx] = irq;
+ masks[idx] = dmc520_irq_configs[idx].mask;
+ if (irq >= 0) {
+ irq_mask_all |= dmc520_irq_configs[idx].mask;
+ edac_dbg(0, "Discovered %s, irq: %d.\n", dmc520_irq_configs[idx].name, irq);
+ }
+ }
+
+ if (!irq_mask_all) {
+ edac_printk(KERN_ERR, EDAC_MOD_NAME,
+ "At least one valid interrupt line is expected.\n");
+ return -EINVAL;
+ }
+
+ /* Initialize dmc520 edac */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ reg_base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(reg_base))
+ return PTR_ERR(reg_base);
+
+ if (!dmc520_is_ecc_enabled(reg_base))
+ return -ENXIO;
+
+ layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+ layers[0].size = dmc520_get_rank_count(reg_base);
+ layers[0].is_virt_csrow = true;
+
+ mci = edac_mc_alloc(dmc520_mc_idx++, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+ if (!mci) {
+ edac_printk(KERN_ERR, EDAC_MOD_NAME,
+ "Failed to allocate memory for mc instance\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ pvt = mci->pvt_info;
+
+ pvt->reg_base = reg_base;
+ spin_lock_init(&pvt->error_lock);
+ memcpy(pvt->irqs, irqs, sizeof(irqs));
+ memcpy(pvt->masks, masks, sizeof(masks));
+
+ platform_set_drvdata(pdev, mci);
+
+ mci->pdev = dev;
+ mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ mci->scrub_cap = SCRUB_FLAG_HW_SRC;
+ mci->scrub_mode = dmc520_get_scrub_type(pvt);
+ mci->ctl_name = EDAC_CTL_NAME;
+ mci->dev_name = dev_name(mci->pdev);
+ mci->mod_name = EDAC_MOD_NAME;
+
+ edac_op_state = EDAC_OPSTATE_INT;
+
+ pvt->mem_width_in_bytes = dmc520_get_memory_width(pvt);
+
+ dmc520_init_csrow(mci);
+
+ /* Clear interrupts, not affecting other unrelated interrupts */
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
+ dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
+ REG_OFFSET_INTERRUPT_CONTROL);
+ dmc520_write_reg(pvt, irq_mask_all, REG_OFFSET_INTERRUPT_CLR);
+
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ irq = irqs[idx];
+ if (irq >= 0) {
+ ret = devm_request_irq(&pdev->dev, irq,
+ dmc520_isr, IRQF_SHARED,
+ dev_name(&pdev->dev), mci);
+ if (ret < 0) {
+ edac_printk(KERN_ERR, EDAC_MC,
+ "Failed to request irq %d\n", irq);
+ goto err;
+ }
+ registered[idx] = true;
+ }
+ }
+
+ /* Reset DRAM CE/UE counters */
+ if (irq_mask_all & DRAM_ECC_INT_CE_BIT)
+ dmc520_get_dram_ecc_error_count(pvt, true);
+
+ if (irq_mask_all & DRAM_ECC_INT_UE_BIT)
+ dmc520_get_dram_ecc_error_count(pvt, false);
+
+ ret = edac_mc_add_mc(mci);
+ if (ret) {
+ edac_printk(KERN_ERR, EDAC_MOD_NAME,
+ "Failed to register with EDAC core\n");
+ goto err;
+ }
+
+ /* Enable interrupts, not affecting other unrelated interrupts */
+ dmc520_write_reg(pvt, reg_val | irq_mask_all,
+ REG_OFFSET_INTERRUPT_CONTROL);
+
+ return 0;
+
+err:
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ if (registered[idx])
+ devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
+ }
+ if (mci)
+ edac_mc_free(mci);
+
+ return ret;
+}
+
+static int dmc520_edac_remove(struct platform_device *pdev)
+{
+ u32 reg_val, idx, irq_mask_all = 0;
+ struct mem_ctl_info *mci;
+ struct dmc520_edac *pvt;
+
+ mci = platform_get_drvdata(pdev);
+ pvt = mci->pvt_info;
+
+ /* Disable interrupts */
+ reg_val = dmc520_read_reg(pvt, REG_OFFSET_INTERRUPT_CONTROL);
+ dmc520_write_reg(pvt, reg_val & (~irq_mask_all),
+ REG_OFFSET_INTERRUPT_CONTROL);
+
+ /* free irq's */
+ for (idx = 0; idx < NUMBER_OF_IRQS; idx++) {
+ if (pvt->irqs[idx] >= 0) {
+ irq_mask_all |= pvt->masks[idx];
+ devm_free_irq(&pdev->dev, pvt->irqs[idx], mci);
+ }
+ }
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+
+ return 0;
+}
+
+static const struct of_device_id dmc520_edac_driver_id[] = {
+ { .compatible = "arm,dmc-520", },
+ { /* end of table */ }
+};
+
+MODULE_DEVICE_TABLE(of, dmc520_edac_driver_id);
+
+static struct platform_driver dmc520_edac_driver = {
+ .driver = {
+ .name = "dmc520",
+ .of_match_table = dmc520_edac_driver_id,
+ },
+
+ .probe = dmc520_edac_probe,
+ .remove = dmc520_edac_remove
+};
+
+module_platform_driver(dmc520_edac_driver);
+
+MODULE_AUTHOR("Rui Zhao <ruizhao@microsoft.com>");
+MODULE_AUTHOR("Lei Wang <lewan@microsoft.com>");
+MODULE_AUTHOR("Shiping Ji <shji@microsoft.com>");
+MODULE_DESCRIPTION("DMC-520 ECC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 69e0d90460e6..75ede27bdf6a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -55,6 +55,11 @@ static LIST_HEAD(mc_devices);
*/
static const char *edac_mc_owner;
+static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e)
+{
+ return container_of(e, struct mem_ctl_info, error_desc);
+}
+
int edac_get_report_status(void)
{
return edac_report;
@@ -278,6 +283,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems)
static void _edac_mc_free(struct mem_ctl_info *mci)
{
+ put_device(&mci->dev);
+}
+
+static void mci_release(struct device *dev)
+{
+ struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
struct csrow_info *csr;
int i, chn, row;
@@ -305,103 +316,26 @@ static void _edac_mc_free(struct mem_ctl_info *mci)
kfree(mci);
}
-struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
- unsigned int n_layers,
- struct edac_mc_layer *layers,
- unsigned int sz_pvt)
+static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
{
- struct mem_ctl_info *mci;
- struct edac_mc_layer *layer;
- struct csrow_info *csr;
- struct rank_info *chan;
- struct dimm_info *dimm;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
- unsigned int pos[EDAC_MAX_LAYERS];
- unsigned int idx, size, tot_dimms = 1, count = 1;
- unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
- void *pvt, *p, *ptr = NULL;
- int i, j, row, chn, n, len;
- bool per_rank = false;
-
- if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
- return NULL;
-
- /*
- * Calculate the total amount of dimms and csrows/cschannels while
- * in the old API emulation mode
- */
- for (idx = 0; idx < n_layers; idx++) {
- tot_dimms *= layers[idx].size;
-
- if (layers[idx].is_virt_csrow)
- tot_csrows *= layers[idx].size;
- else
- tot_channels *= layers[idx].size;
-
- if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
- per_rank = true;
- }
-
- /* Figure out the offsets of the various items from the start of an mc
- * structure. We want the alignment of each item to be at least as
- * stringent as what the compiler would provide if we could simply
- * hardcode everything into a single struct.
- */
- mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
- layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
- for (i = 0; i < n_layers; i++) {
- count *= layers[i].size;
- edac_dbg(4, "errcount layer %d size %d\n", i, count);
- ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- tot_errcount += 2 * count;
- }
-
- edac_dbg(4, "allocating %d error counters\n", tot_errcount);
- pvt = edac_align_ptr(&ptr, sz_pvt, 1);
- size = ((unsigned long)pvt) + sz_pvt;
-
- edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
- size,
- tot_dimms,
- per_rank ? "ranks" : "dimms",
- tot_csrows * tot_channels);
-
- mci = kzalloc(size, GFP_KERNEL);
- if (mci == NULL)
- return NULL;
-
- /* Adjust pointers so they point within the memory we just allocated
- * rather than an imaginary chunk of memory located at address 0.
- */
- layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
- for (i = 0; i < n_layers; i++) {
- mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
- mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
- }
- pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
-
- /* setup index and various internal pointers */
- mci->mc_idx = mc_num;
- mci->tot_dimms = tot_dimms;
- mci->pvt_info = pvt;
- mci->n_layers = n_layers;
- mci->layers = layer;
- memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
- mci->nr_csrows = tot_csrows;
- mci->num_cschannel = tot_channels;
- mci->csbased = per_rank;
+ unsigned int tot_channels = mci->num_cschannel;
+ unsigned int tot_csrows = mci->nr_csrows;
+ unsigned int row, chn;
/*
* Alocate and fill the csrow/channels structs
*/
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
if (!mci->csrows)
- goto error;
+ return -ENOMEM;
+
for (row = 0; row < tot_csrows; row++) {
+ struct csrow_info *csr;
+
csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
if (!csr)
- goto error;
+ return -ENOMEM;
+
mci->csrows[row] = csr;
csr->csrow_idx = row;
csr->mci = mci;
@@ -409,34 +343,51 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
GFP_KERNEL);
if (!csr->channels)
- goto error;
+ return -ENOMEM;
for (chn = 0; chn < tot_channels; chn++) {
+ struct rank_info *chan;
+
chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
if (!chan)
- goto error;
+ return -ENOMEM;
+
csr->channels[chn] = chan;
chan->chan_idx = chn;
chan->csrow = csr;
}
}
+ return 0;
+}
+
+static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
+{
+ unsigned int pos[EDAC_MAX_LAYERS];
+ unsigned int row, chn, idx;
+ int layer;
+ void *p;
+
/*
* Allocate and fill the dimm structs
*/
- mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
+ mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
if (!mci->dimms)
- goto error;
+ return -ENOMEM;
memset(&pos, 0, sizeof(pos));
row = 0;
chn = 0;
- for (idx = 0; idx < tot_dimms; idx++) {
+ for (idx = 0; idx < mci->tot_dimms; idx++) {
+ struct dimm_info *dimm;
+ struct rank_info *chan;
+ int n, len;
+
chan = mci->csrows[row]->channels[chn];
dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
if (!dimm)
- goto error;
+ return -ENOMEM;
mci->dimms[idx] = dimm;
dimm->mci = mci;
dimm->idx = idx;
@@ -446,16 +397,16 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
*/
len = sizeof(dimm->label);
p = dimm->label;
- n = snprintf(p, len, "mc#%u", mc_num);
+ n = snprintf(p, len, "mc#%u", mci->mc_idx);
p += n;
len -= n;
- for (j = 0; j < n_layers; j++) {
+ for (layer = 0; layer < mci->n_layers; layer++) {
n = snprintf(p, len, "%s#%u",
- edac_layer_name[layers[j].type],
- pos[j]);
+ edac_layer_name[mci->layers[layer].type],
+ pos[layer]);
p += n;
len -= n;
- dimm->location[j] = pos[j];
+ dimm->location[layer] = pos[layer];
if (len <= 0)
break;
@@ -467,29 +418,109 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
dimm->cschannel = chn;
/* Increment csrow location */
- if (layers[0].is_virt_csrow) {
+ if (mci->layers[0].is_virt_csrow) {
chn++;
- if (chn == tot_channels) {
+ if (chn == mci->num_cschannel) {
chn = 0;
row++;
}
} else {
row++;
- if (row == tot_csrows) {
+ if (row == mci->nr_csrows) {
row = 0;
chn++;
}
}
/* Increment dimm location */
- for (j = n_layers - 1; j >= 0; j--) {
- pos[j]++;
- if (pos[j] < layers[j].size)
+ for (layer = mci->n_layers - 1; layer >= 0; layer--) {
+ pos[layer]++;
+ if (pos[layer] < mci->layers[layer].size)
break;
- pos[j] = 0;
+ pos[layer] = 0;
}
}
+ return 0;
+}
+
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
+ unsigned int n_layers,
+ struct edac_mc_layer *layers,
+ unsigned int sz_pvt)
+{
+ struct mem_ctl_info *mci;
+ struct edac_mc_layer *layer;
+ unsigned int idx, size, tot_dimms = 1;
+ unsigned int tot_csrows = 1, tot_channels = 1;
+ void *pvt, *ptr = NULL;
+ bool per_rank = false;
+
+ if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
+ return NULL;
+
+ /*
+ * Calculate the total amount of dimms and csrows/cschannels while
+ * in the old API emulation mode
+ */
+ for (idx = 0; idx < n_layers; idx++) {
+ tot_dimms *= layers[idx].size;
+
+ if (layers[idx].is_virt_csrow)
+ tot_csrows *= layers[idx].size;
+ else
+ tot_channels *= layers[idx].size;
+
+ if (layers[idx].type == EDAC_MC_LAYER_CHIP_SELECT)
+ per_rank = true;
+ }
+
+ /* Figure out the offsets of the various items from the start of an mc
+ * structure. We want the alignment of each item to be at least as
+ * stringent as what the compiler would provide if we could simply
+ * hardcode everything into a single struct.
+ */
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
+ size = ((unsigned long)pvt) + sz_pvt;
+
+ edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+ size,
+ tot_dimms,
+ per_rank ? "ranks" : "dimms",
+ tot_csrows * tot_channels);
+
+ mci = kzalloc(size, GFP_KERNEL);
+ if (mci == NULL)
+ return NULL;
+
+ mci->dev.release = mci_release;
+ device_initialize(&mci->dev);
+
+ /* Adjust pointers so they point within the memory we just allocated
+ * rather than an imaginary chunk of memory located at address 0.
+ */
+ layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
+ pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
+
+ /* setup index and various internal pointers */
+ mci->mc_idx = mc_num;
+ mci->tot_dimms = tot_dimms;
+ mci->pvt_info = pvt;
+ mci->n_layers = n_layers;
+ mci->layers = layer;
+ memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
+ mci->nr_csrows = tot_csrows;
+ mci->num_cschannel = tot_channels;
+ mci->csbased = per_rank;
+
+ if (edac_mc_alloc_csrows(mci))
+ goto error;
+
+ if (edac_mc_alloc_dimms(mci))
+ goto error;
+
mci->op_state = OP_ALLOC;
return mci;
@@ -505,9 +536,6 @@ void edac_mc_free(struct mem_ctl_info *mci)
{
edac_dbg(1, "\n");
- if (device_is_registered(&mci->dev))
- edac_unregister_sysfs(mci);
-
_edac_mc_free(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
@@ -902,88 +930,51 @@ const char *edac_layer_name[] = {
};
EXPORT_SYMBOL_GPL(edac_layer_name);
-static void edac_inc_ce_error(struct mem_ctl_info *mci,
- bool enable_per_layer_report,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
- int i, index = 0;
-
- mci->ce_mc += count;
-
- if (!enable_per_layer_report) {
- mci->ce_noinfo_count += count;
- return;
- }
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ce_per_layer[i][index] += count;
+ mci->ce_mc += e->error_count;
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
+ if (dimm)
+ dimm->ce_count += e->error_count;
+ else
+ mci->ce_noinfo_count += e->error_count;
}
-static void edac_inc_ue_error(struct mem_ctl_info *mci,
- bool enable_per_layer_report,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
- int i, index = 0;
-
- mci->ue_mc += count;
-
- if (!enable_per_layer_report) {
- mci->ue_noinfo_count += count;
- return;
- }
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ue_per_layer[i][index] += count;
+ mci->ue_mc += e->error_count;
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
+ if (dimm)
+ dimm->ue_count += e->error_count;
+ else
+ mci->ue_noinfo_count += e->error_count;
}
-static void edac_ce_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const bool enable_per_layer_report,
- const unsigned long page_frame_number,
- const unsigned long offset_in_page,
- long grain)
+static void edac_ce_error(struct edac_raw_error_desc *e)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
unsigned long remapped_page;
- char *msg_aux = "";
-
- if (*msg)
- msg_aux = " ";
if (edac_mc_get_log_ce()) {
- if (other_detail && *other_detail)
- edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
- else
- edac_mc_printk(mci, KERN_WARNING,
- "%d CE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d CE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx%s%s)\n",
+ e->error_count, e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain, e->syndrome,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
- edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
+
+ edac_inc_ce_error(e);
if (mci->scrub_mode == SCRUB_SW_SRC) {
/*
@@ -998,60 +989,64 @@ static void edac_ce_error(struct mem_ctl_info *mci,
* be scrubbed.
*/
remapped_page = mci->ctl_page_to_phys ?
- mci->ctl_page_to_phys(mci, page_frame_number) :
- page_frame_number;
+ mci->ctl_page_to_phys(mci, e->page_frame_number) :
+ e->page_frame_number;
- edac_mc_scrub_block(remapped_page,
- offset_in_page, grain);
+ edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
}
}
-static void edac_ue_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const bool enable_per_layer_report)
+static void edac_ue_error(struct edac_raw_error_desc *e)
{
- char *msg_aux = "";
-
- if (*msg)
- msg_aux = " ";
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
if (edac_mc_get_log_ue()) {
- if (other_detail && *other_detail)
- edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
- else
- edac_mc_printk(mci, KERN_WARNING,
- "%d UE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ edac_mc_printk(mci, KERN_WARNING,
+ "%d UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
+ e->error_count, e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
if (edac_mc_get_panic_on_ue()) {
- if (other_detail && *other_detail)
- panic("UE %s%son %s (%s%s - %s)\n",
- msg, msg_aux, label, location, detail, other_detail);
- else
- panic("UE %s%son %s (%s%s)\n",
- msg, msg_aux, label, location, detail);
+ panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
+ e->msg,
+ *e->msg ? " " : "",
+ e->label, e->location, e->page_frame_number, e->offset_in_page,
+ e->grain,
+ *e->other_detail ? " - " : "",
+ e->other_detail);
}
- edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
+ edac_inc_ue_error(e);
}
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
- struct mem_ctl_info *mci,
- struct edac_raw_error_desc *e)
+static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
{
- char detail[80];
- int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ enum hw_event_mc_err_type type = e->type;
+ u16 count = e->error_count;
+
+ if (row < 0)
+ return;
+
+ edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
+
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ mci->csrows[row]->ce_count += count;
+ if (chan >= 0)
+ mci->csrows[row]->channels[chan]->ce_count += count;
+ } else {
+ mci->csrows[row]->ue_count += count;
+ }
+}
+
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
+{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
u8 grain_bits;
/* Sanity-check driver-supplied grain value. */
@@ -1062,31 +1057,16 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
/* Report the error via the trace interface */
if (IS_ENABLED(CONFIG_RAS))
- trace_mc_event(type, e->msg, e->label, e->error_count,
+ trace_mc_event(e->type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer,
e->low_layer,
(e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
grain_bits, e->syndrome, e->other_detail);
- /* Memory type dependent details about the error */
- if (type == HW_EVENT_ERR_CORRECTED) {
- snprintf(detail, sizeof(detail),
- "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
- e->page_frame_number, e->offset_in_page,
- e->grain, e->syndrome);
- edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
- detail, e->other_detail, e->enable_per_layer_report,
- e->page_frame_number, e->offset_in_page, e->grain);
- } else {
- snprintf(detail, sizeof(detail),
- "page:0x%lx offset:0x%lx grain:%ld",
- e->page_frame_number, e->offset_in_page, e->grain);
-
- edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
- detail, e->other_detail, e->enable_per_layer_report);
- }
-
-
+ if (e->type == HW_EVENT_ERR_CORRECTED)
+ edac_ce_error(e);
+ else
+ edac_ue_error(e);
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
@@ -1108,25 +1088,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
int i, n_labels = 0;
struct edac_raw_error_desc *e = &mci->error_desc;
+ bool any_memory = true;
edac_dbg(3, "MC%d\n", mci->mc_idx);
/* Fills the error report buffer */
memset(e, 0, sizeof (*e));
e->error_count = error_count;
+ e->type = type;
e->top_layer = top_layer;
e->mid_layer = mid_layer;
e->low_layer = low_layer;
e->page_frame_number = page_frame_number;
e->offset_in_page = offset_in_page;
e->syndrome = syndrome;
- e->msg = msg;
- e->other_detail = other_detail;
+ /* need valid strings here for both: */
+ e->msg = msg ?: "";
+ e->other_detail = other_detail ?: "";
/*
- * Check if the event report is consistent and if the memory
- * location is known. If it is known, enable_per_layer_report will be
- * true, the DIMM(s) label info will be filled and the per-layer
+ * Check if the event report is consistent and if the memory location is
+ * known. If it is, the DIMM(s) label info will be filled and the DIMM's
* error counters will be incremented.
*/
for (i = 0; i < mci->n_layers; i++) {
@@ -1145,7 +1127,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
pos[i] = -1;
}
if (pos[i] >= 0)
- e->enable_per_layer_report = true;
+ any_memory = false;
}
/*
@@ -1176,24 +1158,25 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/*
* If the error is memory-controller wide, there's no need to
- * seek for the affected DIMMs because the whole
- * channel/memory controller/... may be affected.
- * Also, don't show errors for empty DIMM slots.
+ * seek for the affected DIMMs because the whole channel/memory
+ * controller/... may be affected. Also, don't show errors for
+ * empty DIMM slots.
*/
- if (!e->enable_per_layer_report || !dimm->nr_pages)
+ if (!dimm->nr_pages)
continue;
- if (n_labels >= EDAC_MAX_LABELS) {
- e->enable_per_layer_report = false;
- break;
- }
n_labels++;
- if (p != e->label) {
- strcpy(p, OTHER_LABEL);
- p += strlen(OTHER_LABEL);
+ if (n_labels > EDAC_MAX_LABELS) {
+ p = e->label;
+ *p = '\0';
+ } else {
+ if (p != e->label) {
+ strcpy(p, OTHER_LABEL);
+ p += strlen(OTHER_LABEL);
+ }
+ strcpy(p, dimm->label);
+ p += strlen(p);
}
- strcpy(p, dimm->label);
- p += strlen(p);
/*
* get csrow/channel of the DIMM, in order to allow
@@ -1213,22 +1196,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
chan = -2;
}
- if (!e->enable_per_layer_report) {
+ if (any_memory)
strcpy(e->label, "any memory");
- } else {
- edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
- if (p == e->label)
- strcpy(e->label, "unknown memory");
- if (type == HW_EVENT_ERR_CORRECTED) {
- if (row >= 0) {
- mci->csrows[row]->ce_count += error_count;
- if (chan >= 0)
- mci->csrows[row]->channels[chan]->ce_count += error_count;
- }
- } else
- if (row >= 0)
- mci->csrows[row]->ue_count += error_count;
- }
+ else if (!*e->label)
+ strcpy(e->label, "unknown memory");
+
+ edac_inc_csrow(e, row, chan);
/* Fill the RAM location data */
p = e->location;
@@ -1244,6 +1217,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
if (p > e->location)
*(p - 1) = '\0';
- edac_raw_mc_handle_error(type, mci, e);
+ edac_raw_mc_handle_error(e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h
index 02aac5c61d00..881b00eadf7a 100644
--- a/drivers/edac/edac_mc.h
+++ b/drivers/edac/edac_mc.h
@@ -212,17 +212,13 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
* edac_raw_mc_handle_error() - Reports a memory event to userspace without
* doing anything to discover the error location.
*
- * @type: severity of the error (CE/UE/Fatal)
- * @mci: a struct mem_ctl_info pointer
* @e: error description
*
* This raw function is used internally by edac_mc_handle_error(). It should
* only be called directly when the hardware error come directly from BIOS,
* like in the case of APEI GHES driver.
*/
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
- struct mem_ctl_info *mci,
- struct edac_raw_error_desc *e);
+void edac_raw_mc_handle_error(struct edac_raw_error_desc *e);
/**
* edac_mc_handle_error() - Reports a memory event to userspace.
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index c70ec0a306d8..4e6aca595133 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -274,14 +274,8 @@ static const struct attribute_group *csrow_attr_groups[] = {
NULL
};
-static void csrow_attr_release(struct device *dev)
-{
- /* release device with _edac_mc_free() */
-}
-
static const struct device_type csrow_attr_type = {
.groups = csrow_attr_groups,
- .release = csrow_attr_release,
};
/*
@@ -387,6 +381,14 @@ static const struct attribute_group *csrow_dev_groups[] = {
NULL
};
+static void csrow_release(struct device *dev)
+{
+ /*
+ * Nothing to do, just unregister sysfs here. The mci
+ * device owns the data and will also release it.
+ */
+}
+
static inline int nr_pages_per_csrow(struct csrow_info *csrow)
{
int chan, nr_pages = 0;
@@ -405,6 +407,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
csrow->dev.type = &csrow_attr_type;
csrow->dev.groups = csrow_dev_groups;
+ csrow->dev.release = csrow_release;
device_initialize(&csrow->dev);
csrow->dev.parent = &mci->dev;
csrow->mci = mci;
@@ -441,10 +444,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
error:
for (--i; i >= 0; i--) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
- device_unregister(&mci->csrows[i]->dev);
+ if (device_is_registered(&mci->csrows[i]->dev))
+ device_unregister(&mci->csrows[i]->dev);
}
return err;
@@ -453,15 +454,13 @@ error:
static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
{
int i;
- struct csrow_info *csrow;
- for (i = mci->nr_csrows - 1; i >= 0; i--) {
- csrow = mci->csrows[i];
- if (!nr_pages_per_csrow(csrow))
- continue;
- device_unregister(&mci->csrows[i]->dev);
+ for (i = 0; i < mci->nr_csrows; i++) {
+ if (device_is_registered(&mci->csrows[i]->dev))
+ device_unregister(&mci->csrows[i]->dev);
}
}
+
#endif
/*
@@ -552,10 +551,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ce_count);
}
static ssize_t dimmdev_ue_count_show(struct device *dev,
@@ -563,10 +560,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ue_count);
}
/* dimm/rank attribute files */
@@ -602,16 +597,18 @@ static const struct attribute_group *dimm_attr_groups[] = {
NULL
};
-static void dimm_attr_release(struct device *dev)
-{
- /* release device with _edac_mc_free() */
-}
-
static const struct device_type dimm_attr_type = {
.groups = dimm_attr_groups,
- .release = dimm_attr_release,
};
+static void dimm_release(struct device *dev)
+{
+ /*
+ * Nothing to do, just unregister sysfs here. The mci
+ * device owns the data and will also release it.
+ */
+}
+
/* Create a DIMM object under specifed memory controller device */
static int edac_create_dimm_object(struct mem_ctl_info *mci,
struct dimm_info *dimm)
@@ -620,6 +617,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
dimm->mci = mci;
dimm->dev.type = &dimm_attr_type;
+ dimm->dev.release = dimm_release;
device_initialize(&dimm->dev);
dimm->dev.parent = &mci->dev;
@@ -659,7 +657,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
- int cnt, row, chan, i;
+ struct dimm_info *dimm;
+ int row, chan;
+
mci->ue_mc = 0;
mci->ce_mc = 0;
mci->ue_noinfo_count = 0;
@@ -675,11 +675,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
ri->channels[chan]->ce_count = 0;
}
- cnt = 1;
- for (i = 0; i < mci->n_layers; i++) {
- cnt *= mci->layers[i].size;
- memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
- memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
+ mci_for_each_dimm(mci, dimm) {
+ dimm->ue_count = 0;
+ dimm->ce_count = 0;
}
mci->start_time = jiffies;
@@ -884,14 +882,8 @@ static const struct attribute_group *mci_attr_groups[] = {
NULL
};
-static void mci_attr_release(struct device *dev)
-{
- /* release device with _edac_mc_free() */
-}
-
static const struct device_type mci_attr_type = {
.groups = mci_attr_groups,
- .release = mci_attr_release,
};
/*
@@ -910,8 +902,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
/* get the /sys/devices/system/edac subsys reference */
mci->dev.type = &mci_attr_type;
- device_initialize(&mci->dev);
-
mci->dev.parent = mci_pdev;
mci->dev.groups = groups;
dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
@@ -921,7 +911,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
err = device_add(&mci->dev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
- put_device(&mci->dev);
+ /* no put_device() here, free mci with _edac_mc_free() */
return err;
}
@@ -937,24 +927,20 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
err = edac_create_dimm_object(mci, dimm);
if (err)
- goto fail_unregister_dimm;
+ goto fail;
}
#ifdef CONFIG_EDAC_LEGACY_SYSFS
err = edac_create_csrow_objects(mci);
if (err < 0)
- goto fail_unregister_dimm;
+ goto fail;
#endif
edac_create_debugfs_nodes(mci);
return 0;
-fail_unregister_dimm:
- mci_for_each_dimm(mci, dimm) {
- if (device_is_registered(&dimm->dev))
- device_unregister(&dimm->dev);
- }
- device_unregister(&mci->dev);
+fail:
+ edac_remove_sysfs_mci_device(mci);
return err;
}
@@ -966,6 +952,9 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
{
struct dimm_info *dimm;
+ if (!device_is_registered(&mci->dev))
+ return;
+
edac_dbg(0, "\n");
#ifdef CONFIG_EDAC_DEBUG
@@ -976,17 +965,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
#endif
mci_for_each_dimm(mci, dimm) {
- if (dimm->nr_pages == 0)
+ if (!device_is_registered(&dimm->dev))
continue;
edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
device_unregister(&dimm->dev);
}
-}
-void edac_unregister_sysfs(struct mem_ctl_info *mci)
-{
- edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev));
- device_unregister(&mci->dev);
+ /* only remove the device, but keep mci */
+ device_del(&mci->dev);
}
static void mc_attr_release(struct device *dev)
@@ -1000,9 +986,6 @@ static void mc_attr_release(struct device *dev)
kfree(dev);
}
-static const struct device_type mc_attr_type = {
- .release = mc_attr_release,
-};
/*
* Init/exit code for the module. Basically, creates/removes /sys/class/rc
*/
@@ -1015,11 +998,10 @@ int __init edac_mc_sysfs_init(void)
return -ENOMEM;
mci_pdev->bus = edac_get_sysfs_subsys();
- mci_pdev->type = &mc_attr_type;
- device_initialize(mci_pdev);
- dev_set_name(mci_pdev, "mc");
+ mci_pdev->release = mc_attr_release;
+ mci_pdev->init_name = "mc";
- err = device_add(mci_pdev);
+ err = device_register(mci_pdev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
put_device(mci_pdev);
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 388427d378b1..aa1f91688eb8 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -28,7 +28,6 @@ void edac_mc_sysfs_exit(void);
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
const struct attribute_group **groups);
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
-void edac_unregister_sysfs(struct mem_ctl_info *mci);
extern int edac_get_log_ue(void);
extern int edac_get_log_ce(void);
extern int edac_get_panic_on_ue(void);
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index b99080d8a10c..cb3dab56a875 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -201,7 +201,6 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
- enum hw_event_mc_err_type type;
struct edac_raw_error_desc *e;
struct mem_ctl_info *mci;
struct ghes_edac_pvt *pvt;
@@ -240,17 +239,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
switch (sev) {
case GHES_SEV_CORRECTED:
- type = HW_EVENT_ERR_CORRECTED;
+ e->type = HW_EVENT_ERR_CORRECTED;
break;
case GHES_SEV_RECOVERABLE:
- type = HW_EVENT_ERR_UNCORRECTED;
+ e->type = HW_EVENT_ERR_UNCORRECTED;
break;
case GHES_SEV_PANIC:
- type = HW_EVENT_ERR_FATAL;
+ e->type = HW_EVENT_ERR_FATAL;
break;
default:
case GHES_SEV_NO:
- type = HW_EVENT_ERR_INFO;
+ e->type = HW_EVENT_ERR_INFO;
}
edac_dbg(1, "error validation_bits: 0x%08llx\n",
@@ -356,11 +355,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
mem_err->mem_dev_handle);
index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
- if (index >= 0) {
+ if (index >= 0)
e->top_layer = index;
- e->enable_per_layer_report = true;
- }
-
}
if (p > e->location)
*(p - 1) = '\0';
@@ -442,7 +438,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
if (p > pvt->other_detail)
*(p - 1) = '\0';
- edac_raw_mc_handle_error(type, mci, e);
+ edac_raw_mc_handle_error(e);
unlock:
spin_unlock_irqrestore(&ghes_lock, flags);
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea980c556f2e..8874b7722b2f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1239,7 +1239,7 @@ static int __init mce_amd_init(void)
case 0x17:
case 0x18:
- pr_warn("Decoding supported only on Scalable MCA processors.\n");
+ pr_warn_once("Decoding supported only on Scalable MCA processors.\n");
return -EINVAL;
default: