summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/mce
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mce')
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c684
-rw-r--r--arch/x86/kernel/cpu/mce/core.c452
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c2
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c35
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c51
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h15
-rw-r--r--arch/x86/kernel/cpu/mce/threshold.c19
7 files changed, 612 insertions, 646 deletions
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1075a90141da..6605a0224659 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -43,9 +43,6 @@
/* Deferred error settings */
#define MSR_CU_DEF_ERR 0xC0000410
#define MASK_DEF_LVTOFF 0x000000F0
-#define MASK_DEF_INT_TYPE 0x00000006
-#define DEF_LVT_OFF 0x2
-#define DEF_INT_TYPE_APIC 0x2
/* Scalable MCA: */
@@ -54,6 +51,17 @@
static bool thresholding_irq_en;
+struct mce_amd_cpu_data {
+ mce_banks_t thr_intr_banks;
+ mce_banks_t dfr_intr_banks;
+
+ u32 thr_intr_en: 1,
+ dfr_intr_en: 1,
+ __resv: 30;
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
+
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -79,45 +87,57 @@ struct smca_bank {
const struct smca_hwid *hwid;
u32 id; /* Value of MCA_IPID[InstanceId]. */
u8 sysfs_id; /* Value used for sysfs name. */
+ u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */
+ __reserved :63;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
static const char * const smca_names[] = {
- [SMCA_LS ... SMCA_LS_V2] = "load_store",
- [SMCA_IF] = "insn_fetch",
- [SMCA_L2_CACHE] = "l2_cache",
+ [SMCA_CS ... SMCA_CS_V2] = "coherent_station",
+ [SMCA_DACC_BE] = "dacc_be",
+ [SMCA_DACC_FE] = "dacc_fe",
[SMCA_DE] = "decode_unit",
- [SMCA_RESERVED] = "reserved",
+ [SMCA_EDDR5CMN] = "eddr5_cmn",
[SMCA_EX] = "execution_unit",
[SMCA_FP] = "floating_point",
+ [SMCA_GMI_PCS] = "gmi_pcs",
+ [SMCA_GMI_PHY] = "gmi_phy",
+ [SMCA_IF] = "insn_fetch",
+ [SMCA_L2_CACHE] = "l2_cache",
[SMCA_L3_CACHE] = "l3_cache",
- [SMCA_CS ... SMCA_CS_V2] = "coherent_slave",
- [SMCA_PIE] = "pie",
-
- /* UMC v2 is separate because both of them can exist in a single system. */
- [SMCA_UMC] = "umc",
- [SMCA_UMC_V2] = "umc_v2",
+ [SMCA_LS ... SMCA_LS_V2] = "load_store",
[SMCA_MA_LLC] = "ma_llc",
- [SMCA_PB] = "param_block",
- [SMCA_PSP ... SMCA_PSP_V2] = "psp",
- [SMCA_SMU ... SMCA_SMU_V2] = "smu",
[SMCA_MP5] = "mp5",
+ [SMCA_MPART] = "mpart",
+ [SMCA_MPASP ... SMCA_MPASP_V2] = "mpasp",
+ [SMCA_MPDACC] = "mpdacc",
[SMCA_MPDMA] = "mpdma",
+ [SMCA_MPM] = "mpm",
+ [SMCA_MPRAS] = "mpras",
+ [SMCA_NBIF] = "nbif",
[SMCA_NBIO] = "nbio",
+ [SMCA_PB] = "param_block",
[SMCA_PCIE ... SMCA_PCIE_V2] = "pcie",
- [SMCA_XGMI_PCS] = "xgmi_pcs",
- [SMCA_NBIF] = "nbif",
- [SMCA_SHUB] = "shub",
+ [SMCA_PCIE_PL] = "pcie_pl",
+ [SMCA_PIE] = "pie",
+ [SMCA_PSP ... SMCA_PSP_V2] = "psp",
+ [SMCA_RESERVED] = "reserved",
[SMCA_SATA] = "sata",
+ [SMCA_SHUB] = "shub",
+ [SMCA_SMU ... SMCA_SMU_V2] = "smu",
+ [SMCA_SSBDCI] = "ssbdci",
+
+ /* UMC v2 is separate because both of them can exist in a single system. */
+ [SMCA_UMC] = "umc",
+ [SMCA_UMC_V2] = "umc_v2",
[SMCA_USB] = "usb",
- [SMCA_USR_DP] = "usr_dp",
[SMCA_USR_CP] = "usr_cp",
- [SMCA_GMI_PCS] = "gmi_pcs",
- [SMCA_XGMI_PHY] = "xgmi_phy",
+ [SMCA_USR_DP] = "usr_dp",
[SMCA_WAFL_PHY] = "wafl_phy",
- [SMCA_GMI_PHY] = "gmi_phy",
+ [SMCA_XGMI_PCS] = "xgmi_pcs",
+ [SMCA_XGMI_PHY] = "xgmi_phy",
};
static const char *smca_get_name(enum smca_bank_types t)
@@ -143,68 +163,60 @@ enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
}
EXPORT_SYMBOL_GPL(smca_get_bank_type);
+/*
+ * Format:
+ * { bank_type, hwid_mcatype }
+ *
+ * alphanumerically sorted by bank type.
+ */
static const struct smca_hwid smca_hwid_mcatypes[] = {
- /* { bank_type, hwid_mcatype } */
-
- /* Reserved type */
- { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
-
- /* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
- { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
- { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
- { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
+ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
+ { SMCA_DACC_BE, HWID_MCATYPE(0x164, 0x0) },
+ { SMCA_DACC_FE, HWID_MCATYPE(0x157, 0x0) },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
- /* HWID 0xB0 MCATYPE 0x4 is Reserved */
+ { SMCA_EDDR5CMN, HWID_MCATYPE(0x1E0, 0x0) },
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
+ { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
+ { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
+ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
+ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
-
- /* Data Fabric MCA types */
- { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
- { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
+ { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
{ SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) },
-
- /* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
- { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
-
- /* Parameter Block MCA type */
- { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
-
- /* Platform Security Processor MCA type */
- { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
- { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
-
- /* System Management Unit MCA type */
- { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
- { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
-
- /* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
-
- /* MPDMA MCA type */
+ { SMCA_MPART, HWID_MCATYPE(0xFF, 0x2) },
+ { SMCA_MPASP, HWID_MCATYPE(0xFD, 0x0) },
+ { SMCA_MPASP_V2, HWID_MCATYPE(0xFD, 0x1) },
+ { SMCA_MPDACC, HWID_MCATYPE(0xBE, 0x0) },
{ SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) },
-
- /* Northbridge IO Unit MCA type */
+ { SMCA_MPM, HWID_MCATYPE(0xF9, 0x0) },
+ { SMCA_MPRAS, HWID_MCATYPE(0x12, 0x0) },
+ { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
-
- /* PCI Express Unit MCA type */
+ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
{ SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) },
-
- { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
- { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) },
- { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
+ { SMCA_PCIE_PL, HWID_MCATYPE(0x1E1, 0x0) },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
+ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
+ { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
+ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
+ { SMCA_SSBDCI, HWID_MCATYPE(0x5C, 0x0) },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
+ { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) },
{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
- { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
{ SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) },
- { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
- { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
+ { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },
- { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) },
+ { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) },
+ { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
};
/*
@@ -241,7 +253,8 @@ struct threshold_block {
struct threshold_bank {
struct kobject *kobj;
- struct threshold_block *blocks;
+ /* List of threshold blocks within this MCA bank. */
+ struct list_head miscj;
};
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
@@ -252,9 +265,6 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
*/
static DEFINE_PER_CPU(u64, bank_map);
-/* Map of banks that have more than MCA_MISC0 available. */
-static DEFINE_PER_CPU(u64, smca_misc_banks_map);
-
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -264,30 +274,9 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
-static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
-{
- u32 low, high;
-
- /*
- * For SMCA enabled processors, BLKPTR field of the first MISC register
- * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
- */
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return;
-
- if (!(low & MCI_CONFIG_MCAX))
- return;
-
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
- return;
-
- if (low & MASK_BLKPTR_LO)
- per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
-
-}
-
static void smca_configure(unsigned int bank, unsigned int cpu)
{
+ struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
const struct smca_hwid *s_hwid;
unsigned int i, hwid_mcatype;
@@ -318,16 +307,36 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
* APIC based interrupt. First, check that no interrupt has been
* set.
*/
- if ((low & BIT(5)) && !((high >> 5) & 0x3))
+ if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) {
+ __set_bit(bank, data->dfr_intr_banks);
high |= BIT(5);
+ }
+
+ /*
+ * SMCA Corrected Error Interrupt
+ *
+ * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can
+ * send an MCA Thresholding interrupt without the OS initializing
+ * this feature. This can be used if the threshold limit is managed
+ * by the platform.
+ *
+ * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR).
+ * The OS should set this to inform the platform that the OS is ready
+ * to handle the MCA Thresholding interrupt.
+ */
+ if ((low & BIT(10)) && data->thr_intr_en) {
+ __set_bit(bank, data->thr_intr_banks);
+ high |= BIT(8);
+ }
this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
+ if (low & MCI_CONFIG_PADDRV)
+ this_cpu_ptr(smca_banks)[bank].paddrv = 1;
+
wrmsr(smca_config, low, high);
}
- smca_set_misc_banks_map(bank, cpu);
-
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
@@ -350,7 +359,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
struct thresh_restart {
struct threshold_block *b;
- int reset;
int set_lvt_off;
int lvt_off;
u16 old_limit;
@@ -395,6 +403,14 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
int msr = (hi & MASK_LVTOFF_HI) >> 20;
+ /*
+ * On SMCA CPUs, LVT offset is programmed at a different MSR, and
+ * the BIOS provides the value. The original field where LVT offset
+ * was set is reserved. Return early here:
+ */
+ if (mce_flags.smca)
+ return false;
+
if (apic < 0) {
pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
@@ -403,14 +419,6 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
}
if (apic != msr) {
- /*
- * On SMCA CPUs, LVT offset is programmed at a different MSR, and
- * the BIOS provides the value. The original field where LVT offset
- * was set is reserved. Return early here:
- */
- if (mce_flags.smca)
- return false;
-
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n",
b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@@ -420,8 +428,8 @@ static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
return true;
};
-/* Reprogram MCx_MISC MSR behind this threshold bank. */
-static void threshold_restart_bank(void *_tr)
+/* Reprogram MCx_MISC MSR behind this threshold block. */
+static void threshold_restart_block(void *_tr)
{
struct thresh_restart *tr = _tr;
u32 hi, lo;
@@ -432,13 +440,13 @@ static void threshold_restart_bank(void *_tr)
rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
- tr->reset = 1; /* limit cannot be lower than err count */
-
- if (tr->reset) { /* reset err count and overflow bit */
- hi =
- (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - tr->b->threshold_limit);
+ /*
+ * Reset error count and overflow bit.
+ * This is done during init or after handling an interrupt.
+ */
+ if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
+ hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
+ hi |= THRESHOLD_MAX - tr->b->threshold_limit;
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
@@ -470,6 +478,36 @@ static void threshold_restart_bank(void *_tr)
wrmsr(tr->b->address, lo, hi);
}
+static void threshold_restart_bank(unsigned int bank, bool intr_en)
+{
+ struct threshold_bank **thr_banks = this_cpu_read(threshold_banks);
+ struct threshold_block *block, *tmp;
+ struct thresh_restart tr;
+
+ if (!thr_banks || !thr_banks[bank])
+ return;
+
+ memset(&tr, 0, sizeof(tr));
+
+ list_for_each_entry_safe(block, tmp, &thr_banks[bank]->miscj, miscj) {
+ tr.b = block;
+ tr.b->interrupt_enable = intr_en;
+ threshold_restart_block(&tr);
+ }
+}
+
+/* Try to use the threshold limit reported through APEI. */
+static u16 get_thr_limit(void)
+{
+ u32 thr_limit = mce_get_apei_thr_limit();
+
+ /* Fallback to old default if APEI limit is not available. */
+ if (!thr_limit)
+ return THRESHOLD_MAX;
+
+ return min(thr_limit, THRESHOLD_MAX);
+}
+
static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
struct thresh_restart tr = {
@@ -478,66 +516,19 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
.lvt_off = offset,
};
- b->threshold_limit = THRESHOLD_MAX;
- threshold_restart_bank(&tr);
+ b->threshold_limit = get_thr_limit();
+ threshold_restart_block(&tr);
};
static int setup_APIC_mce_threshold(int reserved, int new)
{
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
- APIC_EILVT_MSG_FIX, 0))
- return new;
-
- return reserved;
-}
-
-static int setup_APIC_deferred_error(int reserved, int new)
-{
- if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
- APIC_EILVT_MSG_FIX, 0))
+ APIC_DELIVERY_MODE_FIXED, 0))
return new;
return reserved;
}
-static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
-{
- u32 low = 0, high = 0;
- int def_offset = -1, def_new;
-
- if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
- return;
-
- def_new = (low & MASK_DEF_LVTOFF) >> 4;
- if (!(low & MASK_DEF_LVTOFF)) {
- pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
- def_new = DEF_LVT_OFF;
- low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
- }
-
- def_offset = setup_APIC_deferred_error(def_offset, def_new);
- if ((def_offset == def_new) &&
- (deferred_error_int_vector != amd_deferred_error_interrupt))
- deferred_error_int_vector = amd_deferred_error_interrupt;
-
- if (!mce_flags.smca)
- low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
-
- wrmsr(MSR_CU_DEF_ERR, low, high);
-}
-
-static u32 smca_get_block_address(unsigned int bank, unsigned int block,
- unsigned int cpu)
-{
- if (!block)
- return MSR_AMD64_SMCA_MCx_MISC(bank);
-
- if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
- return 0;
-
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
-}
-
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block,
unsigned int cpu)
@@ -547,8 +538,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
- if (mce_flags.smca)
- return smca_get_block_address(bank, block, cpu);
+ if (mce_flags.smca) {
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ if (!(low & MASK_BLKPTR_LO))
+ return 0;
+
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ }
/* Fall back to method we used for older processors: */
switch (block) {
@@ -566,12 +564,10 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
return addr;
}
-static int
-prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
- int offset, u32 misc_high)
+static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
+ int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
- u32 smca_low, smca_high;
struct threshold_block b;
int new;
@@ -588,20 +584,13 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
if (!b.interrupt_capable)
goto done;
+ __set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
b.interrupt_enable = 1;
- if (!mce_flags.smca) {
- new = (misc_high & MASK_LVTOFF_HI) >> 20;
- goto set_offset;
- }
-
- /* Gather LVT offset for thresholding: */
- if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
- goto out;
-
- new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+ if (mce_flags.smca)
+ goto done;
-set_offset:
+ new = (misc_high & MASK_LVTOFF_HI) >> 20;
offset = setup_APIC_mce_threshold(offset, new);
if (offset == new)
thresholding_irq_en = true;
@@ -609,7 +598,6 @@ set_offset:
done:
mce_threshold_block_init(&b, offset);
-out:
return offset;
}
@@ -618,6 +606,14 @@ bool amd_filter_mce(struct mce *m)
enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
struct cpuinfo_x86 *c = &boot_cpu_data;
+ /* Bogus hw errors on Cezanne A0. */
+ if (c->x86 == 0x19 &&
+ c->x86_model == 0x50 &&
+ c->x86_stepping == 0x0) {
+ if (!(m->status & MCI_STATUS_EN))
+ return true;
+ }
+
/* See Family 17h Models 10h-2Fh Erratum #1114. */
if (c->x86 == 0x17 &&
c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
@@ -662,12 +658,12 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
return;
}
- rdmsrl(MSR_K7_HWCR, hwcr);
+ rdmsrq(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+ wrmsrq(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < num_msrs; i++)
@@ -675,7 +671,55 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
/* restore old settings */
if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr);
+ wrmsrq(MSR_K7_HWCR, hwcr);
+}
+
+static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+ /* This should be disabled by the BIOS, but isn't always */
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+ /*
+ * disable GART TBL walk error reporting, which
+ * trips off incorrectly with the IOMMU & 3ware
+ * & Cerberus:
+ */
+ clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+ }
+
+ /*
+ * Various K7s with broken bank 0 around. Always disable
+ * by default.
+ */
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks))
+ mce_banks[0].ctl = 0;
+}
+
+/*
+ * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is
+ * ready to send interrupts.
+ *
+ * Individual error sources are enabled later during per-bank init.
+ */
+static void smca_enable_interrupt_vectors(void)
+{
+ struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data);
+ u64 mca_intr_cfg, offset;
+
+ if (!mce_flags.smca || !mce_flags.succor)
+ return;
+
+ if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg))
+ return;
+
+ offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12;
+ if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_DELIVERY_MODE_FIXED, 0))
+ data->thr_intr_en = 1;
+
+ offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4;
+ if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_DELIVERY_MODE_FIXED, 0))
+ data->dfr_intr_en = 1;
}
/* cpu init entry point, called from mce.c with preempt off */
@@ -685,11 +729,20 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
u32 low = 0, high = 0, address = 0;
int offset = -1;
+ amd_apply_cpu_quirks(c);
+
+ mce_flags.amd_threshold = 1;
+
+ smca_enable_interrupt_vectors();
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
- if (mce_flags.smca)
+ if (mce_flags.smca) {
smca_configure(bank, cpu);
+ if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en)
+ continue;
+ }
+
disable_err_thresholding(c, bank);
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -710,9 +763,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
offset = prepare_threshold_block(bank, block, address, offset, high);
}
}
+}
- if (mce_flags.succor)
- deferred_error_interrupt_enable(c);
+void smca_bsp_init(void)
+{
+ mce_threshold_vector = amd_threshold_interrupt;
+ deferred_error_int_vector = amd_deferred_error_interrupt;
}
/*
@@ -749,9 +805,9 @@ bool amd_mce_is_memory_error(struct mce *m)
}
/*
- * AMD systems do not have an explicit indicator that the value in MCA_ADDR is
- * a system physical address. Therefore, individual cases need to be detected.
- * Future cases and checks will be added as needed.
+ * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a
+ * system physical address. Individual cases though, need to be detected for
+ * other systems. Future cases will be added as needed.
*
* 1) General case
* a) Assume address is not usable.
@@ -765,6 +821,8 @@ bool amd_mce_is_memory_error(struct mce *m)
* a) Reported in legacy bank 4 with extended error code (XEC) 8.
* b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore,
* this bit should not be checked.
+ * 4) MCI_STATUS_PADDRVAL is set
+ * a) Will provide a valid system physical address.
*
* NOTE: SMCA UMC memory errors fall into case #1.
*/
@@ -778,6 +836,9 @@ bool amd_mce_usable_address(struct mce *m)
return false;
}
+ if (this_cpu_ptr(smca_banks)[m->bank].paddrv)
+ return m->status & MCI_STATUS_PADDRV;
+
/* Check poison bit for all other bank types. */
if (m->status & MCI_STATUS_POISON)
return true;
@@ -786,37 +847,6 @@ bool amd_mce_usable_address(struct mce *m)
return false;
}
-static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
-{
- struct mce_hw_err err;
- struct mce *m = &err.m;
-
- mce_prep_record(&err);
-
- m->status = status;
- m->misc = misc;
- m->bank = bank;
- m->tsc = rdtsc();
-
- if (m->status & MCI_STATUS_ADDRV) {
- m->addr = addr;
-
- smca_extract_err_addr(m);
- }
-
- if (mce_flags.smca) {
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
-
- if (m->status & MCI_STATUS_SYNDV) {
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
- rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
- }
- }
-
- mce_log(&err);
-}
-
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
@@ -826,103 +856,20 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
apic_eoi();
}
-/*
- * Returns true if the logged error is deferred. False, otherwise.
- */
-static inline bool
-_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
-{
- u64 status, addr = 0;
-
- rdmsrl(msr_stat, status);
- if (!(status & MCI_STATUS_VAL))
- return false;
-
- if (status & MCI_STATUS_ADDRV)
- rdmsrl(msr_addr, addr);
-
- __log_error(bank, status, addr, misc);
-
- wrmsrl(msr_stat, 0);
-
- return status & MCI_STATUS_DEFERRED;
-}
-
-static bool _log_error_deferred(unsigned int bank, u32 misc)
-{
- if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
- mca_msr_reg(bank, MCA_ADDR), misc))
- return false;
-
- /*
- * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
- * Return true here to avoid accessing these registers.
- */
- if (!mce_flags.smca)
- return true;
-
- /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
- return true;
-}
-
-/*
- * We have three scenarios for checking for Deferred errors:
- *
- * 1) Non-SMCA systems check MCA_STATUS and log error if found.
- * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
- * clear MCA_DESTAT.
- * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
- * log it.
- */
-static void log_error_deferred(unsigned int bank)
-{
- if (_log_error_deferred(bank, 0))
- return;
-
- /*
- * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
- * for a valid error.
- */
- _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
- MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
-}
-
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- unsigned int bank;
-
- for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
- log_error_deferred(bank);
+ machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
}
-static void log_error_thresholding(unsigned int bank, u64 misc)
+void mce_amd_handle_storm(unsigned int bank, bool on)
{
- _log_error_deferred(bank, misc);
+ threshold_restart_bank(bank, on);
}
-static void log_and_reset_block(struct threshold_block *block)
+static void amd_reset_thr_limit(unsigned int bank)
{
- struct thresh_restart tr;
- u32 low = 0, high = 0;
-
- if (!block)
- return;
-
- if (rdmsr_safe(block->address, &low, &high))
- return;
-
- if (!(high & MASK_OVERFLOW_HI))
- return;
-
- /* Log the MCE which caused the threshold event. */
- log_error_thresholding(block->bank, ((u64)high << 32) | low);
-
- /* Reset threshold block after logging error. */
- memset(&tr, 0, sizeof(tr));
- tr.b = block;
- threshold_restart_bank(&tr);
+ threshold_restart_bank(bank, true);
}
/*
@@ -931,34 +878,27 @@ static void log_and_reset_block(struct threshold_block *block)
*/
static void amd_threshold_interrupt(void)
{
- struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
- struct threshold_bank **bp = this_cpu_read(threshold_banks);
- unsigned int bank, cpu = smp_processor_id();
-
- /*
- * Validate that the threshold bank has been initialized already. The
- * handler is installed at boot time, but on a hotplug event the
- * interrupt might fire before the data has been initialized.
- */
- if (!bp)
- return;
-
- for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
- if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
- continue;
+ machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks);
+}
- first_block = bp[bank]->blocks;
- if (!first_block)
- continue;
+void amd_clear_bank(struct mce *m)
+{
+ amd_reset_thr_limit(m->bank);
+ if (mce_flags.smca) {
/*
- * The first block is also the head of the list. Check it first
- * before iterating over the rest.
+ * Clear MCA_DESTAT for all deferred errors even those
+ * logged in MCA_STATUS.
*/
- log_and_reset_block(first_block);
- list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
- log_and_reset_block(block);
+ if (m->status & MCI_STATUS_DEFERRED)
+ mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
+
+ /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ return;
}
+
+ mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
}
/*
@@ -996,7 +936,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
memset(&tr, 0, sizeof(tr));
tr.b = b;
- if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@@ -1021,7 +961,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
b->threshold_limit = new;
tr.b = b;
- if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
+ if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1))
return -ENODEV;
return size;
@@ -1113,13 +1053,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
}
bank_type = smca_get_bank_type(cpu, bank);
- if (bank_type >= N_SMCA_BANK_TYPES)
- return NULL;
if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
- return NULL;
+ }
+
+ if (b && b->block) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
+ return buf_mcatype;
+ }
+
+ if (bank_type >= N_SMCA_BANK_TYPES) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
+ return buf_mcatype;
}
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
@@ -1156,7 +1103,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
(high & MASK_LOCKED_HI))
goto recurse;
- b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
+ b = kzalloc_obj(struct threshold_block);
if (!b)
return -ENOMEM;
@@ -1166,7 +1113,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
b->address = address;
b->interrupt_enable = 0;
b->interrupt_capable = lvt_interrupt_supported(bank, high);
- b->threshold_limit = THRESHOLD_MAX;
+ b->threshold_limit = get_thr_limit();
if (b->interrupt_capable) {
default_attrs[2] = &interrupt_enable.attr;
@@ -1175,13 +1122,9 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb
default_attrs[2] = NULL;
}
- INIT_LIST_HEAD(&b->miscj);
+ list_add(&b->miscj, &tb->miscj);
- /* This is safe as @tb is not visible yet */
- if (tb->blocks)
- list_add(&b->miscj, &tb->blocks->miscj);
- else
- tb->blocks = b;
+ mce_threshold_block_init(b, (high & MASK_LVTOFF_HI) >> 20);
err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
if (err)
@@ -1219,7 +1162,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
if (!dev)
return -ENODEV;
- b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+ b = kzalloc_obj(struct threshold_bank);
if (!b) {
err = -ENOMEM;
goto out;
@@ -1232,6 +1175,8 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
goto out_free;
}
+ INIT_LIST_HEAD(&b->miscj);
+
err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
if (err)
goto out_kobj;
@@ -1252,26 +1197,15 @@ static void threshold_block_release(struct kobject *kobj)
kfree(to_block(kobj));
}
-static void deallocate_threshold_blocks(struct threshold_bank *bank)
+static void threshold_remove_bank(struct threshold_bank *bank)
{
struct threshold_block *pos, *tmp;
- list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
+ list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) {
list_del(&pos->miscj);
kobject_put(&pos->kobj);
}
- kobject_put(&bank->blocks->kobj);
-}
-
-static void threshold_remove_bank(struct threshold_bank *bank)
-{
- if (!bank->blocks)
- goto out_free;
-
- deallocate_threshold_blocks(bank);
-
-out_free:
kobject_put(bank->kobj);
kfree(bank);
}
@@ -1290,12 +1224,12 @@ static void __threshold_remove_device(struct threshold_bank **bp)
kfree(bp);
}
-int mce_threshold_remove_device(unsigned int cpu)
+void mce_threshold_remove_device(unsigned int cpu)
{
struct threshold_bank **bp = this_cpu_read(threshold_banks);
if (!bp)
- return 0;
+ return;
/*
* Clear the pointer before cleaning up, so that the interrupt won't
@@ -1304,7 +1238,7 @@ int mce_threshold_remove_device(unsigned int cpu)
this_cpu_write(threshold_banks, NULL);
__threshold_remove_device(bp);
- return 0;
+ return;
}
/**
@@ -1318,36 +1252,34 @@ int mce_threshold_remove_device(unsigned int cpu)
* thread running on @cpu. The callback is invoked on all CPUs which are
* online when the callback is installed or during a real hotplug event.
*/
-int mce_threshold_create_device(unsigned int cpu)
+void mce_threshold_create_device(unsigned int cpu)
{
unsigned int numbanks, bank;
struct threshold_bank **bp;
- int err;
if (!mce_flags.amd_threshold)
- return 0;
+ return;
bp = this_cpu_read(threshold_banks);
if (bp)
- return 0;
+ return;
numbanks = this_cpu_read(mce_num_banks);
- bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
+ bp = kzalloc_objs(*bp, numbanks);
if (!bp)
- return -ENOMEM;
+ return;
for (bank = 0; bank < numbanks; ++bank) {
if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
continue;
- err = threshold_create_bank(bp, cpu, bank);
- if (err) {
+ if (threshold_create_bank(bp, cpu, bank)) {
__threshold_remove_device(bp);
- return err;
+ return;
}
}
this_cpu_write(threshold_banks, bp);
if (thresholding_irq_en)
mce_threshold_vector = amd_threshold_interrupt;
- return 0;
+ return;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 1f14c3308b6b..8dd424ac5de8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -45,6 +45,7 @@
#include <linux/task_work.h>
#include <linux/hardirq.h>
#include <linux/kexec.h>
+#include <linux/vmcore_info.h>
#include <asm/fred.h>
#include <asm/cpu_device_id.h>
@@ -121,7 +122,7 @@ void mce_prep_record_common(struct mce *m)
{
m->cpuid = cpuid_eax(1);
m->cpuvendor = boot_cpu_data.x86_vendor;
- m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
+ m->mcgcap = native_rdmsrq(MSR_IA32_MCG_CAP);
/* need the internal __ version to avoid deadlocks */
m->time = __ktime_get_real_seconds();
}
@@ -388,9 +389,9 @@ void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
}
/* MSR access wrappers used for error injection */
-noinstr u64 mce_rdmsrl(u32 msr)
+noinstr u64 mce_rdmsrq(u32 msr)
{
- DECLARE_ARGS(val, low, high);
+ EAX_EDX_DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
int offset;
@@ -423,7 +424,7 @@ noinstr u64 mce_rdmsrl(u32 msr)
return EAX_EDX_VAL(val, low, high);
}
-static noinstr void mce_wrmsrl(u32 msr, u64 v)
+noinstr void mce_wrmsrq(u32 msr, u64 v)
{
u32 low, high;
@@ -444,7 +445,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
low = (u32)v;
high = (u32)(v >> 32);
- /* See comment in mce_rdmsrl() */
+ /* See comment in mce_rdmsrq() */
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE)
@@ -468,7 +469,7 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs
instrumentation_end();
m = &err->m;
- m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ m->mcgstatus = mce_rdmsrq(MSR_IA32_MCG_STATUS);
if (regs) {
/*
* Get the address of the instruction at the time of
@@ -488,7 +489,7 @@ static noinstr void mce_gather_info(struct mce_hw_err *err, struct pt_regs *regs
}
/* Use accurate RIP reporting if available. */
if (mca_cfg.rip_msr)
- m->ip = mce_rdmsrl(mca_cfg.rip_msr);
+ m->ip = mce_rdmsrq(mca_cfg.rip_msr);
}
}
@@ -684,10 +685,13 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
struct mce *m = &err->m;
if (m->status & MCI_STATUS_MISCV)
- m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
+ m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
+ if (m->kflags & MCE_CHECK_DFR_REGS)
+ m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
+ else
+ m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
/*
* Mask the reported address by the reported granularity.
@@ -702,12 +706,12 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
}
if (mce_flags.smca) {
- m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+ m->ipid = mce_rdmsrq(MSR_AMD64_SMCA_MCx_IPID(i));
if (m->status & MCI_STATUS_SYNDV) {
- m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
- err->vendor.amd.synd1 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND1(i));
- err->vendor.amd.synd2 = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND2(i));
+ m->synd = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND(i));
+ err->vendor.amd.synd1 = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(i));
+ err->vendor.amd.synd2 = mce_rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(i));
}
}
}
@@ -715,6 +719,86 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
+ * We have three scenarios for checking for Deferred errors:
+ *
+ * 1) Non-SMCA systems check MCA_STATUS and log error if found.
+ * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
+ * clear MCA_DESTAT.
+ * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
+ * log it.
+ */
+static bool smca_should_log_poll_error(struct mce *m)
+{
+ if (m->status & MCI_STATUS_VAL)
+ return true;
+
+ m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
+ if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
+ m->kflags |= MCE_CHECK_DFR_REGS;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Newer Intel systems that support software error
+ * recovery need to make additional checks. Other
+ * CPUs should skip over uncorrected errors, but log
+ * everything else.
+ */
+static bool ser_should_log_poll_error(struct mce *m)
+{
+ /* Log "not enabled" (speculative) errors */
+ if (!(m->status & MCI_STATUS_EN))
+ return true;
+
+ /*
+ * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
+ * UC == 1 && PCC == 0 && S == 0
+ */
+ if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
+ return true;
+
+ return false;
+}
+
+static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
+{
+ struct mce *m = &err->m;
+
+ if (mce_flags.smca)
+ return smca_should_log_poll_error(m);
+
+ /* If this entry is not valid, ignore it. */
+ if (!(m->status & MCI_STATUS_VAL))
+ return false;
+
+ /*
+ * If we are logging everything (at CPU online) or this
+ * is a corrected error, then we must log it.
+ */
+ if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
+ return true;
+
+ if (mca_cfg.ser)
+ return ser_should_log_poll_error(m);
+
+ if (m->status & MCI_STATUS_UC)
+ return false;
+
+ return true;
+}
+
+static void clear_bank(struct mce *m)
+{
+ if (m->cpuvendor == X86_VENDOR_AMD)
+ return amd_clear_bank(m);
+
+ mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
+}
+
+/*
* Poll for corrected events or events that happened before reset.
* Those are just logged through /dev/mcelog.
*
@@ -753,7 +837,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m->bank = i;
barrier();
- m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS));
/*
* Update storm tracking here, before checking for the
@@ -765,51 +849,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!mca_cfg.cmci_disabled)
mce_track_storm(m);
- /* If this entry is not valid, ignore it */
- if (!(m->status & MCI_STATUS_VAL))
+ /* Verify that the error should be logged based on hardware conditions. */
+ if (!should_log_poll_error(flags, &err))
continue;
- /*
- * If we are logging everything (at CPU online) or this
- * is a corrected error, then we must log it.
- */
- if ((flags & MCP_UC) || !(m->status & MCI_STATUS_UC))
- goto log_it;
-
- /*
- * Newer Intel systems that support software error
- * recovery need to make additional checks. Other
- * CPUs should skip over uncorrected errors, but log
- * everything else.
- */
- if (!mca_cfg.ser) {
- if (m->status & MCI_STATUS_UC)
- continue;
- goto log_it;
- }
-
- /* Log "not enabled" (speculative) errors */
- if (!(m->status & MCI_STATUS_EN))
- goto log_it;
-
- /*
- * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
- * UC == 1 && PCC == 0 && S == 0
- */
- if (!(m->status & MCI_STATUS_PCC) && !(m->status & MCI_STATUS_S))
- goto log_it;
-
- /*
- * Skip anything else. Presumption is that our read of this
- * bank is racing with a machine check. Leave the log alone
- * for do_machine_check() to deal with it.
- */
- continue;
-
-log_it:
- if (flags & MCP_DONTLOG)
- goto clear_it;
-
mce_read_aux(&err, i);
m->severity = mce_severity(m, NULL, NULL, false);
/*
@@ -826,10 +869,7 @@ log_it:
mce_log(&err);
clear_it:
- /*
- * Clear state for this bank.
- */
- mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ clear_bank(m);
}
/*
@@ -887,8 +927,8 @@ quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
*/
static noinstr bool quirk_skylake_repmov(void)
{
- u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
- u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE);
+ u64 mcgstatus = mce_rdmsrq(MSR_IA32_MCG_STATUS);
+ u64 misc_enable = mce_rdmsrq(MSR_IA32_MISC_ENABLE);
u64 mc1_status;
/*
@@ -899,7 +939,7 @@ static noinstr bool quirk_skylake_repmov(void)
!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING))
return false;
- mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
+ mc1_status = mce_rdmsrq(MSR_IA32_MCx_STATUS(1));
/* Check for a software-recoverable data fetch error. */
if ((mc1_status &
@@ -910,8 +950,8 @@ static noinstr bool quirk_skylake_repmov(void)
MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
MCI_STATUS_AR | MCI_STATUS_S)) {
misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
- mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
- mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0);
+ mce_wrmsrq(MSR_IA32_MISC_ENABLE, misc_enable);
+ mce_wrmsrq(MSR_IA32_MCx_STATUS(1), 0);
instrumentation_begin();
pr_err_once("Erratum detected, disable fast string copy instructions.\n");
@@ -955,7 +995,7 @@ static __always_inline int mce_no_way_out(struct mce_hw_err *err, char **msg, un
int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
- m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1274,7 +1314,7 @@ static __always_inline void mce_clear_state(unsigned long *toclear)
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (arch_test_bit(i, toclear))
- mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ mce_wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
}
}
@@ -1298,7 +1338,7 @@ static noinstr bool mce_check_crashing_cpu(void)
(crashing_cpu != -1 && crashing_cpu != cpu)) {
u64 mcgstatus;
- mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS);
+ mcgstatus = native_rdmsrq(MSR_IA32_MCG_STATUS);
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
if (mcgstatus & MCG_STATUS_LMCES)
@@ -1306,7 +1346,7 @@ static noinstr bool mce_check_crashing_cpu(void)
}
if (mcgstatus & MCG_STATUS_RIPV) {
- __wrmsr(MSR_IA32_MCG_STATUS, 0, 0);
+ native_wrmsrq(MSR_IA32_MCG_STATUS, 0);
return true;
}
}
@@ -1335,7 +1375,7 @@ __mc_scan_banks(struct mce_hw_err *err, struct pt_regs *regs,
m->addr = 0;
m->bank = i;
- m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+ m->status = mce_rdmsrq(mca_msr_reg(i, MCA_STATUS));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1690,10 +1730,13 @@ noinstr void do_machine_check(struct pt_regs *regs)
}
out:
+ /* Given it didn't panic, mark it as recoverable */
+ hwerr_log_error_type(HWERR_RECOV_OTHERS);
+
instrumentation_end();
clear:
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ mce_wrmsrq(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1740,6 +1783,11 @@ static void mc_poll_banks_default(void)
void (*mc_poll_banks)(void) = mc_poll_banks_default;
+static bool should_enable_timer(unsigned long iv)
+{
+ return !mca_cfg.ignore_ce && iv;
+}
+
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1763,7 +1811,7 @@ static void mce_timer_fn(struct timer_list *t)
if (mce_get_storm_mode()) {
__start_timer(t, HZ);
- } else {
+ } else if (should_enable_timer(iv)) {
__this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
@@ -1786,13 +1834,13 @@ void mce_timer_kick(bool storm)
__this_cpu_write(mce_next_interval, check_interval * HZ);
}
-/* Must not be called in IRQ context where del_timer_sync() can deadlock */
+/* Must not be called in IRQ context where timer_delete_sync() can deadlock */
static void mce_timer_delete_all(void)
{
int cpu;
for_each_online_cpu(cpu)
- del_timer_sync(&per_cpu(mce_timer, cpu));
+ timer_delete_sync(&per_cpu(mce_timer, cpu));
}
static void __mcheck_cpu_mce_banks_init(void)
@@ -1805,9 +1853,10 @@ static void __mcheck_cpu_mce_banks_init(void)
struct mce_bank *b = &mce_banks[i];
/*
- * Init them all, __mcheck_cpu_apply_quirks() is going to apply
- * the required vendor quirks before
- * __mcheck_cpu_init_clear_banks() does the final bank setup.
+ * Init them all by default.
+ *
+ * The required vendor quirks will be applied before
+ * __mcheck_cpu_init_prepare_banks() does the final bank setup.
*/
b->ctl = -1ULL;
b->init = true;
@@ -1822,7 +1871,7 @@ static void __mcheck_cpu_cap_init(void)
u64 cap;
u8 b;
- rdmsrl(MSR_IA32_MCG_CAP, cap);
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
@@ -1835,69 +1884,34 @@ static void __mcheck_cpu_cap_init(void)
this_cpu_write(mce_num_banks, b);
__mcheck_cpu_mce_banks_init();
-
- /* Use accurate RIP reporting if available. */
- if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
- mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
-
- if (cap & MCG_SER_P)
- mca_cfg.ser = 1;
}
static void __mcheck_cpu_init_generic(void)
{
- enum mcp_flags m_fl = 0;
- mce_banks_t all_banks;
u64 cap;
- if (!mca_cfg.bootlog)
- m_fl = MCP_DONTLOG;
-
- /*
- * Log the machine checks left over from the previous reset. Log them
- * only, do not start processing them. That will happen in mcheck_late_init()
- * when all consumers have been registered on the notifier chain.
- */
- bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
-
- cr4_set_bits(X86_CR4_MCE);
-
- rdmsrl(MSR_IA32_MCG_CAP, cap);
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
-static void __mcheck_cpu_init_clear_banks(void)
+static void __mcheck_cpu_init_prepare_banks(void)
{
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u64 msrval;
int i;
- for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
- struct mce_bank *b = &mce_banks[i];
+ /*
+ * Log the machine checks left over from the previous reset. Log them
+ * only, do not start processing them. That will happen in mcheck_late_init()
+ * when all consumers have been registered on the notifier chain.
+ */
+ if (mca_cfg.bootlog) {
+ mce_banks_t all_banks;
- if (!b->init)
- continue;
- wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
- wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+ bitmap_fill(all_banks, MAX_NR_BANKS);
+ machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
}
-}
-
-/*
- * Do a final check to see if there are any unused/RAZ banks.
- *
- * This must be done after the banks have been initialized and any quirks have
- * been applied.
- *
- * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
- * Otherwise, a user who disables a bank will not be able to re-enable it
- * without a system reboot.
- */
-static void __mcheck_cpu_check_banks(void)
-{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
- u64 msrval;
- int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
@@ -1905,25 +1919,16 @@ static void __mcheck_cpu_check_banks(void)
if (!b->init)
continue;
- rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
+ wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrq(mca_msr_reg(i, MCA_STATUS), 0);
+
+ rdmsrq(mca_msr_reg(i, MCA_CTL), msrval);
b->init = !!msrval;
}
}
-static void apply_quirks_amd(struct cpuinfo_x86 *c)
+static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
- /*
- * disable GART TBL walk error reporting, which
- * trips off incorrectly with the IOMMU & 3ware
- * & Cerberus:
- */
- clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
- }
-
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
@@ -1933,13 +1938,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
}
/*
- * Various K7s with broken bank 0 around. Always disable
- * by default.
- */
- if (c->x86 == 6 && this_cpu_read(mce_num_banks))
- mce_banks[0].ctl = 0;
-
- /*
* overflow_recov is supported for F15h Models 00h-0fh
* even though we don't have a CPUID bit for it.
*/
@@ -1950,26 +1948,13 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
mce_flags.zen_ifu_quirk = 1;
}
-static void apply_quirks_intel(struct cpuinfo_x86 *c)
+static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
/* Older CPUs (prior to family 6) don't need quirks. */
if (c->x86_vfm < INTEL_PENTIUM_PRO)
return;
/*
- * SDM documents that on family 6 bank 0 should not be written
- * because it aliases to another special BIOS controlled
- * register.
- * But it's not aliased anymore on model 0x1a+
- * Don't ignore bank 0 completely because there could be a
- * valid event later, merely don't write CTL0.
- */
- if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
- mce_banks[0].init = false;
-
- /*
* All newer Intel systems support MCE broadcasting. Enable
* synchronization with a one second timeout.
*/
@@ -1994,7 +1979,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
mce_flags.skx_repmov_quirk = 1;
}
-static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
+static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
{
/*
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
@@ -2006,34 +1991,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
}
}
-/* Add per CPU specific workarounds here */
-static bool __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
-{
- struct mca_config *cfg = &mca_cfg;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_UNKNOWN:
- pr_info("unknown CPU type - not enabling MCE support\n");
- return false;
- case X86_VENDOR_AMD:
- apply_quirks_amd(c);
- break;
- case X86_VENDOR_INTEL:
- apply_quirks_intel(c);
- break;
- case X86_VENDOR_ZHAOXIN:
- apply_quirks_zhaoxin(c);
- break;
- }
-
- if (cfg->monarch_timeout < 0)
- cfg->monarch_timeout = 0;
- if (cfg->bootlog != 0)
- cfg->panic_timeout = 30;
-
- return true;
-}
-
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
@@ -2055,19 +2012,6 @@ static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
return false;
}
-/*
- * Init basic CPU features needed for early decoding of MCEs.
- */
-static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
-{
- if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
- mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
- mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
- mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
- mce_flags.amd_threshold = 1;
- }
-}
-
static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
@@ -2156,11 +2100,10 @@ static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
- if (mca_cfg.ignore_ce || !iv)
- return;
-
- this_cpu_write(mce_next_interval, iv);
- __start_timer(t, iv);
+ if (should_enable_timer(iv)) {
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
+ }
}
static void __mcheck_cpu_setup_timer(void)
@@ -2277,6 +2220,53 @@ DEFINE_IDTENTRY_RAW(exc_machine_check)
}
#endif
+void mca_bsp_init(struct cpuinfo_x86 *c)
+{
+ u64 cap;
+
+ if (!mce_available(c))
+ return;
+
+ if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
+ mca_cfg.disabled = 1;
+ pr_info("unknown CPU type - not enabling MCE support\n");
+ return;
+ }
+
+ mce_flags.overflow_recov = cpu_feature_enabled(X86_FEATURE_OVERFLOW_RECOV);
+ mce_flags.succor = cpu_feature_enabled(X86_FEATURE_SUCCOR);
+ mce_flags.smca = cpu_feature_enabled(X86_FEATURE_SMCA);
+
+ if (mce_flags.smca)
+ smca_bsp_init();
+
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
+
+ /* Use accurate RIP reporting if available. */
+ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
+ mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
+
+ if (cap & MCG_SER_P)
+ mca_cfg.ser = 1;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ amd_apply_global_quirks(c);
+ break;
+ case X86_VENDOR_INTEL:
+ intel_apply_global_quirks(c);
+ break;
+ case X86_VENDOR_ZHAOXIN:
+ zhaoxin_apply_global_quirks(c);
+ break;
+ }
+
+ if (mca_cfg.monarch_timeout < 0)
+ mca_cfg.monarch_timeout = 0;
+ if (mca_cfg.bootlog != 0)
+ mca_cfg.panic_timeout = 30;
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
@@ -2294,11 +2284,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_cap_init();
- if (!__mcheck_cpu_apply_quirks(c)) {
- mca_cfg.disabled = 1;
- return;
- }
-
if (!mce_gen_pool_init()) {
mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
@@ -2307,12 +2292,11 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
mca_cfg.initialized = 1;
- __mcheck_cpu_init_early(c);
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
- __mcheck_cpu_init_clear_banks();
- __mcheck_cpu_check_banks();
+ __mcheck_cpu_init_prepare_banks();
__mcheck_cpu_setup_timer();
+ cr4_set_bits(X86_CR4_MCE);
}
/*
@@ -2436,7 +2420,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(mca_msr_reg(i, MCA_CTL), 0);
+ wrmsrq(mca_msr_reg(i, MCA_CTL), 0);
}
return;
}
@@ -2459,13 +2443,13 @@ static void vendor_disable_error_reporting(void)
mce_disable_error_reporting();
}
-static int mce_syscore_suspend(void)
+static int mce_syscore_suspend(void *data)
{
vendor_disable_error_reporting();
return 0;
}
-static void mce_syscore_shutdown(void)
+static void mce_syscore_shutdown(void *data)
{
vendor_disable_error_reporting();
}
@@ -2475,19 +2459,24 @@ static void mce_syscore_shutdown(void)
* Only one CPU is active at this time, the others get re-added later using
* CPU hotplug:
*/
-static void mce_syscore_resume(void)
+static void mce_syscore_resume(void *data)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
- __mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_init_prepare_banks();
+ cr4_set_bits(X86_CR4_MCE);
}
-static struct syscore_ops mce_syscore_ops = {
+static const struct syscore_ops mce_syscore_ops = {
.suspend = mce_syscore_suspend,
.shutdown = mce_syscore_shutdown,
.resume = mce_syscore_resume,
};
+static struct syscore mce_syscore = {
+ .ops = &mce_syscore_ops,
+};
+
/*
* mce_device: Sysfs support
*/
@@ -2497,8 +2486,9 @@ static void mce_cpu_restart(void *data)
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_init_prepare_banks();
__mcheck_cpu_init_timer();
+ cr4_set_bits(X86_CR4_MCE);
}
/* Reinit MCEs after user configuration changes */
@@ -2702,7 +2692,7 @@ static int mce_device_create(unsigned int cpu)
if (dev)
return 0;
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ dev = kzalloc_obj(*dev);
if (!dev)
return -ENOMEM;
dev->id = cpu;
@@ -2786,7 +2776,7 @@ static void mce_reenable_cpu(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrq(mca_msr_reg(i, MCA_CTL), b->ctl);
}
}
@@ -2801,15 +2791,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- int ret;
mce_device_create(cpu);
-
- ret = mce_threshold_create_device(cpu);
- if (ret) {
- mce_device_remove(cpu);
- return ret;
- }
+ mce_threshold_create_device(cpu);
mce_reenable_cpu();
mce_start_timer(t);
return 0;
@@ -2820,7 +2804,7 @@ static int mce_cpu_pre_down(unsigned int cpu)
struct timer_list *t = this_cpu_ptr(&mce_timer);
mce_disable_cpu();
- del_timer_sync(t);
+ timer_delete_sync(t);
mce_threshold_remove_device(cpu);
mce_device_remove(cpu);
return 0;
@@ -2893,7 +2877,7 @@ static __init int mcheck_init_device(void)
if (err < 0)
goto err_out_online;
- register_syscore_ops(&mce_syscore_ops);
+ register_syscore(&mce_syscore);
return 0;
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 8d023239ce18..053555206d81 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -338,7 +338,7 @@ static __init int dev_mcelog_init_device(void)
int err;
mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
- mcelog = kzalloc(struct_size(mcelog, entry, mce_log_len), GFP_KERNEL);
+ mcelog = kzalloc_flex(*mcelog, entry, mce_log_len);
if (!mcelog)
return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 06e3cf7229ce..d02c4f556cd0 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -24,10 +24,11 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
-#include <asm/amd_nb.h>
+#include <asm/amd/nb.h>
#include <asm/apic.h>
#include <asm/irq_vectors.h>
#include <asm/mce.h>
+#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/smp.h>
@@ -475,27 +476,27 @@ static void prepare_msrs(void *info)
struct mce m = *(struct mce *)info;
u8 b = m.bank;
- wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+ wrmsrq(MSR_IA32_MCG_STATUS, m.mcgstatus);
if (boot_cpu_has(X86_FEATURE_SMCA)) {
if (m.inject_flags == DFR_INT_INJ) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
+ wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
+ wrmsrq(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
} else {
- wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
+ wrmsrq(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
+ wrmsrq(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
}
- wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
+ wrmsrq(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
if (m.misc)
- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
+ wrmsrq(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
} else {
- wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
- wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
+ wrmsrq(MSR_IA32_MCx_STATUS(b), m.status);
+ wrmsrq(MSR_IA32_MCx_ADDR(b), m.addr);
if (m.misc)
- wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
+ wrmsrq(MSR_IA32_MCx_MISC(b), m.misc);
}
}
@@ -589,7 +590,7 @@ static int inj_bank_set(void *data, u64 val)
u64 cap;
/* Get bank count on target CPU so we can handle non-uniform values. */
- rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
+ rdmsrq_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
n_banks = cap & MCG_BANKCNT_MASK;
if (val >= n_banks) {
@@ -613,7 +614,7 @@ static int inj_bank_set(void *data, u64 val)
if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
u64 ipid;
- if (rdmsrl_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
+ if (rdmsrq_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
pr_err("Error reading IPID on CPU%d\n", m->extcpu);
return -EINVAL;
}
@@ -741,15 +742,15 @@ static void check_hw_inj_possible(void)
u64 status = MCI_STATUS_VAL, ipid;
/* Check whether bank is populated */
- rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
+ rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
if (!ipid)
continue;
toggle_hw_mce_inject(cpu, true);
- wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
- rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
- wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0);
+ wrmsrq_safe(mca_msr_reg(bank, MCA_STATUS), status);
+ rdmsrq_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+ wrmsrq_safe(mca_msr_reg(bank, MCA_STATUS), 0);
if (!status) {
hw_injection_possible = false;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f863df0ff42c..4655223ba560 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -94,7 +94,7 @@ static bool cmci_supported(int *banks)
if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return false;
- rdmsrl(MSR_IA32_MCG_CAP, cap);
+ rdmsrq(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & MCG_BANKCNT_MASK);
return !!(cap & MCG_CMCI_P);
}
@@ -106,7 +106,7 @@ static bool lmce_supported(void)
if (mca_cfg.lmce_disabled)
return false;
- rdmsrl(MSR_IA32_MCG_CAP, tmp);
+ rdmsrq(MSR_IA32_MCG_CAP, tmp);
/*
* LMCE depends on recovery support in the processor. Hence both
@@ -123,7 +123,7 @@ static bool lmce_supported(void)
* WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
* locks the MSR in the event that it wasn't already locked by BIOS.
*/
- rdmsrl(MSR_IA32_FEAT_CTL, tmp);
+ rdmsrq(MSR_IA32_FEAT_CTL, tmp);
if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
return false;
@@ -141,9 +141,9 @@ static void cmci_set_threshold(int bank, int thresh)
u64 val;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val | thresh);
+ wrmsrq(MSR_IA32_MCx_CTL2(bank), val | thresh);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
@@ -184,7 +184,7 @@ static bool cmci_skip_bank(int bank, u64 *val)
if (test_bit(bank, mce_banks_ce_disabled))
return true;
- rdmsrl(MSR_IA32_MCx_CTL2(bank), *val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), *val);
/* Already owned by someone else? */
if (*val & MCI_CTL2_CMCI_EN) {
@@ -232,8 +232,8 @@ static void cmci_claim_bank(int bank, u64 val, int bios_zero_thresh, int *bios_w
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
val |= MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
/* If the enable bit did not stick, this bank should be polled. */
if (!(val & MCI_CTL2_CMCI_EN)) {
@@ -324,9 +324,9 @@ static void __cmci_disable_bank(int bank)
if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
return;
- rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ rdmsrq(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ wrmsrq(MSR_IA32_MCx_CTL2(bank), val);
__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
if ((val & MCI_CTL2_CMCI_THRESHOLD_MASK) == CMCI_STORM_THRESHOLD)
@@ -430,10 +430,10 @@ void intel_init_lmce(void)
if (!lmce_supported())
return;
- rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+ rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
if (!(val & MCG_EXT_CTL_LMCE_EN))
- wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
+ wrmsrq(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}
void intel_clear_lmce(void)
@@ -443,9 +443,9 @@ void intel_clear_lmce(void)
if (!lmce_supported())
return;
- rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+ rdmsrq(MSR_IA32_MCG_EXT_CTL, val);
val &= ~MCG_EXT_CTL_LMCE_EN;
- wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+ wrmsrq(MSR_IA32_MCG_EXT_CTL, val);
}
/*
@@ -460,16 +460,34 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
case INTEL_SANDYBRIDGE_X:
case INTEL_IVYBRIDGE_X:
case INTEL_HASWELL_X:
- if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
+ if (rdmsrq_safe(MSR_ERROR_CONTROL, &error_control))
return;
error_control |= 2;
- wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
+ wrmsrq_safe(MSR_ERROR_CONTROL, error_control);
break;
}
}
+static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ /*
+ * SDM documents that on family 6 bank 0 should not be written
+ * because it aliases to another special BIOS controlled
+ * register.
+ * But it's not aliased anymore on model 0x1a+
+ * Don't ignore bank 0 completely because there could be a
+ * valid event later, merely don't write CTL0.
+ *
+ * Older CPUs (prior to family 6) can't reach this point and already
+ * return early due to the check of __mcheck_cpu_ancient_init().
+ */
+ if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
+ this_cpu_ptr(mce_banks_array)[0].init = false;
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
+ intel_apply_cpu_quirks(c);
intel_init_cmci();
intel_init_lmce();
intel_imc_init(c);
@@ -478,6 +496,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
+ cmci_clear();
}
bool intel_filter_mce(struct mce *m)
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 95a504ece43e..a31cf984619c 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -67,6 +67,7 @@ void mce_track_storm(struct mce *mce);
void mce_inherit_storm(unsigned int bank);
bool mce_get_storm_mode(void);
void mce_set_storm_mode(bool storm);
+u32 mce_get_apei_thr_limit(void);
#else
static inline void cmci_storm_begin(unsigned int bank) {}
static inline void cmci_storm_end(unsigned int bank) {}
@@ -74,6 +75,7 @@ static inline void mce_track_storm(struct mce *mce) {}
static inline void mce_inherit_storm(unsigned int bank) {}
static inline bool mce_get_storm_mode(void) { return false; }
static inline void mce_set_storm_mode(bool storm) {}
+static inline u32 mce_get_apei_thr_limit(void) { return 0; }
#endif
/*
@@ -265,8 +267,12 @@ void mce_prep_record_common(struct mce *m);
void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
+void mce_threshold_create_device(unsigned int cpu);
+void mce_threshold_remove_device(unsigned int cpu);
+void mce_amd_handle_storm(unsigned int bank, bool on);
extern bool amd_filter_mce(struct mce *m);
bool amd_mce_usable_address(struct mce *m);
+void amd_clear_bank(struct mce *m);
/*
* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
@@ -292,10 +298,16 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
m->addr &= GENMASK_ULL(55, lsb);
}
+void smca_bsp_init(void);
#else
+static inline void mce_threshold_create_device(unsigned int cpu) { }
+static inline void mce_threshold_remove_device(unsigned int cpu) { }
+static inline void mce_amd_handle_storm(unsigned int bank, bool on) { }
static inline bool amd_filter_mce(struct mce *m) { return false; }
static inline bool amd_mce_usable_address(struct mce *m) { return false; }
+static inline void amd_clear_bank(struct mce *m) { }
static inline void smca_extract_err_addr(struct mce *m) { }
+static inline void smca_bsp_init(void) { }
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@@ -312,7 +324,8 @@ static __always_inline void pentium_machine_check(struct pt_regs *regs) {}
static __always_inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
-noinstr u64 mce_rdmsrl(u32 msr);
+noinstr u64 mce_rdmsrq(u32 msr);
+noinstr void mce_wrmsrq(u32 msr, u64 v);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
diff --git a/arch/x86/kernel/cpu/mce/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c
index f4a007616468..0d13c9ffcba0 100644
--- a/arch/x86/kernel/cpu/mce/threshold.c
+++ b/arch/x86/kernel/cpu/mce/threshold.c
@@ -13,6 +13,19 @@
#include "internal.h"
+static u32 mce_apei_thr_limit;
+
+void mce_save_apei_thr_limit(u32 thr_limit)
+{
+ mce_apei_thr_limit = thr_limit;
+ pr_info("HEST corrected error threshold limit: %u\n", thr_limit);
+}
+
+u32 mce_get_apei_thr_limit(void)
+{
+ return mce_apei_thr_limit;
+}
+
static void default_threshold_interrupt(void)
{
pr_err("Unexpected threshold interrupt at vector %x\n",
@@ -63,6 +76,9 @@ static void mce_handle_storm(unsigned int bank, bool on)
case X86_VENDOR_INTEL:
mce_intel_handle_storm(bank, on);
break;
+ case X86_VENDOR_AMD:
+ mce_amd_handle_storm(bank, on);
+ break;
}
}
@@ -85,7 +101,8 @@ void cmci_storm_end(unsigned int bank)
{
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
- __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
+ if (!mce_flags.amd_threshold)
+ __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
storm->banks[bank].history = 0;
storm->banks[bank].in_storm_mode = false;