summaryrefslogtreecommitdiff
path: root/drivers/acpi/apei/ghes.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 14:47:31 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-12 14:47:31 -0800
commit7adcadb98405cb4ef56b2518164026c1069d8669 (patch)
tree0afb774a1872aabc016a65ee76342fce4a4c6f57 /drivers/acpi/apei/ghes.c
parent40deb5e41ac783d49371940581db2ae108a754d1 (diff)
parent3919430fe93bcfad5e34cdbb4d81cd17b3bbd27a (diff)
downloadlwn-7adcadb98405cb4ef56b2518164026c1069d8669.tar.gz
lwn-7adcadb98405cb4ef56b2518164026c1069d8669.zip
Merge tag 'edac_updates_for_6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov: - Make ghes_edac a simple module like the rest of the EDAC drivers and drop the forced built-in only configuration by disentangling it from GHES (Jia He) - The usual small cleanups and improvements all over EDAC land * tag 'edac_updates_for_6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/i10nm: fix refcount leak in pci_get_dev_wrapper() EDAC/i5400: Fix typo in comment: vaious -> various EDAC/mc_sysfs: Increase legacy channel support to 12 MAINTAINERS: Make Mauro EDAC reviewer MAINTAINERS: Make Manivannan Sadhasivam the maintainer of qcom_edac EDAC/igen6: Return the correct error type when not the MC owner apei/ghes: Use xchg_release() for updating new cache slot instead of cmpxchg() EDAC: Check for GHES preference in the chipset-specific EDAC drivers EDAC/ghes: Make ghes_edac a proper module EDAC/ghes: Prepare to make ghes_edac a proper module EDAC/ghes: Add a notifier for reporting memory errors efi/cper: Export several helpers for ghes_edac to use EDAC/i5000: Mark as BROKEN
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r--drivers/acpi/apei/ghes.c66
1 files changed, 63 insertions, 3 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 94c4872ae55f..066dc1f5c235 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -94,6 +94,8 @@
#define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses
#endif
+static ATOMIC_NOTIFIER_HEAD(ghes_report_chain);
+
static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
@@ -108,6 +110,13 @@ bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);
/*
+ * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform
+ * check.
+ */
+static bool ghes_edac_force_enable;
+module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0);
+
+/*
* All error sources notified with HED (Hardware Error Device) share a
* single notifier callback, so they need to be linked and checked one
* by one. This holds true for NMI too.
@@ -119,6 +128,13 @@ static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);
/*
+ * A list of GHES devices which are given to the corresponding EDAC driver
+ * ghes_edac for further use.
+ */
+static LIST_HEAD(ghes_devs);
+static DEFINE_MUTEX(ghes_devs_mutex);
+
+/*
* Because the memory area used to transfer hardware error information
* from BIOS to Linux can be determined only in NMI, IRQ or timer
* handler, but general ioremap can not be used in atomic context, so
@@ -645,7 +661,7 @@ static bool ghes_do_proc(struct ghes *ghes,
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
- ghes_edac_report_mem_error(sev, mem_err);
+ atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
queued = ghes_handle_memory_failure(gdata, sev);
@@ -1382,7 +1398,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
platform_set_drvdata(ghes_dev, ghes);
- ghes_edac_register(ghes, &ghes_dev->dev);
+ ghes->dev = &ghes_dev->dev;
+
+ mutex_lock(&ghes_devs_mutex);
+ list_add_tail(&ghes->elist, &ghes_devs);
+ mutex_unlock(&ghes_devs_mutex);
/* Handle any pending errors right away */
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
@@ -1446,7 +1466,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
ghes_fini(ghes);
- ghes_edac_unregister(ghes);
+ mutex_lock(&ghes_devs_mutex);
+ list_del(&ghes->elist);
+ mutex_unlock(&ghes_devs_mutex);
kfree(ghes);
@@ -1501,3 +1523,41 @@ void __init acpi_ghes_init(void)
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
}
+
+/*
+ * Known x86 systems that prefer GHES error reporting:
+ */
+static struct acpi_platform_list plat_list[] = {
+ {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions},
+ { } /* End */
+};
+
+struct list_head *ghes_get_devices(void)
+{
+ int idx = -1;
+
+ if (IS_ENABLED(CONFIG_X86)) {
+ idx = acpi_match_platform_list(plat_list);
+ if (idx < 0) {
+ if (!ghes_edac_force_enable)
+ return NULL;
+
+ pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n");
+ }
+ }
+
+ return &ghes_devs;
+}
+EXPORT_SYMBOL_GPL(ghes_get_devices);
+
+void ghes_register_report_chain(struct notifier_block *nb)
+{
+ atomic_notifier_chain_register(&ghes_report_chain, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_report_chain);
+
+void ghes_unregister_report_chain(struct notifier_block *nb)
+{
+ atomic_notifier_chain_unregister(&ghes_report_chain, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_report_chain);