From e147133a42cb9df6cbc99503fdf58d0e6388bf2a Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:40 +0000 Subject: ACPI / APEI: Make hest.c manage the estatus memory pool ghes.c has a memory pool it uses for the estatus cache and the estatus queue. The cache is initialised when registering the platform driver. For the queue, an NMI-like notification has to grow/shrink the pool as it is registered and unregistered. This is all pretty noisy when adding new NMI-like notifications; it would be better to replace this with a static pool size based on the number of users. As a precursor, move the call that creates the pool from ghes_init() into hest.c. Later this will take the number of ghes entries and consolidate the queue allocations. Remove ghes_estatus_pool_exit() as hest.c doesn't have anywhere to put this. The pool is now initialised as part of ACPI's subsys_initcall(): (acpi_init(), acpi_scan_init(), acpi_pci_root_init(), acpi_hest_init()) Before this patch it happened later as a GHES-specific device_initcall(). Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- include/acpi/ghes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/acpi/ghes.h') diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 82cb4eb225a4..46ef5566e052 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -52,6 +52,8 @@ enum { GHES_SEV_PANIC = 0x3, }; +int ghes_estatus_pool_init(void); + /* From drivers/edac/ghes_edac.c */ #ifdef CONFIG_EDAC_GHES -- cgit v1.2.3 From fb7be08f1a091ec243780bfdad4bf0c492057808 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:41 +0000 Subject: ACPI / APEI: Make estatus pool allocation a static size Adding new NMI-like notifications duplicates the calls that grow and shrink the estatus pool. This is all pretty pointless, as the size is capped to 64K. Allocate this for each ghes and drop the code that grows and shrinks the pool. 
Suggested-by: Borislav Petkov Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 49 ++++++------------------------------------------ drivers/acpi/apei/hest.c | 2 +- include/acpi/ghes.h | 2 +- 3 files changed, 8 insertions(+), 45 deletions(-) (limited to 'include/acpi/ghes.h') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 4150c72c78cb..33144ab0661a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -162,27 +162,18 @@ static void ghes_iounmap_irq(void) clear_fixmap(FIX_APEI_GHES_IRQ); } -static int ghes_estatus_pool_expand(unsigned long len); //temporary - -int ghes_estatus_pool_init(void) +int ghes_estatus_pool_init(int num_ghes) { + unsigned long addr, len; + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); if (!ghes_estatus_pool) return -ENOMEM; - return ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * - GHES_ESTATUS_CACHE_ALLOCED_MAX); -} - -static int ghes_estatus_pool_expand(unsigned long len) -{ - unsigned long size, addr; - - ghes_estatus_pool_size_request += PAGE_ALIGN(len); - size = gen_pool_size(ghes_estatus_pool); - if (size >= ghes_estatus_pool_size_request) - return 0; + len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX; + len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE); + ghes_estatus_pool_size_request = PAGE_ALIGN(len); addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); if (!addr) return -ENOMEM; @@ -956,32 +947,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) return ret; } -static unsigned long ghes_esource_prealloc_size( - const struct acpi_hest_generic *generic) -{ - unsigned long block_length, prealloc_records, prealloc_size; - - block_length = min_t(unsigned long, generic->error_block_length, - GHES_ESTATUS_MAX_SIZE); - prealloc_records = max_t(unsigned long, - generic->records_to_preallocate, 1); - prealloc_size = min_t(unsigned long, block_length * 
prealloc_records, - GHES_ESOURCE_PREALLOC_MAX_SIZE); - - return prealloc_size; -} - -static void ghes_estatus_pool_shrink(unsigned long len) -{ - ghes_estatus_pool_size_request -= PAGE_ALIGN(len); -} - static void ghes_nmi_add(struct ghes *ghes) { - unsigned long len; - - len = ghes_esource_prealloc_size(ghes->generic); - ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); @@ -991,8 +958,6 @@ static void ghes_nmi_add(struct ghes *ghes) static void ghes_nmi_remove(struct ghes *ghes) { - unsigned long len; - mutex_lock(&ghes_list_mutex); list_del_rcu(&ghes->list); if (list_empty(&ghes_nmi)) @@ -1003,8 +968,6 @@ static void ghes_nmi_remove(struct ghes *ghes) * freed after NMI handler finishes. */ synchronize_rcu(); - len = ghes_esource_prealloc_size(ghes->generic); - ghes_estatus_pool_shrink(len); } static void ghes_nmi_init_cxt(void) diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index e33bfd9d256c..8113ddb14d28 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -211,7 +211,7 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count) if (rc) goto err; - rc = ghes_estatus_pool_init(); + rc = ghes_estatus_pool_init(ghes_count); if (rc) goto err; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 46ef5566e052..cd9ee507d860 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -52,7 +52,7 @@ enum { GHES_SEV_PANIC = 0x3, }; -int ghes_estatus_pool_init(void); +int ghes_estatus_pool_init(int num_ghes); /* From drivers/edac/ghes_edac.c */ -- cgit v1.2.3 From eeb2555779471abdbcc6289a52dc54ce513feaf2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:42 +0000 Subject: ACPI / APEI: Don't store CPER records physical address in struct ghes When CPER records are found the address of the records is stashed in the struct ghes. 
Once the records have been processed, this address is overwritten with zero so that it won't be processed again without being re-populated by firmware. This goes wrong if a struct ghes can be processed concurrently, as can happen at probe time when an NMI occurs. If the NMI arrives on another CPU, the probing CPU may call ghes_clear_estatus() on the records before the handler has finished with them. Even on the same CPU, once the interrupted handler is resumed, it will call ghes_clear_estatus() on the NMI's records; this memory may have already been re-used by firmware. Avoid this stashing by letting the caller hold the address. A later patch will do away with the use of ghes->flags in the read/clear code too. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 46 +++++++++++++++++++++++++++------------------- include/acpi/ghes.h | 1 - 2 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/acpi/ghes.h') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 33144ab0661a..a34f79153b1a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -305,29 +305,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, } } -static int ghes_read_estatus(struct ghes *ghes) +static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) { struct acpi_hest_generic *g = ghes->generic; - u64 buf_paddr; u32 len; int rc; - rc = apei_read(&buf_paddr, &g->error_status_address); + rc = apei_read(buf_paddr, &g->error_status_address); if (rc) { + *buf_paddr = 0; pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block address for hardware error source: %d.\n", g->header.source_id); return -EIO; } - if (!buf_paddr) + if (!*buf_paddr) return -ENOENT; - ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, + ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr, sizeof(*ghes->estatus), 1); - if (!ghes->estatus->block_status) + if 
(!ghes->estatus->block_status) { + *buf_paddr = 0; return -ENOENT; + } - ghes->buffer_paddr = buf_paddr; ghes->flags |= GHES_TO_CLEAR; rc = -EIO; @@ -339,7 +340,7 @@ static int ghes_read_estatus(struct ghes *ghes) if (cper_estatus_check_header(ghes->estatus)) goto err_read_block; ghes_copy_tofrom_phys(ghes->estatus + 1, - buf_paddr + sizeof(*ghes->estatus), + *buf_paddr + sizeof(*ghes->estatus), len - sizeof(*ghes->estatus), 1); if (cper_estatus_check(ghes->estatus)) goto err_read_block; @@ -349,15 +350,20 @@ err_read_block: if (rc) pr_warn_ratelimited(FW_WARN GHES_PFX "Failed to read error status block!\n"); + return rc; } -static void ghes_clear_estatus(struct ghes *ghes) +static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) { ghes->estatus->block_status = 0; if (!(ghes->flags & GHES_TO_CLEAR)) return; - ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, + + if (!buf_paddr) + return; + + ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, sizeof(ghes->estatus->block_status), 0); ghes->flags &= ~GHES_TO_CLEAR; } @@ -666,11 +672,11 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) return apei_write(val, &gv2->read_ack_register); } -static void __ghes_panic(struct ghes *ghes) +static void __ghes_panic(struct ghes *ghes, u64 buf_paddr) { __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, buf_paddr); /* reboot to log the error! 
*/ if (!panic_timeout) @@ -680,14 +686,15 @@ static void __ghes_panic(struct ghes *ghes) static int ghes_proc(struct ghes *ghes) { + u64 buf_paddr; int rc; - rc = ghes_read_estatus(ghes); + rc = ghes_read_estatus(ghes, &buf_paddr); if (rc) goto out; if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { - __ghes_panic(ghes); + __ghes_panic(ghes, buf_paddr); } if (!ghes_estatus_cached(ghes->estatus)) { @@ -697,7 +704,7 @@ static int ghes_proc(struct ghes *ghes) ghes_do_proc(ghes, ghes->estatus); out: - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, buf_paddr); if (rc == -ENOENT) return rc; @@ -912,6 +919,7 @@ static void __process_error(struct ghes *ghes) static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { + u64 buf_paddr; struct ghes *ghes; int sev, ret = NMI_DONE; @@ -919,8 +927,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) return ret; list_for_each_entry_rcu(ghes, &ghes_nmi, list) { - if (ghes_read_estatus(ghes)) { - ghes_clear_estatus(ghes); + if (ghes_read_estatus(ghes, &buf_paddr)) { + ghes_clear_estatus(ghes, buf_paddr); continue; } else { ret = NMI_HANDLED; @@ -929,14 +937,14 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev = ghes_severity(ghes->estatus->error_severity); if (sev >= GHES_SEV_PANIC) { ghes_print_queued_estatus(); - __ghes_panic(ghes); + __ghes_panic(ghes, buf_paddr); } if (!(ghes->flags & GHES_TO_CLEAR)) continue; __process_error(ghes); - ghes_clear_estatus(ghes); + ghes_clear_estatus(ghes, buf_paddr); } #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index cd9ee507d860..f82f4a7ddd90 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -22,7 +22,6 @@ struct ghes { struct acpi_hest_generic_v2 *generic_v2; }; struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; unsigned long flags; union { struct list_head list; -- cgit v1.2.3 From 5cc6c68287ae4be22c40b41cf6844746cddebbcc Mon Sep 17 00:00:00 
2001 From: James Morse Date: Tue, 29 Jan 2019 18:48:44 +0000 Subject: ACPI / APEI: Don't update struct ghes' flags in read/clear estatus ghes_read_estatus() sets a flag in struct ghes if the buffer of CPER records needs to be cleared once the records have been processed. This flag value is a problem if a struct ghes can be processed concurrently, as happens at probe time if an NMI arrives for the same error source. The NMI clears the flag, meaning the interrupted handler may never do the ghes_clear_estatus() work. The GHES_TO_CLEAR flag is only set at the same time as buffer_paddr, which is now owned by the caller and passed to ghes_clear_estatus(). Use this value as the flag. A non-zero buf_paddr returned by ghes_read_estatus() means ghes_clear_estatus() should clear this address. ghes_read_estatus() already checks for a read of error_status_address being zero, so CPER records cannot be written here. Signed-off-by: James Morse Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 5 ----- include/acpi/ghes.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include/acpi/ghes.h') diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index c20e1d0947b1..af3c10f47f20 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -329,8 +329,6 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr) return -ENOENT; } - ghes->flags |= GHES_TO_CLEAR; - rc = -EIO; len = cper_estatus_len(ghes->estatus); if (len < sizeof(*ghes->estatus)) @@ -357,15 +355,12 @@ err_read_block: static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr) { ghes->estatus->block_status = 0; - if (!(ghes->flags & GHES_TO_CLEAR)) - return; if (!buf_paddr) return; ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, sizeof(ghes->estatus->block_status), 0); - ghes->flags &= ~GHES_TO_CLEAR; } static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h 
index f82f4a7ddd90..e3f1cddb4ac8 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -13,7 +13,6 @@ * estatus: memory buffer for error status block, allocated during * HEST parsing. */ -#define GHES_TO_CLEAR 0x0001 #define GHES_EXITING 0x0002 struct ghes { -- cgit v1.2.3