From 563ca40ddf400dbf8c6254077f9b6887101d0f08 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 09:16:02 -0700 Subject: pstore/platform: Switch pstore_info::name to const In order to more cleanly pass around backend names, make the "name" member const. This means the module param needs to be dynamic (technically, it was before, so this actually cleans up a minor memory leak if a backend was specified and then gets unloaded.) Link: https://lore.kernel.org/lkml/20200510202436.63222-3-keescook@chromium.org/ Signed-off-by: Kees Cook --- include/linux/pstore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index e779441e6d26..f6f22b13e04f 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -170,7 +170,7 @@ struct pstore_record { */ struct pstore_info { struct module *owner; - char *name; + const char *name; struct semaphore buf_lock; char *buf; -- cgit v1.2.3 From 6d3cf962dd1a95df868c547b090bfc4c7977f4be Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 May 2020 11:05:43 -0700 Subject: printk: Collapse shutdown types into a single dump reason To turn the KMSG_DUMP_* reasons into a more ordered list, collapse the redundant KMSG_DUMP_(RESTART|HALT|POWEROFF) reasons into KMSG_DUMP_SHUTDOWN. 
The current users already don't meaningfully distinguish between them, so there's no need to, as discussed here: https://lore.kernel.org/lkml/CA+CK2bAPv5u1ih5y9t5FUnTyximtFCtDYXJCpuyjOyHNOkRdqw@mail.gmail.com/ Link: https://lore.kernel.org/lkml/20200515184434.8470-2-keescook@chromium.org/ Reviewed-by: Pavel Tatashin Reviewed-by: Petr Mladek Acked-by: Michael Ellerman (powerpc) Signed-off-by: Kees Cook --- arch/powerpc/kernel/nvram_64.c | 4 +--- fs/pstore/platform.c | 8 ++------ include/linux/kmsg_dump.h | 4 +--- kernel/reboot.c | 6 +++--- 4 files changed, 7 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index fb4f61096613..0cd1c88bfc8b 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -655,9 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, int rc = -1; switch (reason) { - case KMSG_DUMP_RESTART: - case KMSG_DUMP_HALT: - case KMSG_DUMP_POWEROFF: + case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. 
*/ return; case KMSG_DUMP_OOPS: diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 072440457c08..90d74ebaa70a 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -144,12 +144,8 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) return "Oops"; case KMSG_DUMP_EMERG: return "Emergency"; - case KMSG_DUMP_RESTART: - return "Restart"; - case KMSG_DUMP_HALT: - return "Halt"; - case KMSG_DUMP_POWEROFF: - return "Poweroff"; + case KMSG_DUMP_SHUTDOWN: + return "Shutdown"; default: return "Unknown"; } diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 2e7a1e032c71..3f82b5cb2d82 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -25,9 +25,7 @@ enum kmsg_dump_reason { KMSG_DUMP_PANIC, KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, - KMSG_DUMP_RESTART, - KMSG_DUMP_HALT, - KMSG_DUMP_POWEROFF, + KMSG_DUMP_SHUTDOWN, }; /** diff --git a/kernel/reboot.c b/kernel/reboot.c index c4d472b7f1b4..491f1347bf43 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -250,7 +250,7 @@ void kernel_restart(char *cmd) pr_emerg("Restarting system\n"); else pr_emerg("Restarting system with command '%s'\n", cmd); - kmsg_dump(KMSG_DUMP_RESTART); + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_restart(cmd); } EXPORT_SYMBOL_GPL(kernel_restart); @@ -274,7 +274,7 @@ void kernel_halt(void) migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("System halted\n"); - kmsg_dump(KMSG_DUMP_HALT); + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_halt(); } EXPORT_SYMBOL_GPL(kernel_halt); @@ -292,7 +292,7 @@ void kernel_power_off(void) migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("Power down\n"); - kmsg_dump(KMSG_DUMP_POWEROFF); + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_power_off(); } EXPORT_SYMBOL_GPL(kernel_power_off); -- cgit v1.2.3 From b1f6f161b236d0e5a9222fb8b482e65aaff13689 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 5 May 2020 11:45:06 -0400 Subject: printk: honor the max_reason field in kmsg_dumper kmsg_dump() allows to dump kmesg 
buffer for various system events: oops, panic, reboot, etc. It provides an interface to register a callback call for clients, and in that callback interface there is a field "max_reason", but it was getting ignored when set to any "reason" higher than KMSG_DUMP_OOPS unless "always_kmsg_dump" was passed as kernel parameter. Allow clients to actually control their "max_reason", and keep the current behavior when "max_reason" is not set. Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-3-keescook@chromium.org/ Reviewed-by: Petr Mladek Signed-off-by: Kees Cook --- include/linux/kmsg_dump.h | 1 + kernel/printk/printk.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 3f82b5cb2d82..9826014771ab 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -26,6 +26,7 @@ enum kmsg_dump_reason { KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, KMSG_DUMP_SHUTDOWN, + KMSG_DUMP_MAX }; /** diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9a9b6156270b..a121c2255737 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3157,12 +3157,19 @@ void kmsg_dump(enum kmsg_dump_reason reason) struct kmsg_dumper *dumper; unsigned long flags; - if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) - return; - rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { - if (dumper->max_reason && reason > dumper->max_reason) + enum kmsg_dump_reason max_reason = dumper->max_reason; + + /* + * If client has not provided a specific max_reason, default + * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. + */ + if (max_reason == KMSG_DUMP_UNDEF) { + max_reason = always_kmsg_dump ? 
KMSG_DUMP_MAX : + KMSG_DUMP_OOPS; + } + if (reason > max_reason) continue; /* initialize iterator with data about the stored records */ -- cgit v1.2.3 From fb13cb8a0482105a415e24042209d02a684255e2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 7 May 2020 19:36:22 -0700 Subject: printk: Introduce kmsg_dump_reason_str() The pstore subsystem already had a private version of this function. With the coming addition of the pstore/zone driver, this needs to be shared. As it really should live with printk, move it there instead. Link: https://lore.kernel.org/lkml/20200515184434.8470-4-keescook@chromium.org/ Acked-by: Petr Mladek Acked-by: Sergey Senozhatsky Reviewed-by: Pavel Tatashin Signed-off-by: Kees Cook --- fs/pstore/platform.c | 18 +----------------- include/linux/kmsg_dump.h | 7 +++++++ kernel/printk/printk.c | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 90d74ebaa70a..5e6c6022deb9 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -135,22 +135,6 @@ enum pstore_type_id pstore_name_to_type(const char *name) } EXPORT_SYMBOL_GPL(pstore_name_to_type); -static const char *get_reason_str(enum kmsg_dump_reason reason) -{ - switch (reason) { - case KMSG_DUMP_PANIC: - return "Panic"; - case KMSG_DUMP_OOPS: - return "Oops"; - case KMSG_DUMP_EMERG: - return "Emergency"; - case KMSG_DUMP_SHUTDOWN: - return "Shutdown"; - default: - return "Unknown"; - } -} - static void pstore_timer_kick(void) { if (pstore_update_ms < 0) @@ -403,7 +387,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned int part = 1; int ret; - why = get_reason_str(reason); + why = kmsg_dump_reason_str(reason); if (down_trylock(&psinfo->buf_lock)) { /* Failed to acquire lock: give up if we cannot wait. 
*/ diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 9826014771ab..3378bcbe585e 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -70,6 +70,8 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper); int kmsg_dump_register(struct kmsg_dumper *dumper); int kmsg_dump_unregister(struct kmsg_dumper *dumper); + +const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason); #else static inline void kmsg_dump(enum kmsg_dump_reason reason) { @@ -111,6 +113,11 @@ static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper) { return -EINVAL; } + +static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) +{ + return "Disabled"; +} #endif #endif /* _LINUX_KMSG_DUMP_H */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a121c2255737..14ca4d05d902 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3144,6 +3144,23 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister); static bool always_kmsg_dump; module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); +const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) +{ + switch (reason) { + case KMSG_DUMP_PANIC: + return "Panic"; + case KMSG_DUMP_OOPS: + return "Oops"; + case KMSG_DUMP_EMERG: + return "Emergency"; + case KMSG_DUMP_SHUTDOWN: + return "Shutdown"; + default: + return "Unknown"; + } +} +EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); + /** * kmsg_dump - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping -- cgit v1.2.3 From 3524e688b8ee50b0edc76f0e020727eb6c684dbc Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 5 May 2020 11:45:07 -0400 Subject: pstore/platform: Pass max_reason to kmesg dump Add a new member to struct pstore_info for passing information about kmesg dump maximum reason. This allows a finer control of what kmesg dumps are sent to pstore storage backends. 
Those backends that do not explicitly set this field (keeping it equal to 0), get the default behavior: store only Oopses and Panics, or everything if the printk.always_kmsg_dump boot param is set. Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-5-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/platform.c | 4 +++- include/linux/pstore.h | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 5e6c6022deb9..a9e297eefdff 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -595,8 +595,10 @@ int pstore_register(struct pstore_info *psi) pstore_get_records(0); - if (psi->flags & PSTORE_FLAGS_DMESG) + if (psi->flags & PSTORE_FLAGS_DMESG) { + pstore_dumper.max_reason = psinfo->max_reason; pstore_register_kmsg(); + } if (psi->flags & PSTORE_FLAGS_CONSOLE) pstore_register_console(); if (psi->flags & PSTORE_FLAGS_FTRACE) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index f6f22b13e04f..eb93a54cff31 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -96,6 +96,12 @@ struct pstore_record { * * @read_mutex: serializes @open, @read, @close, and @erase callbacks * @flags: bitfield of frontends the backend can accept writes for + * @max_reason: Used when PSTORE_FLAGS_DMESG is set. Contains the + * kmsg_dump_reason enum value. KMSG_DUMP_UNDEF means + * "use existing kmsg_dump() filtering, based on the + * printk.always_kmsg_dump boot param" (which is either + * KMSG_DUMP_OOPS when false, or KMSG_DUMP_MAX when + * true); see printk.always_kmsg_dump for more details. 
* @data: backend-private pointer passed back during callbacks * * Callbacks: @@ -179,6 +185,7 @@ struct pstore_info { struct mutex read_mutex; int flags; + int max_reason; void *data; int (*open)(struct pstore_info *psi); -- cgit v1.2.3 From 791205e3ec6081a8da6f00621e3453d622dc41e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 13 May 2020 14:35:03 -0700 Subject: pstore/ram: Introduce max_reason and convert dump_oops Now that pstore_register() can correctly pass max_reason to the kmesg dump facility, introduce a new "max_reason" module parameter and "max-reason" Device Tree field. The "dump_oops" module parameter and "dump-oops" Device Tree field are now considered deprecated, but are now automatically converted to their corresponding max_reason values when present, though the new max_reason setting has precedence. For struct ramoops_platform_data, the "dump_oops" member is entirely replaced by a new "max_reason" member, with the only existing user updated in place. Additionally remove the "reason" filter logic from ramoops_pstore_write(), as that is not specifically needed anymore, though technically this is a change in behavior for any ramoops users also setting the printk.always_kmsg_dump boot param, which will cause ramoops to behave as if max_reason was set to KMSG_DUMP_MAX. 
Co-developed-by: Pavel Tatashin Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-6-keescook@chromium.org/ Signed-off-by: Kees Cook --- Documentation/admin-guide/ramoops.rst | 14 +++++--- drivers/platform/chrome/chromeos_pstore.c | 2 +- fs/pstore/ram.c | 58 +++++++++++++++++++++---------- include/linux/pstore_ram.h | 2 +- 4 files changed, 51 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst index 6dbcc5481000..a60a96218ba9 100644 --- a/Documentation/admin-guide/ramoops.rst +++ b/Documentation/admin-guide/ramoops.rst @@ -32,11 +32,17 @@ memory to be mapped strongly ordered, and atomic operations on strongly ordered memory are implementation defined, and won't work on many ARMs such as omaps. The memory area is divided into ``record_size`` chunks (also rounded down to -power of two) and each oops/panic writes a ``record_size`` chunk of +power of two) and each kmesg dump writes a ``record_size`` chunk of information. -Dumping both oopses and panics can be done by setting 1 in the ``dump_oops`` -variable while setting 0 in that variable dumps only the panics. +Limiting which kinds of kmsg dumps are stored can be controlled via +the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's +``enum kmsg_dump_reason``. For example, to store both Oopses and Panics, +``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics +``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0 +(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the +``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS, +otherwise KMSG_DUMP_MAX. The module uses a counter to record multiple dumps but the counter gets reset on restart (i.e. new dumps after the restart will overwrite old ones). 
@@ -90,7 +96,7 @@ Setting the ramoops parameters can be done in several different manners: .mem_address = <...>, .mem_type = <...>, .record_size = <...>, - .dump_oops = <...>, + .max_reason = <...>, .ecc = <...>, }; diff --git a/drivers/platform/chrome/chromeos_pstore.c b/drivers/platform/chrome/chromeos_pstore.c index d13770785fb5..fa51153688b4 100644 --- a/drivers/platform/chrome/chromeos_pstore.c +++ b/drivers/platform/chrome/chromeos_pstore.c @@ -57,7 +57,7 @@ static struct ramoops_platform_data chromeos_ramoops_data = { .record_size = 0x40000, .console_size = 0x20000, .ftrace_size = 0x20000, - .dump_oops = 1, + .max_reason = KMSG_DUMP_OOPS, }; static struct platform_device chromeos_ramoops = { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 26a1f502a3ea..ca6d8a867285 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -58,10 +58,10 @@ module_param(mem_type, uint, 0400); MODULE_PARM_DESC(mem_type, "set to 1 to try to use unbuffered memory (default 0)"); -static int dump_oops = 1; -module_param(dump_oops, int, 0400); -MODULE_PARM_DESC(dump_oops, - "set to 1 to dump oopses, 0 to only dump panics (default 1)"); +static int ramoops_max_reason = -1; +module_param_named(max_reason, ramoops_max_reason, int, 0400); +MODULE_PARM_DESC(max_reason, + "maximum reason for kmsg dump (default 2: Oops and Panic) "); static int ramoops_ecc; module_param_named(ecc, ramoops_ecc, int, 0400); @@ -70,6 +70,11 @@ MODULE_PARM_DESC(ramoops_ecc, "ECC buffer size in bytes (1 is a special value, means 16 " "bytes ECC)"); +static int ramoops_dump_oops = -1; +module_param_named(dump_oops, ramoops_dump_oops, int, 0400); +MODULE_PARM_DESC(dump_oops, + "(deprecated: use max_reason instead) set to 1 to dump oopses & panics, 0 to only dump panics"); + struct ramoops_context { struct persistent_ram_zone **dprzs; /* Oops dump zones */ struct persistent_ram_zone *cprz; /* Console zone */ @@ -82,7 +87,6 @@ struct ramoops_context { size_t console_size; size_t ftrace_size; size_t pmsg_size; - 
int dump_oops; u32 flags; struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; @@ -336,16 +340,14 @@ static int notrace ramoops_pstore_write(struct pstore_record *record) return -EINVAL; /* - * Out of the various dmesg dump types, ramoops is currently designed - * to only store crash logs, rather than storing general kernel logs. + * We could filter on record->reason here if we wanted to (which + * would duplicate what happened before the "max_reason" setting + * was added), but that would defeat the purpose of a system + * changing printk.always_kmsg_dump, so instead log everything that + * the kmsg dumper sends us, since it should be doing the filtering + * based on the combination of printk.always_kmsg_dump and our + * requested "max_reason". */ - if (record->reason != KMSG_DUMP_OOPS && - record->reason != KMSG_DUMP_PANIC) - return -EINVAL; - - /* Skip Oopes when configured to do so. */ - if (record->reason == KMSG_DUMP_OOPS && !cxt->dump_oops) - return -EINVAL; /* * Explicitly only take the first part of any new crash. @@ -647,7 +649,14 @@ static int ramoops_parse_dt(struct platform_device *pdev, pdata->mem_size = resource_size(res); pdata->mem_address = res->start; pdata->mem_type = of_property_read_bool(of_node, "unbuffered"); - pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops"); + /* + * Setting "no-dump-oops" is deprecated and will be ignored if + * "max_reason" is also specified. 
+ */ + if (of_property_read_bool(of_node, "no-dump-oops")) + pdata->max_reason = KMSG_DUMP_PANIC; + else + pdata->max_reason = KMSG_DUMP_OOPS; #define parse_u32(name, field, default_value) { \ ret = ramoops_parse_dt_u32(pdev, name, default_value, \ @@ -663,6 +672,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, parse_u32("pmsg-size", pdata->pmsg_size, 0); parse_u32("ecc-size", pdata->ecc_info.ecc_size, 0); parse_u32("flags", pdata->flags, 0); + parse_u32("max-reason", pdata->max_reason, pdata->max_reason); #undef parse_u32 @@ -746,7 +756,6 @@ static int ramoops_probe(struct platform_device *pdev) cxt->console_size = pdata->console_size; cxt->ftrace_size = pdata->ftrace_size; cxt->pmsg_size = pdata->pmsg_size; - cxt->dump_oops = pdata->dump_oops; cxt->flags = pdata->flags; cxt->ecc_info = pdata->ecc_info; @@ -789,8 +798,10 @@ static int ramoops_probe(struct platform_device *pdev) * the single region size is how to check. */ cxt->pstore.flags = 0; - if (cxt->max_dump_cnt) + if (cxt->max_dump_cnt) { cxt->pstore.flags |= PSTORE_FLAGS_DMESG; + cxt->pstore.max_reason = pdata->max_reason; + } if (cxt->console_size) cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; if (cxt->max_ftrace_cnt) @@ -826,7 +837,7 @@ static int ramoops_probe(struct platform_device *pdev) mem_size = pdata->mem_size; mem_address = pdata->mem_address; record_size = pdata->record_size; - dump_oops = pdata->dump_oops; + ramoops_max_reason = pdata->max_reason; ramoops_console_size = pdata->console_size; ramoops_pmsg_size = pdata->pmsg_size; ramoops_ftrace_size = pdata->ftrace_size; @@ -909,7 +920,16 @@ static void __init ramoops_register_dummy(void) pdata.console_size = ramoops_console_size; pdata.ftrace_size = ramoops_ftrace_size; pdata.pmsg_size = ramoops_pmsg_size; - pdata.dump_oops = dump_oops; + /* If "max_reason" is set, its value has priority over "dump_oops". 
*/ + if (ramoops_max_reason >= 0) + pdata.max_reason = ramoops_max_reason; + /* Otherwise, if "dump_oops" is set, parse it into "max_reason". */ + else if (ramoops_dump_oops != -1) + pdata.max_reason = ramoops_dump_oops ? KMSG_DUMP_OOPS + : KMSG_DUMP_PANIC; + /* And if neither are explicitly set, use the default. */ + else + pdata.max_reason = KMSG_DUMP_OOPS; pdata.flags = RAMOOPS_FLAG_FTRACE_PER_CPU; /* diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9cb9b9067298..9f16afec7290 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -133,7 +133,7 @@ struct ramoops_platform_data { unsigned long console_size; unsigned long ftrace_size; unsigned long pmsg_size; - int dump_oops; + int max_reason; u32 flags; struct persistent_ram_ecc_info ecc_info; }; -- cgit v1.2.3 From d26c3321fe18dc74517dc1f518d584aa33b0a851 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:54:56 +0800 Subject: pstore/zone: Introduce common layer to manage storage zones Implement a common set of APIs needed to support pstore storage zones, based on how ramoops is designed. This will be used by pstore/blk with the intention of migrating pstore/ram in the future. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-2-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 7 + fs/pstore/Makefile | 3 + fs/pstore/zone.c | 985 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pstore_zone.h | 44 ++ 4 files changed, 1039 insertions(+) create mode 100644 fs/pstore/zone.c create mode 100644 include/linux/pstore_zone.h (limited to 'include/linux') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 8f0369aad22a..98d2457bdd9f 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -153,3 +153,10 @@ config PSTORE_RAM "ramoops.ko". For more information, see Documentation/admin-guide/ramoops.rst. 
+ +config PSTORE_ZONE + tristate + depends on PSTORE + help + The common layer for pstore/blk (and pstore/ram in the future) + to manage storage in zones. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 967b5891f325..58a967cbe4af 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -12,3 +12,6 @@ pstore-$(CONFIG_PSTORE_PMSG) += pmsg.o ramoops-objs += ram.o ram_core.o obj-$(CONFIG_PSTORE_RAM) += ramoops.o + +pstore_zone-objs += zone.o +obj-$(CONFIG_PSTORE_ZONE) += pstore_zone.o diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c new file mode 100644 index 000000000000..362d72a24012 --- /dev/null +++ b/fs/pstore/zone.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide a pstore intermediate backend, organized into kernel memory + * allocated zones that are then mapped and flushed into a single + * contiguous region on a storage backend of some kind (block, mtd, etc). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/** + * struct psz_buffer - header of zone to flush to storage + * + * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) + * @datalen: length of data in @data + * @data: zone data. + */ +struct psz_buffer { +#define PSZ_SIG (0x43474244) /* DBGC */ + uint32_t sig; + atomic_t datalen; + uint8_t data[]; +}; + +/** + * struct psz_kmsg_header - kmsg dump-specific header to flush to storage + * + * @magic: magic num for kmsg dump header + * @time: kmsg dump trigger time + * @compressed: whether the data is compressed + * @counter: kmsg dump counter + * @reason: the kmsg dump reason (e.g. oops, panic, etc) + * @data: pointer to log data + * + * This is a sub-header for a kmsg dump, trailing after &psz_buffer. 
+ */ +struct psz_kmsg_header { +#define PSTORE_KMSG_HEADER_MAGIC 0x4dfc3ae5 /* Just a random number */ + uint32_t magic; + struct timespec64 time; + bool compressed; + uint32_t counter; + enum kmsg_dump_reason reason; + uint8_t data[]; +}; + +/** + * struct pstore_zone - single stored buffer + * + * @off: zone offset of storage + * @type: front-end type for this zone + * @name: front-end name for this zone + * @buffer: pointer to data buffer managed by this zone + * @oldbuf: pointer to old data buffer + * @buffer_size: bytes in @buffer->data + * @should_recover: whether this zone should recover from storage + * @dirty: whether the data in @buffer is dirty + * + * zone structure in memory. + */ +struct pstore_zone { + loff_t off; + const char *name; + enum pstore_type_id type; + + struct psz_buffer *buffer; + struct psz_buffer *oldbuf; + size_t buffer_size; + bool should_recover; + atomic_t dirty; +}; + +/** + * struct psz_context - all about running state of pstore/zone + * + * @kpszs: kmsg dump storage zones + * @kmsg_max_cnt: max count of @kpszs + * @kmsg_read_cnt: counter of total read kmsg dumps + * @kmsg_write_cnt: counter of total kmsg dump writes + * @oops_counter: counter of oops dumps + * @panic_counter: counter of panic dumps + * @recovered: whether finished recovering data from storage + * @on_panic: whether panic is happening + * @pstore_zone_info_lock: lock to @pstore_zone_info + * @pstore_zone_info: information from backend + * @pstore: structure for pstore + */ +struct psz_context { + struct pstore_zone **kpszs; + unsigned int kmsg_max_cnt; + unsigned int kmsg_read_cnt; + unsigned int kmsg_write_cnt; + /* + * These counters should be calculated during recovery. + * It records the oops/panic times after crashes rather than boots. 
+ */ + unsigned int oops_counter; + unsigned int panic_counter; + atomic_t recovered; + atomic_t on_panic; + + /* + * pstore_zone_info_lock protects this entire structure during calls + * to register_pstore_zone()/unregister_pstore_zone(). + */ + struct mutex pstore_zone_info_lock; + struct pstore_zone_info *pstore_zone_info; + struct pstore_info pstore; +}; +static struct psz_context pstore_zone_cxt; + +/** + * enum psz_flush_mode - flush mode for psz_zone_write() + * + * @FLUSH_NONE: do not flush to storage but update data on memory + * @FLUSH_PART: just flush part of data including meta data to storage + * @FLUSH_META: just flush meta data of zone to storage + * @FLUSH_ALL: flush all of zone + */ +enum psz_flush_mode { + FLUSH_NONE = 0, + FLUSH_PART, + FLUSH_META, + FLUSH_ALL, +}; + +static inline int buffer_datalen(struct pstore_zone *zone) +{ + return atomic_read(&zone->buffer->datalen); +} + +static inline bool is_on_panic(void) +{ + return atomic_read(&pstore_zone_cxt.on_panic); +} + +static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf, + size_t len, unsigned long off) +{ + if (!buf || !zone->buffer) + return -EINVAL; + if (off > zone->buffer_size) + return -EINVAL; + len = min_t(size_t, len, zone->buffer_size - off); + memcpy(buf, zone->buffer->data + off, len); + return len; +} + +static int psz_zone_write(struct pstore_zone *zone, + enum psz_flush_mode flush_mode, const char *buf, + size_t len, unsigned long off) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + ssize_t wcnt = 0; + ssize_t (*writeop)(const char *buf, size_t bytes, loff_t pos); + size_t wlen; + + if (off > zone->buffer_size) + return -EINVAL; + + wlen = min_t(size_t, len, zone->buffer_size - off); + if (buf && wlen) { + memcpy(zone->buffer->data + off, buf, wlen); + atomic_set(&zone->buffer->datalen, wlen + off); + } + + /* avoid to damage old records */ + if (!is_on_panic() && !atomic_read(&pstore_zone_cxt.recovered)) + goto dirty; + + writeop = 
is_on_panic() ? info->panic_write : info->write; + if (!writeop) + goto dirty; + + switch (flush_mode) { + case FLUSH_NONE: + if (unlikely(buf && wlen)) + goto dirty; + return 0; + case FLUSH_PART: + wcnt = writeop((const char *)zone->buffer->data + off, wlen, + zone->off + sizeof(*zone->buffer) + off); + if (wcnt != wlen) + goto dirty; + fallthrough; + case FLUSH_META: + wlen = sizeof(struct psz_buffer); + wcnt = writeop((const char *)zone->buffer, wlen, zone->off); + if (wcnt != wlen) + goto dirty; + break; + case FLUSH_ALL: + wlen = zone->buffer_size + sizeof(*zone->buffer); + wcnt = writeop((const char *)zone->buffer, wlen, zone->off); + if (wcnt != wlen) + goto dirty; + break; + } + + return 0; +dirty: + atomic_set(&zone->dirty, true); + return -EBUSY; +} + +static int psz_flush_dirty_zone(struct pstore_zone *zone) +{ + int ret; + + if (unlikely(!zone)) + return -EINVAL; + + if (unlikely(!atomic_read(&pstore_zone_cxt.recovered))) + return -EBUSY; + + if (!atomic_xchg(&zone->dirty, false)) + return 0; + + ret = psz_zone_write(zone, FLUSH_ALL, NULL, 0, 0); + if (ret) + atomic_set(&zone->dirty, true); + return ret; +} + +static int psz_flush_dirty_zones(struct pstore_zone **zones, unsigned int cnt) +{ + int i, ret; + struct pstore_zone *zone; + + if (!zones) + return -EINVAL; + + for (i = 0; i < cnt; i++) { + zone = zones[i]; + if (!zone) + return -EINVAL; + ret = psz_flush_dirty_zone(zone); + if (ret) + return ret; + } + return 0; +} + +static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new) +{ + const char *data = (const char *)old->buffer->data; + int ret; + + ret = psz_zone_write(new, FLUSH_ALL, data, buffer_datalen(old), 0); + if (ret) { + atomic_set(&new->buffer->datalen, 0); + atomic_set(&new->dirty, false); + return ret; + } + atomic_set(&old->buffer->datalen, 0); + return 0; +} + +static int psz_kmsg_recover_data(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct pstore_zone *zone = NULL; + 
struct psz_buffer *buf; + unsigned long i; + ssize_t rcnt; + + if (!info->read) + return -EINVAL; + + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + zone = cxt->kpszs[i]; + if (unlikely(!zone)) + return -EINVAL; + if (atomic_read(&zone->dirty)) { + unsigned int wcnt = cxt->kmsg_write_cnt; + struct pstore_zone *new = cxt->kpszs[wcnt]; + int ret; + + ret = psz_move_zone(zone, new); + if (ret) { + pr_err("move zone from %lu to %d failed\n", + i, wcnt); + return ret; + } + cxt->kmsg_write_cnt = (wcnt + 1) % cxt->kmsg_max_cnt; + } + if (!zone->should_recover) + continue; + buf = zone->buffer; + rcnt = info->read((char *)buf, zone->buffer_size + sizeof(*buf), + zone->off); + if (rcnt != zone->buffer_size + sizeof(*buf)) + return (int)rcnt < 0 ? (int)rcnt : -EIO; + } + return 0; +} + +static int psz_kmsg_recover_meta(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct pstore_zone *zone; + size_t rcnt, len; + struct psz_buffer *buf; + struct psz_kmsg_header *hdr; + struct timespec64 time = { }; + unsigned long i; + /* + * Recover may on panic, we can't allocate any memory by kmalloc. + * So, we use local array instead. + */ + char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0}; + + if (!info->read) + return -EINVAL; + + len = sizeof(*buf) + sizeof(*hdr); + buf = (struct psz_buffer *)buffer_header; + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + zone = cxt->kpszs[i]; + if (unlikely(!zone)) + return -EINVAL; + + rcnt = info->read((char *)buf, len, zone->off); + if (rcnt != len) { + pr_err("read %s with id %lu failed\n", zone->name, i); + return (int)rcnt < 0 ? 
(int)rcnt : -EIO; + } + + if (buf->sig != zone->buffer->sig) { + pr_debug("no valid data in kmsg dump zone %lu\n", i); + continue; + } + + if (zone->buffer_size < atomic_read(&buf->datalen)) { + pr_info("found overtop zone: %s: id %lu, off %lld, size %zu\n", + zone->name, i, zone->off, + zone->buffer_size); + continue; + } + + hdr = (struct psz_kmsg_header *)buf->data; + if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) { + pr_info("found invalid zone: %s: id %lu, off %lld, size %zu\n", + zone->name, i, zone->off, + zone->buffer_size); + continue; + } + + /* + * we get the newest zone, and the next one must be the oldest + * or unused zone, because we do write one by one like a circle. + */ + if (hdr->time.tv_sec >= time.tv_sec) { + time.tv_sec = hdr->time.tv_sec; + cxt->kmsg_write_cnt = (i + 1) % cxt->kmsg_max_cnt; + } + + if (hdr->reason == KMSG_DUMP_OOPS) + cxt->oops_counter = + max(cxt->oops_counter, hdr->counter); + else if (hdr->reason == KMSG_DUMP_PANIC) + cxt->panic_counter = + max(cxt->panic_counter, hdr->counter); + + if (!atomic_read(&buf->datalen)) { + pr_debug("found erased zone: %s: id %lu, off %lld, size %zu, datalen %d\n", + zone->name, i, zone->off, + zone->buffer_size, + atomic_read(&buf->datalen)); + continue; + } + + if (!is_on_panic()) + zone->should_recover = true; + pr_debug("found nice zone: %s: id %lu, off %lld, size %zu, datalen %d\n", + zone->name, i, zone->off, + zone->buffer_size, atomic_read(&buf->datalen)); + } + + return 0; +} + +static int psz_kmsg_recover(struct psz_context *cxt) +{ + int ret; + + if (!cxt->kpszs) + return 0; + + ret = psz_kmsg_recover_meta(cxt); + if (ret) + goto recover_fail; + + ret = psz_kmsg_recover_data(cxt); + if (ret) + goto recover_fail; + + return 0; +recover_fail: + pr_debug("psz_recover_kmsg failed\n"); + return ret; +} + +/** + * psz_recovery() - recover data from storage + * @cxt: the context of pstore/zone + * + * recovery means reading data back from storage after rebooting + * + * Return: 0 on success, 
others on failure. + */ +static inline int psz_recovery(struct psz_context *cxt) +{ + int ret; + + if (atomic_read(&cxt->recovered)) + return 0; + + ret = psz_kmsg_recover(cxt); + + if (unlikely(ret)) + pr_err("recover failed\n"); + else { + pr_debug("recover end!\n"); + atomic_set(&cxt->recovered, 1); + } + return ret; +} + +static int psz_pstore_open(struct pstore_info *psi) +{ + struct psz_context *cxt = psi->data; + + cxt->kmsg_read_cnt = 0; + return 0; +} + +static inline bool psz_ok(struct pstore_zone *zone) +{ + if (zone && zone->buffer && buffer_datalen(zone)) + return true; + return false; +} + +static inline int psz_kmsg_erase(struct psz_context *cxt, + struct pstore_zone *zone, struct pstore_record *record) +{ + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + if (unlikely(!psz_ok(zone))) + return 0; + /* this zone is already updated, no need to erase */ + if (record->count != hdr->counter) + return 0; + + atomic_set(&zone->buffer->datalen, 0); + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); +} + +static int psz_pstore_erase(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + + switch (record->type) { + case PSTORE_TYPE_DMESG: + if (record->id >= cxt->kmsg_max_cnt) + return -EINVAL; + return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); + default: + return -EINVAL; + } +} + +static void psz_write_kmsg_hdr(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + hdr->magic = PSTORE_KMSG_HEADER_MAGIC; + hdr->compressed = record->compressed; + hdr->time.tv_sec = record->time.tv_sec; + hdr->time.tv_nsec = record->time.tv_nsec; + hdr->reason = record->reason; + if (hdr->reason == KMSG_DUMP_OOPS) + hdr->counter = ++cxt->oops_counter; + else if (hdr->reason == KMSG_DUMP_PANIC) + 
hdr->counter = ++cxt->panic_counter; + else + hdr->counter = 0; +} + +static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, + struct pstore_record *record) +{ + size_t size, hlen; + struct pstore_zone *zone; + unsigned int zonenum; + + zonenum = cxt->kmsg_write_cnt; + zone = cxt->kpszs[zonenum]; + if (unlikely(!zone)) + return -ENOSPC; + cxt->kmsg_write_cnt = (zonenum + 1) % cxt->kmsg_max_cnt; + + pr_debug("write %s to zone id %d\n", zone->name, zonenum); + psz_write_kmsg_hdr(zone, record); + hlen = sizeof(struct psz_kmsg_header); + size = min_t(size_t, record->size, zone->buffer_size - hlen); + return psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); +} + +static int notrace psz_kmsg_write(struct psz_context *cxt, + struct pstore_record *record) +{ + int ret; + + /* + * Explicitly only take the first part of any new crash. + * If our buffer is larger than kmsg_bytes, this can never happen, + * and if our buffer is smaller than kmsg_bytes, we don't want the + * report split across multiple records. 
+ */ + if (record->part != 1) + return -ENOSPC; + + if (!cxt->kpszs) + return -ENOSPC; + + ret = psz_kmsg_write_record(cxt, record); + if (!ret) { + pr_debug("try to flush other dirty zones\n"); + psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + } + + /* always return 0 as we had handled it on buffer */ + return 0; +} + +static int notrace psz_pstore_write(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + + if (record->type == PSTORE_TYPE_DMESG && + record->reason == KMSG_DUMP_PANIC) + atomic_set(&cxt->on_panic, 1); + + switch (record->type) { + case PSTORE_TYPE_DMESG: + return psz_kmsg_write(cxt, record); + default: + return -EINVAL; + } +} + +static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) +{ + struct pstore_zone *zone = NULL; + + while (cxt->kmsg_read_cnt < cxt->kmsg_max_cnt) { + zone = cxt->kpszs[cxt->kmsg_read_cnt++]; + if (psz_ok(zone)) + return zone; + } + + return NULL; +} + +static int psz_kmsg_read_hdr(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) + return -EINVAL; + record->compressed = hdr->compressed; + record->time.tv_sec = hdr->time.tv_sec; + record->time.tv_nsec = hdr->time.tv_nsec; + record->reason = hdr->reason; + record->count = hdr->counter; + return 0; +} + +static ssize_t psz_kmsg_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + ssize_t size, hlen = 0; + + size = buffer_datalen(zone); + /* Clear and skip this kmsg dump record if it has no valid header */ + if (psz_kmsg_read_hdr(zone, record)) { + atomic_set(&zone->buffer->datalen, 0); + atomic_set(&zone->dirty, 0); + return -ENOMSG; + } + size -= sizeof(struct psz_kmsg_header); + + if (!record->compressed) { + char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n", + kmsg_dump_reason_str(record->reason), + record->count); + hlen = 
strlen(buf); + record->buf = krealloc(buf, hlen + size, GFP_KERNEL); + if (!record->buf) { + kfree(buf); + return -ENOMEM; + } + } else { + record->buf = kmalloc(size, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + } + + size = psz_zone_read(zone, record->buf + hlen, size, + sizeof(struct psz_kmsg_header)); + if (unlikely(size < 0)) { + kfree(record->buf); + return -ENOMSG; + } + + return size + hlen; +} + +static ssize_t psz_pstore_read(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + ssize_t (*readop)(struct pstore_zone *zone, + struct pstore_record *record); + struct pstore_zone *zone; + ssize_t ret; + + /* before read, we must recover from storage */ + ret = psz_recovery(cxt); + if (ret) + return ret; + +next_zone: + zone = psz_read_next_zone(cxt); + if (!zone) + return 0; + + record->type = zone->type; + switch (record->type) { + case PSTORE_TYPE_DMESG: + readop = psz_kmsg_read; + record->id = cxt->kmsg_read_cnt - 1; + break; + default: + goto next_zone; + } + + ret = readop(zone, record); + if (ret == -ENOMSG) + goto next_zone; + return ret; +} + +static struct psz_context pstore_zone_cxt = { + .pstore_zone_info_lock = + __MUTEX_INITIALIZER(pstore_zone_cxt.pstore_zone_info_lock), + .recovered = ATOMIC_INIT(0), + .on_panic = ATOMIC_INIT(0), + .pstore = { + .owner = THIS_MODULE, + .open = psz_pstore_open, + .read = psz_pstore_read, + .write = psz_pstore_write, + .erase = psz_pstore_erase, + }, +}; + +static void psz_free_zone(struct pstore_zone **pszone) +{ + struct pstore_zone *zone = *pszone; + + if (!zone) + return; + + kfree(zone->buffer); + kfree(zone); + *pszone = NULL; +} + +static void psz_free_zones(struct pstore_zone ***pszones, unsigned int *cnt) +{ + struct pstore_zone **zones = *pszones; + + if (!zones) + return; + + while (*cnt > 0) { + (*cnt)--; + psz_free_zone(&(zones[*cnt])); + } + kfree(zones); + *pszones = NULL; +} + +static void psz_free_all_zones(struct psz_context *cxt) +{ + if (cxt->kpszs) + 
psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); +} + +static struct pstore_zone *psz_init_zone(enum pstore_type_id type, + loff_t *off, size_t size) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + struct pstore_zone *zone; + const char *name = pstore_type_to_name(type); + + if (!size) + return NULL; + + if (*off + size > info->total_size) { + pr_err("no room for %s (0x%zx@0x%llx over 0x%lx)\n", + name, size, *off, info->total_size); + return ERR_PTR(-ENOMEM); + } + + zone = kzalloc(sizeof(struct pstore_zone), GFP_KERNEL); + if (!zone) + return ERR_PTR(-ENOMEM); + + zone->buffer = kmalloc(size, GFP_KERNEL); + if (!zone->buffer) { + kfree(zone); + return ERR_PTR(-ENOMEM); + } + memset(zone->buffer, 0xFF, size); + zone->off = *off; + zone->name = name; + zone->type = type; + zone->buffer_size = size - sizeof(struct psz_buffer); + zone->buffer->sig = type ^ PSZ_SIG; + atomic_set(&zone->dirty, 0); + atomic_set(&zone->buffer->datalen, 0); + + *off += size; + + pr_debug("pszone %s: off 0x%llx, %zu header, %zu data\n", zone->name, + zone->off, sizeof(*zone->buffer), zone->buffer_size); + return zone; +} + +static struct pstore_zone **psz_init_zones(enum pstore_type_id type, + loff_t *off, size_t total_size, ssize_t record_size, + unsigned int *cnt) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + struct pstore_zone **zones, *zone; + const char *name = pstore_type_to_name(type); + int c, i; + + *cnt = 0; + if (!total_size || !record_size) + return NULL; + + if (*off + total_size > info->total_size) { + pr_err("no room for zones %s (0x%zx@0x%llx over 0x%lx)\n", + name, total_size, *off, info->total_size); + return ERR_PTR(-ENOMEM); + } + + c = total_size / record_size; + zones = kcalloc(c, sizeof(*zones), GFP_KERNEL); + if (!zones) { + pr_err("allocate for zones %s failed\n", name); + return ERR_PTR(-ENOMEM); + } + memset(zones, 0, c * sizeof(*zones)); + + for (i = 0; i < c; i++) { + zone = psz_init_zone(type, off, 
record_size); + if (!zone || IS_ERR(zone)) { + pr_err("initialize zones %s failed\n", name); + psz_free_zones(&zones, &i); + return (void *)zone; + } + zones[i] = zone; + } + + *cnt = c; + return zones; +} + +static int psz_alloc_zones(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + loff_t off = 0; + int err; + size_t size; + + size = info->total_size; + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, size, + info->kmsg_size, &cxt->kmsg_max_cnt); + if (IS_ERR(cxt->kpszs)) { + err = PTR_ERR(cxt->kpszs); + cxt->kpszs = NULL; + goto fail_out; + } + + return 0; +fail_out: + return err; +} + +/** + * register_pstore_zone() - register to pstore/zone + * + * @info: back-end driver information. See &struct pstore_zone_info. + * + * Only one back-end at one time. + * + * Return: 0 on success, others on failure. + */ +int register_pstore_zone(struct pstore_zone_info *info) +{ + int err = -EINVAL; + struct psz_context *cxt = &pstore_zone_cxt; + + if (info->total_size < 4096) { + pr_warn("total_size must be >= 4096\n"); + return -EINVAL; + } + + if (!info->kmsg_size) { + pr_warn("at least one record size must be non-zero\n"); + return -EINVAL; + } + + if (!info->name || !info->name[0]) + return -EINVAL; + +#define check_size(name, size) { \ + if (info->name > 0 && info->name < (size)) { \ + pr_err(#name " must be over %d\n", (size)); \ + return -EINVAL; \ + } \ + if (info->name & (size - 1)) { \ + pr_err(#name " must be a multiple of %d\n", \ + (size)); \ + return -EINVAL; \ + } \ + } + + check_size(total_size, 4096); + check_size(kmsg_size, SECTOR_SIZE); + +#undef check_size + + /* + * the @read and @write must be applied. + * if no @read, pstore may mount failed. + * if no @write, pstore do not support to remove record file. 
+ */ + if (!info->read || !info->write) { + pr_err("no valid general read/write interface\n"); + return -EINVAL; + } + + mutex_lock(&cxt->pstore_zone_info_lock); + if (cxt->pstore_zone_info) { + pr_warn("'%s' already loaded: ignoring '%s'\n", + cxt->pstore_zone_info->name, info->name); + mutex_unlock(&cxt->pstore_zone_info_lock); + return -EBUSY; + } + cxt->pstore_zone_info = info; + + pr_debug("register %s with properties:\n", info->name); + pr_debug("\ttotal size : %ld Bytes\n", info->total_size); + pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); + + err = psz_alloc_zones(cxt); + if (err) { + pr_err("alloc zones failed\n"); + goto fail_out; + } + + if (info->kmsg_size) { + cxt->pstore.bufsize = cxt->kpszs[0]->buffer_size - + sizeof(struct psz_kmsg_header); + cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); + if (!cxt->pstore.buf) { + err = -ENOMEM; + goto fail_free; + } + } + cxt->pstore.data = cxt; + + pr_info("registered %s as backend for", info->name); + cxt->pstore.max_reason = info->max_reason; + cxt->pstore.name = info->name; + if (info->kmsg_size) { + cxt->pstore.flags |= PSTORE_FLAGS_DMESG; + pr_cont(" kmsg(%s", + kmsg_dump_reason_str(cxt->pstore.max_reason)); + if (cxt->pstore_zone_info->panic_write) + pr_cont(",panic_write"); + pr_cont(")"); + } + pr_cont("\n"); + + err = pstore_register(&cxt->pstore); + if (err) { + pr_err("registering with pstore failed\n"); + goto fail_free; + } + mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock); + + return 0; + +fail_free: + kfree(cxt->pstore.buf); + cxt->pstore.buf = NULL; + cxt->pstore.bufsize = 0; + psz_free_all_zones(cxt); +fail_out: + pstore_zone_cxt.pstore_zone_info = NULL; + mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock); + return err; +} +EXPORT_SYMBOL_GPL(register_pstore_zone); + +/** + * unregister_pstore_zone() - unregister to pstore/zone + * + * @info: back-end driver information. See struct pstore_zone_info. 
+ */ +void unregister_pstore_zone(struct pstore_zone_info *info) +{ + struct psz_context *cxt = &pstore_zone_cxt; + + mutex_lock(&cxt->pstore_zone_info_lock); + if (!cxt->pstore_zone_info) { + mutex_unlock(&cxt->pstore_zone_info_lock); + return; + } + + /* Stop incoming writes from pstore. */ + pstore_unregister(&cxt->pstore); + + /* Clean up allocations. */ + kfree(cxt->pstore.buf); + cxt->pstore.buf = NULL; + cxt->pstore.bufsize = 0; + cxt->pstore_zone_info = NULL; + + psz_free_all_zones(cxt); + + /* Clear counters and zone state. */ + cxt->oops_counter = 0; + cxt->panic_counter = 0; + atomic_set(&cxt->recovered, 0); + atomic_set(&cxt->on_panic, 0); + + mutex_unlock(&cxt->pstore_zone_info_lock); +} +EXPORT_SYMBOL_GPL(unregister_pstore_zone); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("Storage Manager for pstore/blk"); diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h new file mode 100644 index 000000000000..eb005d9ae40c --- /dev/null +++ b/include/linux/pstore_zone.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PSTORE_ZONE_H_ +#define __PSTORE_ZONE_H_ + +#include + +typedef ssize_t (*pstore_zone_read_op)(char *, size_t, loff_t); +typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); +/** + * struct pstore_zone_info - pstore/zone back-end driver structure + * + * @owner: Module which is responsible for this back-end driver. + * @name: Name of the back-end driver. + * @total_size: The total size in bytes pstore/zone can use. It must be at least + * 4096 and be a multiple of 4096. + * @kmsg_size: The size of oops/panic zone. Zero means disabled, otherwise, + * it must be a multiple of SECTOR_SIZE (512 bytes). + * @max_reason: Maximum kmsg dump reason to store. + * @read: The general read operation. Both of the function parameters + * @size and @offset are relative to the storage.
+ * On success, the number of bytes should be returned, others + * means error. + * @write: The same as @read. + * @panic_write:The write operation only used for panic case. It's optional + * if you do not care panic log. The parameters and return value + * are the same as @read. + */ +struct pstore_zone_info { + struct module *owner; + const char *name; + + unsigned long total_size; + unsigned long kmsg_size; + int max_reason; + pstore_zone_read_op read; + pstore_zone_write_op write; + pstore_zone_write_op panic_write; +}; + +extern int register_pstore_zone(struct pstore_zone_info *info); +extern void unregister_pstore_zone(struct pstore_zone_info *info); + +#endif -- cgit v1.2.3 From 17639f67c1d61aba3c05e7703f75cd468f9d484f Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:54:57 +0800 Subject: pstore/blk: Introduce backend for block devices pstore/blk is similar to pstore/ram, but uses a block device as the storage rather than persistent ram. The pstore/blk backend solves two common use-cases that used to preclude using pstore/ram: - not all devices have a battery that could be used to persist regular RAM across power failures. - most embedded intelligent equipment have no persistent ram, which increases costs, instead preferring cheaper solutions, like block devices. pstore/blk provides separate configurations for the end user and for the block drivers. User configuration determines how pstore/blk operates, such as record sizes, max kmsg dump reasons, etc. These can be set by Kconfig and/or module parameters, but module parameter have priority over Kconfig. Driver configuration covers all the details about the target block device, such as total size of the device and how to perform read/write operations. These are provided by block drivers, calling pstore_register_blkdev(), including an optional panic_write callback used to bypass regular IO APIs in an effort to avoid potentially destabilized kernel code during a panic. 
Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-3-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 64 +++++++ fs/pstore/Makefile | 3 + fs/pstore/blk.c | 434 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pstore_blk.h | 51 ++++++ 4 files changed, 552 insertions(+) create mode 100644 fs/pstore/blk.c create mode 100644 include/linux/pstore_blk.h (limited to 'include/linux') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 98d2457bdd9f..958bec75f907 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -160,3 +160,67 @@ config PSTORE_ZONE help The common layer for pstore/blk (and pstore/ram in the future) to manage storage in zones. + +config PSTORE_BLK + tristate "Log panic/oops to a block device" + depends on PSTORE + depends on BLOCK + select PSTORE_ZONE + default n + help + This enables panic and oops messages to be logged to a block dev + where they can be read back at some later point. + + If unsure, say N. + +config PSTORE_BLK_BLKDEV + string "block device identifier" + depends on PSTORE_BLK + default "" + help + Which block device should be used for pstore/blk. + + It accepts the following variants: + 1) device number in hexadecimal representation, + with no leading 0x, for example b302. + 2) /dev/<disk_name> represents the device number of disk + 3) /dev/<disk_name><decimal> represents the device number + of partition - device number of disk plus the partition number + 4) /dev/<disk_name>p<decimal> - same as the above, this form is + used when disk name of partitioned disk ends with a digit. + 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + unique id of a partition if the partition table provides it. + The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + filled hex representation of the 32-bit "NT disk signature", and PP + is a zero-filled hex representation of the 1-based partition number.
+ 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation + to a partition with a known unique id. + 7) <major>:<minor> major and minor number of the device separated by + a colon. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_KMSG_SIZE + int "Size in Kbytes of kmsg dump log to store" + depends on PSTORE_BLK + default 64 + help + This sets the size of the kmsg dump (oops, panic, etc) log for + pstore/blk. The size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_MAX_REASON + int "Maximum kmsg dump reason to store" + depends on PSTORE_BLK + default 2 + help + The maximum reason for kmsg dumps to store. The default is + 2 (KMSG_DUMP_OOPS), see include/linux/kmsg_dump.h's + enum kmsg_dump_reason for more details. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 58a967cbe4af..c270467aeece 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -15,3 +15,6 @@ obj-$(CONFIG_PSTORE_RAM) += ramoops.o pstore_zone-objs += zone.o obj-$(CONFIG_PSTORE_ZONE) += pstore_zone.o + +pstore_blk-objs += blk.o +obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c new file mode 100644 index 000000000000..8f131c6e412e --- /dev/null +++ b/fs/pstore/blk.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implements a pstore backend driver that writes to block (or non-block) storage + * devices, using the pstore/zone API.
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include "../../block/blk.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static long kmsg_size = CONFIG_PSTORE_BLK_KMSG_SIZE; +module_param(kmsg_size, long, 0400); +MODULE_PARM_DESC(kmsg_size, "kmsg dump record size in kbytes"); + +static int max_reason = CONFIG_PSTORE_BLK_MAX_REASON; +module_param(max_reason, int, 0400); +MODULE_PARM_DESC(max_reason, + "maximum reason for kmsg dump (default 2: Oops and Panic)"); + +/* + * blkdev - the block device to use for pstore storage + * + * Usually, this will be a partition of a block device. + * + * blkdev accepts the following variants: + * 1) device number in hexadecimal representation, + * with no leading 0x, for example b302. + * 2) /dev/<disk_name> represents the device number of disk + * 3) /dev/<disk_name><decimal> represents the device number + * of partition - device number of disk plus the partition number + * 4) /dev/<disk_name>p<decimal> - same as the above, this form is + * used when the disk name of a partitioned disk ends with a digit. + * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + * unique id of a partition if the partition table provides it. + * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + * filled hex representation of the 32-bit "NT disk signature", and PP + * is a zero-filled hex representation of the 1-based partition number. + * 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to + * a partition with a known unique id. + * 7) <major>:<minor> major and minor number of the device separated by + * a colon. + */ +static char blkdev[80] = CONFIG_PSTORE_BLK_BLKDEV; +module_param_string(blkdev, blkdev, 80, 0400); +MODULE_PARM_DESC(blkdev, "block device for pstore storage"); + +/* + * All globals must only be accessed under the pstore_blk_lock + * during the register/unregister functions.
+ */ +static DEFINE_MUTEX(pstore_blk_lock); +static struct block_device *psblk_bdev; +static struct pstore_zone_info *pstore_zone_info; +static pstore_blk_panic_write_op blkdev_panic_write; + +struct bdev_info { + dev_t devt; + sector_t nr_sects; + sector_t start_sect; +}; + +/** + * struct pstore_device_info - back-end pstore/blk driver structure. + * + * @total_size: The total size in bytes pstore/blk can use. It must be at least + * 4096 and be a multiple of 4096. + * @flags: Refer to the macros starting with PSTORE_FLAGS defined in + * linux/pstore.h. It selects which front-ends this device supports. + * Zero means all front-ends, for compatibility. + * @read: The general read operation. Both of the function parameters + * @size and @offset are relative to the block device (not the + * whole disk). + * On success, the number of bytes should be returned, others + * means error. + * @write: The same as @read. + * @panic_write:The write operation only used for panic case. It's optional + * if you do not care about the panic log. The parameters and return + * value are the same as @read. + */ +struct pstore_device_info { + unsigned long total_size; + unsigned int flags; + pstore_zone_read_op read; + pstore_zone_write_op write; + pstore_zone_write_op panic_write; +}; + +static int psblk_register_do(struct pstore_device_info *dev) +{ + int ret; + + if (!dev || !dev->total_size || !dev->read || !dev->write) + return -EINVAL; + + mutex_lock(&pstore_blk_lock); + + /* someone already registered before */ + if (pstore_zone_info) { + mutex_unlock(&pstore_blk_lock); + return -EBUSY; + } + pstore_zone_info = kzalloc(sizeof(struct pstore_zone_info), GFP_KERNEL); + if (!pstore_zone_info) { + mutex_unlock(&pstore_blk_lock); + return -ENOMEM; + } + + /* zero means no limit on which front-ends to attempt to store. */ + if (!dev->flags) + dev->flags = UINT_MAX; + +#define verify_size(name, alignsize, enabled) { \ + long _##name_ = (enabled) ? (name) : 0; \ + _##name_ = _##name_ <= 0 ?
0 : (_##name_ * 1024); \ + if (_##name_ & ((alignsize) - 1)) { \ + pr_info(#name " must align to %d\n", \ + (alignsize)); \ + _##name_ = ALIGN(name, (alignsize)); \ + } \ + name = _##name_ / 1024; \ + pstore_zone_info->name = _##name_; \ + } + + verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); +#undef verify_size + + pstore_zone_info->total_size = dev->total_size; + pstore_zone_info->max_reason = max_reason; + pstore_zone_info->read = dev->read; + pstore_zone_info->write = dev->write; + pstore_zone_info->panic_write = dev->panic_write; + pstore_zone_info->name = KBUILD_MODNAME; + pstore_zone_info->owner = THIS_MODULE; + + ret = register_pstore_zone(pstore_zone_info); + if (ret) { + kfree(pstore_zone_info); + pstore_zone_info = NULL; + } + mutex_unlock(&pstore_blk_lock); + return ret; +} + +static void psblk_unregister_do(struct pstore_device_info *dev) +{ + mutex_lock(&pstore_blk_lock); + if (pstore_zone_info && pstore_zone_info->read == dev->read) { + unregister_pstore_zone(pstore_zone_info); + kfree(pstore_zone_info); + pstore_zone_info = NULL; + } + mutex_unlock(&pstore_blk_lock); +} + +/** + * psblk_get_bdev() - open block device + * + * @holder: Exclusive holder identifier + * @info: Information about bdev to fill in + * + * Return: pointer to block device on success and others on error. + * + * On success, the returned block_device has reference count of one. 
+ */ +static struct block_device *psblk_get_bdev(void *holder, + struct bdev_info *info) +{ + struct block_device *bdev = ERR_PTR(-ENODEV); + fmode_t mode = FMODE_READ | FMODE_WRITE; + sector_t nr_sects; + + lockdep_assert_held(&pstore_blk_lock); + + if (pstore_zone_info) + return ERR_PTR(-EBUSY); + + if (!blkdev[0]) + return ERR_PTR(-ENODEV); + + if (holder) + mode |= FMODE_EXCL; + bdev = blkdev_get_by_path(blkdev, mode, holder); + if (IS_ERR(bdev)) { + dev_t devt; + + devt = name_to_dev_t(blkdev); + if (devt == 0) + return ERR_PTR(-ENODEV); + bdev = blkdev_get_by_dev(devt, mode, holder); + if (IS_ERR(bdev)) + return bdev; + } + + nr_sects = part_nr_sects_read(bdev->bd_part); + if (!nr_sects) { + pr_err("not enough space for '%s'\n", blkdev); + blkdev_put(bdev, mode); + return ERR_PTR(-ENOSPC); + } + + if (info) { + info->devt = bdev->bd_dev; + info->nr_sects = nr_sects; + info->start_sect = get_start_sect(bdev); + } + + return bdev; +} + +static void psblk_put_bdev(struct block_device *bdev, void *holder) +{ + fmode_t mode = FMODE_READ | FMODE_WRITE; + + lockdep_assert_held(&pstore_blk_lock); + + if (!bdev) + return; + + if (holder) + mode |= FMODE_EXCL; + blkdev_put(bdev, mode); +} + +static ssize_t psblk_generic_blk_read(char *buf, size_t bytes, loff_t pos) +{ + struct block_device *bdev = psblk_bdev; + struct file file; + struct kiocb kiocb; + struct iov_iter iter; + struct kvec iov = {.iov_base = buf, .iov_len = bytes}; + + if (!bdev) + return -ENODEV; + + memset(&file, 0, sizeof(struct file)); + file.f_mapping = bdev->bd_inode->i_mapping; + file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME; + file.f_inode = bdev->bd_inode; + file_ra_state_init(&file.f_ra, file.f_mapping); + + init_sync_kiocb(&kiocb, &file); + kiocb.ki_pos = pos; + iov_iter_kvec(&iter, READ, &iov, 1, bytes); + + return generic_file_read_iter(&kiocb, &iter); +} + +static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes, + loff_t pos) +{ + struct block_device *bdev = psblk_bdev; + 
struct iov_iter iter; + struct kiocb kiocb; + struct file file; + ssize_t ret; + struct kvec iov = {.iov_base = (void *)buf, .iov_len = bytes}; + + if (!bdev) + return -ENODEV; + + /* Console/Ftrace backend may handle buffer until flush dirty zones */ + if (in_interrupt() || irqs_disabled()) + return -EBUSY; + + memset(&file, 0, sizeof(struct file)); + file.f_mapping = bdev->bd_inode->i_mapping; + file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME; + file.f_inode = bdev->bd_inode; + + init_sync_kiocb(&kiocb, &file); + kiocb.ki_pos = pos; + iov_iter_kvec(&iter, WRITE, &iov, 1, bytes); + + inode_lock(bdev->bd_inode); + ret = generic_write_checks(&kiocb, &iter); + if (ret > 0) + ret = generic_perform_write(&file, &iter, pos); + inode_unlock(bdev->bd_inode); + + if (likely(ret > 0)) { + const struct file_operations f_op = {.fsync = blkdev_fsync}; + + file.f_op = &f_op; + kiocb.ki_pos += ret; + ret = generic_write_sync(&kiocb, ret); + } + return ret; +} + +static ssize_t psblk_blk_panic_write(const char *buf, size_t size, + loff_t off) +{ + int ret; + + if (!blkdev_panic_write) + return -EOPNOTSUPP; + + /* size and off must align to SECTOR_SIZE for block device */ + ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT, + size >> SECTOR_SHIFT); + return ret ? 
-EIO : size; +} + +static int __register_pstore_blk(struct pstore_blk_info *info) +{ + char bdev_name[BDEVNAME_SIZE]; + struct block_device *bdev; + struct pstore_device_info dev; + struct bdev_info binfo; + void *holder = blkdev; + int ret = -ENODEV; + + lockdep_assert_held(&pstore_blk_lock); + + /* hold bdev exclusively */ + memset(&binfo, 0, sizeof(binfo)); + bdev = psblk_get_bdev(holder, &binfo); + if (IS_ERR(bdev)) { + pr_err("failed to open '%s'!\n", blkdev); + return PTR_ERR(bdev); + } + + /* only allow driver matching the @blkdev */ + if (!binfo.devt || MAJOR(binfo.devt) != info->major) { + pr_debug("invalid major %u (expect %u)\n", + info->major, MAJOR(binfo.devt)); + ret = -ENODEV; + goto err_put_bdev; + } + + /* psblk_bdev must be assigned before register to pstore/blk */ + psblk_bdev = bdev; + blkdev_panic_write = info->panic_write; + + /* Copy back block device details. */ + info->devt = binfo.devt; + info->nr_sects = binfo.nr_sects; + info->start_sect = binfo.start_sect; + + memset(&dev, 0, sizeof(dev)); + dev.total_size = info->nr_sects << SECTOR_SHIFT; + dev.flags = info->flags; + dev.read = psblk_generic_blk_read; + dev.write = psblk_generic_blk_write; + dev.panic_write = info->panic_write ? psblk_blk_panic_write : NULL; + + ret = psblk_register_do(&dev); + if (ret) + goto err_put_bdev; + + bdevname(bdev, bdev_name); + pr_info("attached %s%s\n", bdev_name, + info->panic_write ? "" : " (no dedicated panic_write!)"); + return 0; + +err_put_bdev: + psblk_bdev = NULL; + blkdev_panic_write = NULL; + psblk_put_bdev(bdev, holder); + return ret; +} + +/** + * register_pstore_blk() - register block device to pstore/blk + * + * @info: details on the desired block device interface + * + * Return: + * * 0 - OK + * * Others - something error. 
+ */ +int register_pstore_blk(struct pstore_blk_info *info) +{ + int ret; + + mutex_lock(&pstore_blk_lock); + ret = __register_pstore_blk(info); + mutex_unlock(&pstore_blk_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(register_pstore_blk); + +static void __unregister_pstore_blk(unsigned int major) +{ + struct pstore_device_info dev = { .read = psblk_generic_blk_read }; + void *holder = blkdev; + + lockdep_assert_held(&pstore_blk_lock); + if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) { + psblk_unregister_do(&dev); + psblk_put_bdev(psblk_bdev, holder); + blkdev_panic_write = NULL; + psblk_bdev = NULL; + } +} + +/** + * unregister_pstore_blk() - unregister block device from pstore/blk + * + * @major: the major device number of device + */ +void unregister_pstore_blk(unsigned int major) +{ + mutex_lock(&pstore_blk_lock); + __unregister_pstore_blk(major); + mutex_unlock(&pstore_blk_lock); +} +EXPORT_SYMBOL_GPL(unregister_pstore_blk); + +static void __exit pstore_blk_exit(void) +{ + mutex_lock(&pstore_blk_lock); + if (psblk_bdev) + __unregister_pstore_blk(MAJOR(psblk_bdev->bd_dev)); + mutex_unlock(&pstore_blk_lock); +} +module_exit(pstore_blk_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("pstore backend for block devices"); diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h new file mode 100644 index 000000000000..4501977b1336 --- /dev/null +++ b/include/linux/pstore_blk.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PSTORE_BLK_H_ +#define __PSTORE_BLK_H_ + +#include +#include +#include + +/** + * typedef pstore_blk_panic_write_op - panic write operation to block device + * + * @buf: the data to write + * @start_sect: start sector to block device + * @sects: sectors count on buf + * + * Return: On success, zero should be returned. Others mean error. + * + * Panic write to block device must be aligned to SECTOR_SIZE. 
+ */ +typedef int (*pstore_blk_panic_write_op)(const char *buf, sector_t start_sect, + sector_t sects); + +/** + * struct pstore_blk_info - pstore/blk registration details + * + * @major: Which major device number to support with pstore/blk + * @flags: The supported PSTORE_FLAGS_* from linux/pstore.h. + * @panic_write:The write operation only used for the panic case. + * This can be NULL, but is recommended to avoid losing + * crash data if the kernel's IO path or work queues are + * broken during a panic. + * @devt: The dev_t that pstore/blk has attached to. + * @nr_sects: Number of sectors on @devt. + * @start_sect: Starting sector on @devt. + */ +struct pstore_blk_info { + unsigned int major; + unsigned int flags; + pstore_blk_panic_write_op panic_write; + + /* Filled in by pstore/blk after registration. */ + dev_t devt; + sector_t nr_sects; + sector_t start_sect; +}; + +int register_pstore_blk(struct pstore_blk_info *info); +void unregister_pstore_blk(unsigned int major); + +#endif -- cgit v1.2.3 From 0dc068265a1c5923ffebf40388fbe93050a77ad1 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:54:59 +0800 Subject: pstore/zone,blk: Add support for pmsg frontend Add pmsg support to pstore/blk (through pstore/zone). To enable, pmsg_size must be greater than 0 and a multiple of 4096. 
Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-4-keescook@chromium.org/ Co-developed-by: Colin Ian King Signed-off-by: Colin Ian King Link: https://lore.kernel.org/lkml/20200512171932.222102-1-colin.king@canonical.com Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 12 ++ fs/pstore/blk.c | 9 ++ fs/pstore/zone.c | 268 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/pstore_zone.h | 2 + 4 files changed, 282 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 958bec75f907..ef01c48f0ff7 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -224,3 +224,15 @@ config PSTORE_BLK_MAX_REASON NOTE that, both Kconfig and module parameters can configure pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_PMSG_SIZE + int "Size in Kbytes of pmsg to store" + depends on PSTORE_BLK + depends on PSTORE_PMSG + default 64 + help + This just sets size of pmsg (pmsg_size) for pstore/blk. The size is + in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. 
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 8f131c6e412e..a9e7078f08d6 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -27,6 +27,14 @@ module_param(max_reason, int, 0400); MODULE_PARM_DESC(max_reason, "maximum reason for kmsg dump (default 2: Oops and Panic)"); +#if IS_ENABLED(CONFIG_PSTORE_PMSG) +static long pmsg_size = CONFIG_PSTORE_BLK_PMSG_SIZE; +#else +static long pmsg_size = -1; +#endif +module_param(pmsg_size, long, 0400); +MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes"); + /* * blkdev - the block device to use for pstore storage * @@ -133,6 +141,7 @@ static int psblk_register_do(struct pstore_device_info *dev) } verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); + verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); #undef verify_size pstore_zone_info->total_size = dev->total_size; diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 362d72a24012..2f8e3b349edb 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -27,12 +27,14 @@ * * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) * @datalen: length of data in @data + * @start: offset into @data where the beginning of the stored bytes begin * @data: zone data. 
*/ struct psz_buffer { #define PSZ_SIG (0x43474244) /* DBGC */ uint32_t sig; atomic_t datalen; + atomic_t start; uint8_t data[]; }; @@ -88,9 +90,11 @@ struct pstore_zone { * struct psz_context - all about running state of pstore/zone * * @kpszs: kmsg dump storage zones + * @ppsz: pmsg storage zone * @kmsg_max_cnt: max count of @kpszs * @kmsg_read_cnt: counter of total read kmsg dumps * @kmsg_write_cnt: counter of total kmsg dump writes + * @pmsg_read_cnt: counter of total read pmsg zone * @oops_counter: counter of oops dumps * @panic_counter: counter of panic dumps * @recovered: whether finished recovering data from storage @@ -101,9 +105,11 @@ struct pstore_zone { */ struct psz_context { struct pstore_zone **kpszs; + struct pstore_zone *ppsz; unsigned int kmsg_max_cnt; unsigned int kmsg_read_cnt; unsigned int kmsg_write_cnt; + unsigned int pmsg_read_cnt; /* * These counters should be calculated during recovery. * It records the oops/panic times after crashes rather than boots. @@ -143,15 +149,20 @@ static inline int buffer_datalen(struct pstore_zone *zone) return atomic_read(&zone->buffer->datalen); } +static inline int buffer_start(struct pstore_zone *zone) +{ + return atomic_read(&zone->buffer->start); +} + static inline bool is_on_panic(void) { return atomic_read(&pstore_zone_cxt.on_panic); } -static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf, +static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf, size_t len, unsigned long off) { - if (!buf || !zone->buffer) + if (!buf || !zone || !zone->buffer) return -EINVAL; if (off > zone->buffer_size) return -EINVAL; @@ -160,6 +171,18 @@ static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf, return len; } +static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf, + size_t len, unsigned long off) +{ + if (!buf || !zone || !zone->oldbuf) + return -EINVAL; + if (off > zone->buffer_size) + return -EINVAL; + len = min_t(size_t, len, zone->buffer_size - off); + memcpy(buf, 
zone->oldbuf->data + off, len); + return 0; +} + static int psz_zone_write(struct pstore_zone *zone, enum psz_flush_mode flush_mode, const char *buf, size_t len, unsigned long off) @@ -415,6 +438,93 @@ recover_fail: return ret; } +static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct psz_buffer *oldbuf, tmpbuf; + int ret = 0; + char *buf; + ssize_t rcnt, len, start, off; + + if (!zone || zone->oldbuf) + return 0; + + if (is_on_panic()) { + /* save data as much as possible */ + psz_flush_dirty_zone(zone); + return 0; + } + + if (unlikely(!info->read)) + return -EINVAL; + + len = sizeof(struct psz_buffer); + rcnt = info->read((char *)&tmpbuf, len, zone->off); + if (rcnt != len) { + pr_debug("read zone %s failed\n", zone->name); + return (int)rcnt < 0 ? (int)rcnt : -EIO; + } + + if (tmpbuf.sig != zone->buffer->sig) { + pr_debug("no valid data in zone %s\n", zone->name); + return 0; + } + + if (zone->buffer_size < atomic_read(&tmpbuf.datalen) || + zone->buffer_size < atomic_read(&tmpbuf.start)) { + pr_info("found overtop zone: %s: off %lld, size %zu\n", + zone->name, zone->off, zone->buffer_size); + /* just keep going */ + return 0; + } + + if (!atomic_read(&tmpbuf.datalen)) { + pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n", + zone->name, zone->off, zone->buffer_size, + atomic_read(&tmpbuf.datalen)); + return 0; + } + + pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n", + zone->name, zone->off, zone->buffer_size, + atomic_read(&tmpbuf.datalen)); + + len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf); + oldbuf = kzalloc(len, GFP_KERNEL); + if (!oldbuf) + return -ENOMEM; + + memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf)); + buf = (char *)oldbuf + sizeof(*oldbuf); + len = atomic_read(&oldbuf->datalen); + start = atomic_read(&oldbuf->start); + off = zone->off + sizeof(*oldbuf); + + /* get part of data */ + rcnt = info->read(buf, len - start, off + 
start); + if (rcnt != len - start) { + pr_err("read zone %s failed\n", zone->name); + ret = (int)rcnt < 0 ? (int)rcnt : -EIO; + goto free_oldbuf; + } + + /* get the rest of data */ + rcnt = info->read(buf + len - start, start, off); + if (rcnt != start) { + pr_err("read zone %s failed\n", zone->name); + ret = (int)rcnt < 0 ? (int)rcnt : -EIO; + goto free_oldbuf; + } + + zone->oldbuf = oldbuf; + psz_flush_dirty_zone(zone); + return 0; + +free_oldbuf: + kfree(oldbuf); + return ret; +} + /** * psz_recovery() - recover data from storage * @cxt: the context of pstore/zone @@ -431,7 +541,12 @@ static inline int psz_recovery(struct psz_context *cxt) return 0; ret = psz_kmsg_recover(cxt); + if (ret) + goto out; + ret = psz_recover_zone(cxt, cxt->ppsz); + +out: if (unlikely(ret)) pr_err("recover failed\n"); else { @@ -446,9 +561,17 @@ static int psz_pstore_open(struct pstore_info *psi) struct psz_context *cxt = psi->data; cxt->kmsg_read_cnt = 0; + cxt->pmsg_read_cnt = 0; return 0; } +static inline bool psz_old_ok(struct pstore_zone *zone) +{ + if (zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen)) + return true; + return false; +} + static inline bool psz_ok(struct pstore_zone *zone) { if (zone && zone->buffer && buffer_datalen(zone)) @@ -473,6 +596,25 @@ static inline int psz_kmsg_erase(struct psz_context *cxt, return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); } +static inline int psz_record_erase(struct psz_context *cxt, + struct pstore_zone *zone) +{ + if (unlikely(!psz_old_ok(zone))) + return 0; + + kfree(zone->oldbuf); + zone->oldbuf = NULL; + /* + * if there are new data in zone buffer, that means the old data + * are already invalid. It is no need to flush 0 (erase) to + * block device. 
+ */ + if (!buffer_datalen(zone)) + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + psz_flush_dirty_zone(zone); + return 0; +} + static int psz_pstore_erase(struct pstore_record *record) { struct psz_context *cxt = record->psi->data; @@ -482,6 +624,8 @@ static int psz_pstore_erase(struct pstore_record *record) if (record->id >= cxt->kmsg_max_cnt) return -EINVAL; return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); + case PSTORE_TYPE_PMSG: + return psz_record_erase(cxt, cxt->ppsz); default: return -EINVAL; } @@ -555,6 +699,55 @@ static int notrace psz_kmsg_write(struct psz_context *cxt, return 0; } +static int notrace psz_record_write(struct pstore_zone *zone, + struct pstore_record *record) +{ + size_t start, rem; + bool is_full_data = false; + char *buf; + int cnt; + + if (!zone || !record) + return -ENOSPC; + + if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size) + is_full_data = true; + + cnt = record->size; + buf = record->buf; + if (unlikely(cnt > zone->buffer_size)) { + buf += cnt - zone->buffer_size; + cnt = zone->buffer_size; + } + + start = buffer_start(zone); + rem = zone->buffer_size - start; + if (unlikely(rem < cnt)) { + psz_zone_write(zone, FLUSH_PART, buf, rem, start); + buf += rem; + cnt -= rem; + start = 0; + is_full_data = true; + } + + atomic_set(&zone->buffer->start, cnt + start); + psz_zone_write(zone, FLUSH_PART, buf, cnt, start); + + /** + * psz_zone_write will set datalen as start + cnt. + * It work if actual data length lesser than buffer size. + * If data length greater than buffer size, pmsg will rewrite to + * beginning of zone, which make buffer->datalen wrongly. + * So we should reset datalen as buffer size once actual data length + * greater than buffer size. 
+ */ + if (is_full_data) { + atomic_set(&zone->buffer->datalen, zone->buffer_size); + psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + } + return 0; +} + static int notrace psz_pstore_write(struct pstore_record *record) { struct psz_context *cxt = record->psi->data; @@ -566,6 +759,8 @@ static int notrace psz_pstore_write(struct pstore_record *record) switch (record->type) { case PSTORE_TYPE_DMESG: return psz_kmsg_write(cxt, record); + case PSTORE_TYPE_PMSG: + return psz_record_write(cxt->ppsz, record); default: return -EINVAL; } @@ -581,6 +776,13 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) return zone; } + if (cxt->pmsg_read_cnt == 0) { + cxt->pmsg_read_cnt++; + zone = cxt->ppsz; + if (psz_old_ok(zone)) + return zone; + } + return NULL; } @@ -631,7 +833,7 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, return -ENOMEM; } - size = psz_zone_read(zone, record->buf + hlen, size, + size = psz_zone_read_buffer(zone, record->buf + hlen, size, sizeof(struct psz_kmsg_header)); if (unlikely(size < 0)) { kfree(record->buf); @@ -641,6 +843,32 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, return size + hlen; } +static ssize_t psz_record_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + size_t len; + struct psz_buffer *buf; + + if (!zone || !record) + return -ENOSPC; + + buf = (struct psz_buffer *)zone->oldbuf; + if (!buf) + return -ENOMSG; + + len = atomic_read(&buf->datalen); + record->buf = kmalloc(len, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + + if (unlikely(psz_zone_read_oldbuf(zone, record->buf, len, 0))) { + kfree(record->buf); + return -ENOMSG; + } + + return len; +} + static ssize_t psz_pstore_read(struct pstore_record *record) { struct psz_context *cxt = record->psi->data; @@ -665,6 +893,9 @@ next_zone: readop = psz_kmsg_read; record->id = cxt->kmsg_read_cnt - 1; break; + case PSTORE_TYPE_PMSG: + readop = psz_record_read; + break; default: goto next_zone; } @@ -720,6 +951,8 @@ static void 
psz_free_all_zones(struct psz_context *cxt) { if (cxt->kpszs) psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); + if (cxt->ppsz) + psz_free_zone(&cxt->ppsz); } static struct pstore_zone *psz_init_zone(enum pstore_type_id type, @@ -753,8 +986,10 @@ static struct pstore_zone *psz_init_zone(enum pstore_type_id type, zone->type = type; zone->buffer_size = size - sizeof(struct psz_buffer); zone->buffer->sig = type ^ PSZ_SIG; + zone->oldbuf = NULL; atomic_set(&zone->dirty, 0); atomic_set(&zone->buffer->datalen, 0); + atomic_set(&zone->buffer->start, 0); *off += size; @@ -809,19 +1044,28 @@ static int psz_alloc_zones(struct psz_context *cxt) struct pstore_zone_info *info = cxt->pstore_zone_info; loff_t off = 0; int err; - size_t size; + size_t off_size = 0; + + off_size += info->pmsg_size; + cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size); + if (IS_ERR(cxt->ppsz)) { + err = PTR_ERR(cxt->ppsz); + cxt->ppsz = NULL; + goto free_out; + } - size = info->total_size; - cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, size, + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, + info->total_size - off_size, info->kmsg_size, &cxt->kmsg_max_cnt); if (IS_ERR(cxt->kpszs)) { err = PTR_ERR(cxt->kpszs); cxt->kpszs = NULL; - goto fail_out; + goto free_out; } return 0; -fail_out: +free_out: + psz_free_all_zones(cxt); return err; } @@ -844,7 +1088,7 @@ int register_pstore_zone(struct pstore_zone_info *info) return -EINVAL; } - if (!info->kmsg_size) { + if (!info->kmsg_size && !info->pmsg_size) { pr_warn("at least one record size must be non-zero\n"); return -EINVAL; } @@ -866,6 +1110,7 @@ int register_pstore_zone(struct pstore_zone_info *info) check_size(total_size, 4096); check_size(kmsg_size, SECTOR_SIZE); + check_size(pmsg_size, SECTOR_SIZE); #undef check_size @@ -891,6 +1136,7 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_debug("register %s with properties:\n", info->name); pr_debug("\ttotal size : %ld Bytes\n", info->total_size); pr_debug("\tkmsg 
size : %ld Bytes\n", info->kmsg_size); + pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); err = psz_alloc_zones(cxt); if (err) { @@ -920,6 +1166,10 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_cont(",panic_write"); pr_cont(")"); } + if (info->pmsg_size) { + cxt->pstore.flags |= PSTORE_FLAGS_PMSG; + pr_cont(" pmsg"); + } pr_cont("\n"); err = pstore_register(&cxt->pstore); diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index eb005d9ae40c..29c367a3bd80 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -17,6 +17,7 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @kmsg_size: The size of oops/panic zone. Zero means disabled, otherwise, * it must be multiple of SECTOR_SIZE(512 Bytes). * @max_reason: Maximum kmsg dump reason to store. + * @pmsg_size: The size of pmsg zone which is the same as @kmsg_size. * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others @@ -33,6 +34,7 @@ struct pstore_zone_info { unsigned long total_size; unsigned long kmsg_size; int max_reason; + unsigned long pmsg_size; pstore_zone_read_op read; pstore_zone_write_op write; pstore_zone_write_op panic_write; -- cgit v1.2.3 From cc9c4d1b5597167f8e8c92f6b61e1cda6d01884d Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:00 +0800 Subject: pstore/zone,blk: Add console frontend support Support backend for console. To enable console backend, just make console_size be greater than 0 and a multiple of 4096. 
Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-5-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 18 ++++++++-- fs/pstore/blk.c | 12 ++++++- fs/pstore/zone.c | 81 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/pstore_zone.h | 4 ++- 4 files changed, 105 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index ef01c48f0ff7..126aa6c3ecf2 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -180,11 +180,11 @@ config PSTORE_BLK_BLKDEV help Which block device should be used for pstore/blk. - It accept the following variants: + It accepts the following variants: 1) <hex_major><hex_minor> device number in hexadecimal representation, with no leading 0x, for example b302. - 2) /dev/<disk_name> represents the device number of disk - 3) /dev/<disk_name><decimal> represents the device number + 2) /dev/<disk_name> represents the device name of disk + 3) /dev/<disk_name><decimal> represents the device name and number of partition - device number of disk plus the partition number 4) /dev/<disk_name>p<decimal> - same as the above, this form is used when disk name of partitioned disk ends with a digit. @@ -236,3 +236,15 @@ config PSTORE_BLK_PMSG_SIZE NOTE that, both Kconfig and module parameters can configure pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_CONSOLE_SIZE + int "Size in Kbytes of console log to store" + depends on PSTORE_BLK + depends on PSTORE_CONSOLE + default 64 + help + This just sets size of console log (console_size) to store via + pstore/blk. The size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig.
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index a9e7078f08d6..082f6cdaf4bd 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -35,6 +35,14 @@ static long pmsg_size = -1; module_param(pmsg_size, long, 0400); MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes"); +#if IS_ENABLED(CONFIG_PSTORE_CONSOLE) +static long console_size = CONFIG_PSTORE_BLK_CONSOLE_SIZE; +#else +static long console_size = -1; +#endif +module_param(console_size, long, 0400); +MODULE_PARM_DESC(console_size, "console size in kbytes"); + /* * blkdev - the block device to use for pstore storage * @@ -91,7 +99,8 @@ struct bdev_info { * whole disk). * On success, the number of bytes should be returned, others * means error. - * @write: The same as @read. + * @write: The same as @read, but the following error number: + * -EBUSY means try to write again later. * @panic_write:The write operation only used for panic case. It's optional * if you do not care panic log. The parameters and return value * are the same as @read. 
@@ -142,6 +151,7 @@ static int psblk_register_do(struct pstore_device_info *dev) verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); + verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE); #undef verify_size pstore_zone_info->total_size = dev->total_size; diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 2f8e3b349edb..ee0b64da410d 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -91,10 +91,12 @@ struct pstore_zone { * * @kpszs: kmsg dump storage zones * @ppsz: pmsg storage zone + * @cpsz: console storage zone * @kmsg_max_cnt: max count of @kpszs * @kmsg_read_cnt: counter of total read kmsg dumps * @kmsg_write_cnt: counter of total kmsg dump writes * @pmsg_read_cnt: counter of total read pmsg zone + * @console_read_cnt: counter of total read console zone * @oops_counter: counter of oops dumps * @panic_counter: counter of panic dumps * @recovered: whether finished recovering data from storage @@ -106,10 +108,12 @@ struct pstore_zone { struct psz_context { struct pstore_zone **kpszs; struct pstore_zone *ppsz; + struct pstore_zone *cpsz; unsigned int kmsg_max_cnt; unsigned int kmsg_read_cnt; unsigned int kmsg_write_cnt; unsigned int pmsg_read_cnt; + unsigned int console_read_cnt; /* * These counters should be calculated during recovery. * It records the oops/panic times after crashes rather than boots. 
@@ -129,6 +133,9 @@ struct psz_context { }; static struct psz_context pstore_zone_cxt; +static void psz_flush_all_dirty_zones(struct work_struct *); +static DECLARE_DELAYED_WORK(psz_cleaner, psz_flush_all_dirty_zones); + /** * enum psz_flush_mode - flush mode for psz_zone_write() * @@ -237,6 +244,9 @@ static int psz_zone_write(struct pstore_zone *zone, return 0; dirty: atomic_set(&zone->dirty, true); + /* flush dirty zones nicely */ + if (wcnt == -EBUSY && !is_on_panic()) + schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(500)); return -EBUSY; } @@ -293,6 +303,21 @@ static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new) return 0; } +static void psz_flush_all_dirty_zones(struct work_struct *work) +{ + struct psz_context *cxt = &pstore_zone_cxt; + int ret = 0; + + if (cxt->ppsz) + ret |= psz_flush_dirty_zone(cxt->ppsz); + if (cxt->cpsz) + ret |= psz_flush_dirty_zone(cxt->cpsz); + if (cxt->kpszs) + ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + if (ret && cxt->pstore_zone_info) + schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000)); +} + static int psz_kmsg_recover_data(struct psz_context *cxt) { struct pstore_zone_info *info = cxt->pstore_zone_info; @@ -545,6 +570,10 @@ static inline int psz_recovery(struct psz_context *cxt) goto out; ret = psz_recover_zone(cxt, cxt->ppsz); + if (ret) + goto out; + + ret = psz_recover_zone(cxt, cxt->cpsz); out: if (unlikely(ret)) @@ -562,6 +591,7 @@ static int psz_pstore_open(struct pstore_info *psi) cxt->kmsg_read_cnt = 0; cxt->pmsg_read_cnt = 0; + cxt->console_read_cnt = 0; return 0; } @@ -626,8 +656,9 @@ static int psz_pstore_erase(struct pstore_record *record) return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); case PSTORE_TYPE_PMSG: return psz_record_erase(cxt, cxt->ppsz); - default: - return -EINVAL; + case PSTORE_TYPE_CONSOLE: + return psz_record_erase(cxt, cxt->cpsz); + default: return -EINVAL; } } @@ -690,9 +721,10 @@ static int notrace psz_kmsg_write(struct psz_context 
*cxt, return -ENOSPC; ret = psz_kmsg_write_record(cxt, record); - if (!ret) { + if (!ret && is_on_panic()) { + /* ensure all data are flushed to storage when panic */ pr_debug("try to flush other dirty zones\n"); - psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + psz_flush_all_dirty_zones(NULL); } /* always return 0 as we had handled it on buffer */ @@ -756,9 +788,18 @@ static int notrace psz_pstore_write(struct pstore_record *record) record->reason == KMSG_DUMP_PANIC) atomic_set(&cxt->on_panic, 1); + /* + * if on panic, do not write except panic records + * Fix case that panic_write prints log which wakes up console backend. + */ + if (is_on_panic() && record->type != PSTORE_TYPE_DMESG) + return -EBUSY; + switch (record->type) { case PSTORE_TYPE_DMESG: return psz_kmsg_write(cxt, record); + case PSTORE_TYPE_CONSOLE: + return psz_record_write(cxt->cpsz, record); case PSTORE_TYPE_PMSG: return psz_record_write(cxt->ppsz, record); default: @@ -783,6 +824,13 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) return zone; } + if (cxt->console_read_cnt == 0) { + cxt->console_read_cnt++; + zone = cxt->cpsz; + if (psz_old_ok(zone)) + return zone; + } + return NULL; } @@ -893,6 +941,8 @@ next_zone: readop = psz_kmsg_read; record->id = cxt->kmsg_read_cnt - 1; break; + case PSTORE_TYPE_CONSOLE: + fallthrough; case PSTORE_TYPE_PMSG: readop = psz_record_read; break; @@ -953,6 +1003,8 @@ static void psz_free_all_zones(struct psz_context *cxt) psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); if (cxt->ppsz) psz_free_zone(&cxt->ppsz); + if (cxt->cpsz) + psz_free_zone(&cxt->cpsz); } static struct pstore_zone *psz_init_zone(enum pstore_type_id type, @@ -1054,6 +1106,15 @@ static int psz_alloc_zones(struct psz_context *cxt) goto free_out; } + off_size += info->console_size; + cxt->cpsz = psz_init_zone(PSTORE_TYPE_CONSOLE, &off, + info->console_size); + if (IS_ERR(cxt->cpsz)) { + err = PTR_ERR(cxt->cpsz); + cxt->cpsz = NULL; + goto free_out; + } + 
cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, info->total_size - off_size, info->kmsg_size, &cxt->kmsg_max_cnt); @@ -1088,7 +1149,7 @@ int register_pstore_zone(struct pstore_zone_info *info) return -EINVAL; } - if (!info->kmsg_size && !info->pmsg_size) { + if (!info->kmsg_size && !info->pmsg_size && !info->console_size) { pr_warn("at least one record size must be non-zero\n"); return -EINVAL; } @@ -1111,6 +1172,7 @@ int register_pstore_zone(struct pstore_zone_info *info) check_size(total_size, 4096); check_size(kmsg_size, SECTOR_SIZE); check_size(pmsg_size, SECTOR_SIZE); + check_size(console_size, SECTOR_SIZE); #undef check_size @@ -1137,6 +1199,7 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_debug("\ttotal size : %ld Bytes\n", info->total_size); pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); + pr_debug("\tconsole size : %ld Bytes\n", info->console_size); err = psz_alloc_zones(cxt); if (err) { @@ -1170,6 +1233,10 @@ int register_pstore_zone(struct pstore_zone_info *info) cxt->pstore.flags |= PSTORE_FLAGS_PMSG; pr_cont(" pmsg"); } + if (info->console_size) { + cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; + pr_cont(" console"); + } pr_cont("\n"); err = pstore_register(&cxt->pstore); @@ -1211,6 +1278,10 @@ void unregister_pstore_zone(struct pstore_zone_info *info) /* Stop incoming writes from pstore. */ pstore_unregister(&cxt->pstore); + /* Flush any pending writes. */ + psz_flush_all_dirty_zones(NULL); + flush_delayed_work(&psz_cleaner); + /* Clean up allocations. */ kfree(cxt->pstore.buf); cxt->pstore.buf = NULL; diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index 29c367a3bd80..904ee67f4ba2 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -18,11 +18,12 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * it must be multiple of SECTOR_SIZE(512 Bytes). * @max_reason: Maximum kmsg dump reason to store. 
* @pmsg_size: The size of pmsg zone which is the same as @kmsg_size. + * @console_size:The size of console zone which is the same as @kmsg_size. * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others * means error. - * @write: The same as @read. + * @write: The same as @read, but -EBUSY means try to write again later. * @panic_write:The write operation only used for panic case. It's optional * if you do not care panic log. The parameters and return value * are the same as @read. @@ -35,6 +36,7 @@ struct pstore_zone_info { unsigned long kmsg_size; int max_reason; unsigned long pmsg_size; + unsigned long console_size; pstore_zone_read_op read; pstore_zone_write_op write; pstore_zone_write_op panic_write; -- cgit v1.2.3 From 34327e9fd213414b35eb70aa512c4e39b2095907 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:01 +0800 Subject: pstore/zone,blk: Add ftrace frontend support Support backend for ftrace. To enable ftrace backend, just make ftrace_size be greater than 0 and a multiple of 4096. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-6-keescook@chromium.org/ Co-developed-by: Colin Ian King Signed-off-by: Colin Ian King Link: https://lore.kernel.org/lkml/20200512170719.221514-1-colin.king@canonical.com Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 12 +++++ fs/pstore/blk.c | 9 ++++ fs/pstore/zone.c | 114 +++++++++++++++++++++++++++++++++++++++++++- include/linux/pstore_zone.h | 2 + 4 files changed, 136 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 126aa6c3ecf2..c2237984b407 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -248,3 +248,15 @@ config PSTORE_BLK_CONSOLE_SIZE NOTE that, both Kconfig and module parameters can configure pstore/blk, but module parameters have priority over Kconfig. 
+ +config PSTORE_BLK_FTRACE_SIZE + int "Size in Kbytes of ftrace log to store" + depends on PSTORE_BLK + depends on PSTORE_FTRACE + default 64 + help + This just sets size of ftrace log (ftrace_size) for pstore/blk. The + size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 082f6cdaf4bd..e0d95fb48428 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -43,6 +43,14 @@ static long console_size = -1; module_param(console_size, long, 0400); MODULE_PARM_DESC(console_size, "console size in kbytes"); +#if IS_ENABLED(CONFIG_PSTORE_FTRACE) +static long ftrace_size = CONFIG_PSTORE_BLK_FTRACE_SIZE; +#else +static long ftrace_size = -1; +#endif +module_param(ftrace_size, long, 0400); +MODULE_PARM_DESC(ftrace_size, "ftrace size in kbytes"); + /* * blkdev - the block device to use for pstore storage * @@ -152,6 +160,7 @@ static int psblk_register_do(struct pstore_device_info *dev) verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE); + verify_size(ftrace_size, 4096, dev->flags & PSTORE_FLAGS_FTRACE); #undef verify_size pstore_zone_info->total_size = dev->total_size; diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index ee0b64da410d..2d9f452360bb 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -92,11 +92,14 @@ struct pstore_zone { * @kpszs: kmsg dump storage zones * @ppsz: pmsg storage zone * @cpsz: console storage zone + * @fpszs: ftrace storage zones * @kmsg_max_cnt: max count of @kpszs * @kmsg_read_cnt: counter of total read kmsg dumps * @kmsg_write_cnt: counter of total kmsg dump writes * @pmsg_read_cnt: counter of total read pmsg zone * @console_read_cnt: counter of total read console zone + * @ftrace_max_cnt: max count of @fpszs + * @ftrace_read_cnt: counter 
of max read ftrace zone * @oops_counter: counter of oops dumps * @panic_counter: counter of panic dumps * @recovered: whether finished recovering data from storage @@ -109,11 +112,14 @@ struct psz_context { struct pstore_zone **kpszs; struct pstore_zone *ppsz; struct pstore_zone *cpsz; + struct pstore_zone **fpszs; unsigned int kmsg_max_cnt; unsigned int kmsg_read_cnt; unsigned int kmsg_write_cnt; unsigned int pmsg_read_cnt; unsigned int console_read_cnt; + unsigned int ftrace_max_cnt; + unsigned int ftrace_read_cnt; /* * These counters should be calculated during recovery. * It records the oops/panic times after crashes rather than boots. @@ -314,6 +320,8 @@ static void psz_flush_all_dirty_zones(struct work_struct *work) ret |= psz_flush_dirty_zone(cxt->cpsz); if (cxt->kpszs) ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + if (cxt->fpszs) + ret |= psz_flush_dirty_zones(cxt->fpszs, cxt->ftrace_max_cnt); if (ret && cxt->pstore_zone_info) schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000)); } @@ -550,6 +558,31 @@ free_oldbuf: return ret; } +static int psz_recover_zones(struct psz_context *cxt, + struct pstore_zone **zones, unsigned int cnt) +{ + int ret; + unsigned int i; + struct pstore_zone *zone; + + if (!zones) + return 0; + + for (i = 0; i < cnt; i++) { + zone = zones[i]; + if (unlikely(!zone)) + continue; + ret = psz_recover_zone(cxt, zone); + if (ret) + goto recover_fail; + } + + return 0; +recover_fail: + pr_debug("recover %s[%u] failed\n", zone->name, i); + return ret; +} + /** * psz_recovery() - recover data from storage * @cxt: the context of pstore/zone @@ -574,6 +607,10 @@ static inline int psz_recovery(struct psz_context *cxt) goto out; ret = psz_recover_zone(cxt, cxt->cpsz); + if (ret) + goto out; + + ret = psz_recover_zones(cxt, cxt->fpszs, cxt->ftrace_max_cnt); out: if (unlikely(ret)) @@ -592,6 +629,7 @@ static int psz_pstore_open(struct pstore_info *psi) cxt->kmsg_read_cnt = 0; cxt->pmsg_read_cnt = 0; cxt->console_read_cnt = 
0; + cxt->ftrace_read_cnt = 0; return 0; } @@ -658,6 +696,10 @@ static int psz_pstore_erase(struct pstore_record *record) return psz_record_erase(cxt, cxt->ppsz); case PSTORE_TYPE_CONSOLE: return psz_record_erase(cxt, cxt->cpsz); + case PSTORE_TYPE_FTRACE: + if (record->id >= cxt->ftrace_max_cnt) + return -EINVAL; + return psz_record_erase(cxt, cxt->fpszs[record->id]); default: return -EINVAL; } } @@ -802,6 +844,13 @@ static int notrace psz_pstore_write(struct pstore_record *record) return psz_record_write(cxt->cpsz, record); case PSTORE_TYPE_PMSG: return psz_record_write(cxt->ppsz, record); + case PSTORE_TYPE_FTRACE: { + int zonenum = smp_processor_id(); + + if (!cxt->fpszs) + return -ENOSPC; + return psz_record_write(cxt->fpszs[zonenum], record); + } default: return -EINVAL; } @@ -817,6 +866,14 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) return zone; } + if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) + /* + * No need psz_old_ok(). Let psz_ftrace_read() do so for + * combination. psz_ftrace_read() should traverse over + * all zones in case of some zone without data. 
+ */ + return cxt->fpszs[cxt->ftrace_read_cnt++]; + if (cxt->pmsg_read_cnt == 0) { cxt->pmsg_read_cnt++; zone = cxt->ppsz; @@ -891,6 +948,38 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, return size + hlen; } +/* try to combine all ftrace zones */ +static ssize_t psz_ftrace_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_context *cxt; + struct psz_buffer *buf; + int ret; + + if (!zone || !record) + return -ENOSPC; + + if (!psz_old_ok(zone)) + goto out; + + buf = (struct psz_buffer *)zone->oldbuf; + if (!buf) + return -ENOMSG; + + ret = pstore_ftrace_combine_log(&record->buf, &record->size, + (char *)buf->data, atomic_read(&buf->datalen)); + if (unlikely(ret)) + return ret; + +out: + cxt = record->psi->data; + if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) + /* then, read next ftrace zone */ + return -ENOMSG; + record->id = 0; + return record->size ? record->size : -ENOMSG; +} + static ssize_t psz_record_read(struct pstore_zone *zone, struct pstore_record *record) { @@ -941,6 +1030,9 @@ next_zone: readop = psz_kmsg_read; record->id = cxt->kmsg_read_cnt - 1; break; + case PSTORE_TYPE_FTRACE: + readop = psz_ftrace_read; + break; case PSTORE_TYPE_CONSOLE: fallthrough; case PSTORE_TYPE_PMSG: @@ -1005,6 +1097,8 @@ static void psz_free_all_zones(struct psz_context *cxt) psz_free_zone(&cxt->ppsz); if (cxt->cpsz) psz_free_zone(&cxt->cpsz); + if (cxt->fpszs) + psz_free_zones(&cxt->fpszs, &cxt->ftrace_max_cnt); } static struct pstore_zone *psz_init_zone(enum pstore_type_id type, @@ -1115,6 +1209,17 @@ static int psz_alloc_zones(struct psz_context *cxt) goto free_out; } + off_size += info->ftrace_size; + cxt->fpszs = psz_init_zones(PSTORE_TYPE_FTRACE, &off, + info->ftrace_size, + info->ftrace_size / nr_cpu_ids, + &cxt->ftrace_max_cnt); + if (IS_ERR(cxt->fpszs)) { + err = PTR_ERR(cxt->fpszs); + cxt->fpszs = NULL; + goto free_out; + } + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, info->total_size - off_size, info->kmsg_size, 
&cxt->kmsg_max_cnt); @@ -1149,7 +1254,8 @@ int register_pstore_zone(struct pstore_zone_info *info) return -EINVAL; } - if (!info->kmsg_size && !info->pmsg_size && !info->console_size) { + if (!info->kmsg_size && !info->pmsg_size && !info->console_size && + !info->ftrace_size) { pr_warn("at least one record size must be non-zero\n"); return -EINVAL; } @@ -1173,6 +1279,7 @@ int register_pstore_zone(struct pstore_zone_info *info) check_size(kmsg_size, SECTOR_SIZE); check_size(pmsg_size, SECTOR_SIZE); check_size(console_size, SECTOR_SIZE); + check_size(ftrace_size, SECTOR_SIZE); #undef check_size @@ -1200,6 +1307,7 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); pr_debug("\tconsole size : %ld Bytes\n", info->console_size); + pr_debug("\tftrace size : %ld Bytes\n", info->ftrace_size); err = psz_alloc_zones(cxt); if (err) { @@ -1237,6 +1345,10 @@ int register_pstore_zone(struct pstore_zone_info *info) cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; pr_cont(" console"); } + if (info->ftrace_size) { + cxt->pstore.flags |= PSTORE_FLAGS_FTRACE; + pr_cont(" ftrace"); + } pr_cont("\n"); err = pstore_register(&cxt->pstore); diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index 904ee67f4ba2..6f16b0dd834a 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -19,6 +19,7 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @max_reason: Maximum kmsg dump reason to store. * @pmsg_size: The size of pmsg zone which is the same as @kmsg_size. * @console_size:The size of console zone which is the same as @kmsg_size. + * @ftrace_size:The size of ftrace zone which is the same as @kmsg_size. * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. 
* On success, the number of bytes should be returned, others @@ -37,6 +38,7 @@ struct pstore_zone_info { int max_reason; unsigned long pmsg_size; unsigned long console_size; + unsigned long ftrace_size; pstore_zone_read_op read; pstore_zone_write_op write; pstore_zone_write_op panic_write; -- cgit v1.2.3 From 335426c6dcdd338d6b7c939c2da15fc9c5dd4959 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:03 +0800 Subject: pstore/zone: Provide way to skip "broken" zone for MTD devices One requirement to support MTD devices in pstore/zone is having a way to declare certain regions as broken. Add this support to pstore/zone. The MTD driver should return -ENOMSG when encountering a bad region, which tells pstore/zone to skip and try the next one. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-8-keescook@chromium.org/ Co-developed-by: Colin Ian King Signed-off-by: Colin Ian King Link: https://lore.kernel.org/lkml/20200512173801.222666-1-colin.king@canonical.com Signed-off-by: Kees Cook --- fs/pstore/blk.c | 10 +++++-- fs/pstore/zone.c | 65 ++++++++++++++++++++++++++++++++++++--------- include/linux/pstore_blk.h | 3 ++- include/linux/pstore_zone.h | 12 ++++++--- 4 files changed, 71 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index e0d95fb48428..67343d6aa27a 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -109,9 +109,12 @@ struct bdev_info { * means error. * @write: The same as @read, but the following error number: * -EBUSY means try to write again later. + * -ENOMSG means to try next zone. * @panic_write:The write operation only used for panic case. It's optional - * if you do not care panic log. The parameters and return value - * are the same as @read. + * if you do not care panic log. The parameters are relative + * value to storage. + * On success, the number of bytes should be returned, others + * excluding -ENOMSG mean error.
-ENOMSG means to try next zone. */ struct pstore_device_info { unsigned long total_size; @@ -337,6 +340,9 @@ static ssize_t psblk_blk_panic_write(const char *buf, size_t size, /* size and off must align to SECTOR_SIZE for block device */ ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT, size >> SECTOR_SHIFT); + /* try next zone */ + if (ret == -ENOMSG) + return ret; return ret ? -EIO : size; } diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 2d9f452360bb..add26b125984 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -249,6 +249,9 @@ static int psz_zone_write(struct pstore_zone *zone, return 0; dirty: + /* no need to mark dirty if going to try next zone */ + if (wcnt == -ENOMSG) + return -ENOMSG; atomic_set(&zone->dirty, true); /* flush dirty zones nicely */ if (wcnt == -EBUSY && !is_on_panic()) @@ -391,7 +394,11 @@ static int psz_kmsg_recover_meta(struct psz_context *cxt) return -EINVAL; rcnt = info->read((char *)buf, len, zone->off); - if (rcnt != len) { + if (rcnt == -ENOMSG) { + pr_debug("%s with id %lu may be broken, skip\n", + zone->name, i); + continue; + } else if (rcnt != len) { pr_err("read %s with id %lu failed\n", zone->name, i); return (int)rcnt < 0 ? (int)rcnt : -EIO; } @@ -725,24 +732,58 @@ static void psz_write_kmsg_hdr(struct pstore_zone *zone, hdr->counter = 0; } +/* + * In case zone is broken, which may occur to MTD device, we try each zones, + * start at cxt->kmsg_write_cnt. 
+ */ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, struct pstore_record *record) { size_t size, hlen; struct pstore_zone *zone; - unsigned int zonenum; + unsigned int i; - zonenum = cxt->kmsg_write_cnt; - zone = cxt->kpszs[zonenum]; - if (unlikely(!zone)) - return -ENOSPC; - cxt->kmsg_write_cnt = (zonenum + 1) % cxt->kmsg_max_cnt; + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + unsigned int zonenum, len; + int ret; - pr_debug("write %s to zone id %d\n", zone->name, zonenum); - psz_write_kmsg_hdr(zone, record); - hlen = sizeof(struct psz_kmsg_header); - size = min_t(size_t, record->size, zone->buffer_size - hlen); - return psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); + zonenum = (cxt->kmsg_write_cnt + i) % cxt->kmsg_max_cnt; + zone = cxt->kpszs[zonenum]; + if (unlikely(!zone)) + return -ENOSPC; + + /* avoid destroying old data, allocate a new one */ + len = zone->buffer_size + sizeof(*zone->buffer); + zone->oldbuf = zone->buffer; + zone->buffer = kzalloc(len, GFP_KERNEL); + if (!zone->buffer) { + zone->buffer = zone->oldbuf; + return -ENOMEM; + } + zone->buffer->sig = zone->oldbuf->sig; + + pr_debug("write %s to zone id %d\n", zone->name, zonenum); + psz_write_kmsg_hdr(zone, record); + hlen = sizeof(struct psz_kmsg_header); + size = min_t(size_t, record->size, zone->buffer_size - hlen); + ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); + if (likely(!ret || ret != -ENOMSG)) { + cxt->kmsg_write_cnt = zonenum + 1; + cxt->kmsg_write_cnt %= cxt->kmsg_max_cnt; + /* no need to try next zone, free last zone buffer */ + kfree(zone->oldbuf); + zone->oldbuf = NULL; + return ret; + } + + pr_debug("zone %u may be broken, try next dmesg zone\n", + zonenum); + kfree(zone->buffer); + zone->buffer = zone->oldbuf; + zone->oldbuf = NULL; + } + + return -EBUSY; } static int notrace psz_kmsg_write(struct psz_context *cxt, diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 4501977b1336..ccba8c068752 100644 --- 
a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -14,7 +14,8 @@ * @start_sect: start sector to block device * @sects: sectors count on buf * - * Return: On success, zero should be returned. Others mean error. + * Return: On success, zero should be returned. Others excluding -ENOMSG + * mean error. -ENOMSG means to try next zone. * * Panic write to block device must be aligned to SECTOR_SIZE. */ diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index 6f16b0dd834a..e79a18e41064 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -23,11 +23,15 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others - * means error. - * @write: The same as @read, but -EBUSY means try to write again later. + * mean error. + * @write: The same as @read, but the following error number: + * -EBUSY means try to write again later. + * -ENOMSG means to try next zone. * @panic_write:The write operation only used for panic case. It's optional - * if you do not care panic log. The parameters and return value - * are the same as @read. + * if you do not care panic log. The parameters are relative + * value to storage. + * On success, the number of bytes should be returned, others + * excluding -ENOMSG mean error. -ENOMSG means to try next zone. */ struct pstore_zone_info { struct module *owner; -- cgit v1.2.3 From 1525fb3bb6d69028b3941d34363397c28345ffab Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:04 +0800 Subject: pstore/blk: Provide way to query pstore configuration In order to configure itself, the MTD backend needs to be able to query the current pstore configuration. Introduce pstore_blk_get_config() for this purpose. 
Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-9-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/blk.c | 37 ++++++++++++++++++++++++++++++------- include/linux/pstore_blk.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 67343d6aa27a..631bc27c8661 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -94,6 +94,17 @@ struct bdev_info { sector_t start_sect; }; +#define check_size(name, alignsize) ({ \ + long _##name_ = (name); \ + _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \ + if (_##name_ & ((alignsize) - 1)) { \ + pr_info(#name " must align to %d\n", \ + (alignsize)); \ + _##name_ = ALIGN(name, (alignsize)); \ + } \ + _##name_; \ +}) + /** * struct pstore_device_info - back-end pstore/blk driver structure. * @@ -149,13 +160,11 @@ static int psblk_register_do(struct pstore_device_info *dev) dev->flags = UINT_MAX; #define verify_size(name, alignsize, enabled) { \ - long _##name_ = (enabled) ? (name) : 0; \ - _##name_ = _##name_ <= 0 ? 
0 : (_##name_ * 1024); \ - if (_##name_ & ((alignsize) - 1)) { \ - pr_info(#name " must align to %d\n", \ - (alignsize)); \ - _##name_ = ALIGN(name, (alignsize)); \ - } \ + long _##name_; \ + if (enabled) \ + _##name_ = check_size(name, alignsize); \ + else \ + _##name_ = 0; \ name = _##name_ / 1024; \ pstore_zone_info->name = _##name_; \ } @@ -453,6 +462,20 @@ void unregister_pstore_blk(unsigned int major) } EXPORT_SYMBOL_GPL(unregister_pstore_blk); +/* get information of pstore/blk */ +int pstore_blk_get_config(struct pstore_blk_config *info) +{ + strncpy(info->device, blkdev, 80); + info->max_reason = max_reason; + info->kmsg_size = check_size(kmsg_size, 4096); + info->pmsg_size = check_size(pmsg_size, 4096); + info->ftrace_size = check_size(ftrace_size, 4096); + info->console_size = check_size(console_size, 4096); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_blk_get_config); + static void __exit pstore_blk_exit(void) { mutex_lock(&pstore_blk_lock); diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index ccba8c068752..0c40774e71e0 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -49,4 +49,32 @@ struct pstore_blk_info { int register_pstore_blk(struct pstore_blk_info *info); void unregister_pstore_blk(unsigned int major); +/** + * struct pstore_blk_config - the pstore_blk backend configuration + * + * @device: Name of the desired block device + * @max_reason: Maximum kmsg dump reason to store to block device + * @kmsg_size: Total size of for kmsg dumps + * @pmsg_size: Total size of the pmsg storage area + * @console_size: Total size of the console storage area + * @ftrace_size: Total size for ftrace logging data (for all CPUs) + */ +struct pstore_blk_config { + char device[80]; + enum kmsg_dump_reason max_reason; + unsigned long kmsg_size; + unsigned long pmsg_size; + unsigned long console_size; + unsigned long ftrace_size; +}; + +/** + * pstore_blk_get_config - get a copy of the pstore_blk backend configuration + * + * 
@info: The struct pstore_blk_config to be filled in + * + * Failure returns negative error code, and success returns 0. + */ +int pstore_blk_get_config(struct pstore_blk_config *info); + #endif -- cgit v1.2.3 From 7dcb7848ba110ff192efc917d1a6de66b4c9ca4f Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:05 +0800 Subject: pstore/blk: Support non-block storage devices Add support for non-block devices (e.g. MTD). A non-block driver calls register_pstore_device() to register itself. In addition, pstore/zone is updated to handle non-block devices, where an erase must be done before a write. Without this, there is no way to remove records stored to an MTD. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-10-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- Documentation/admin-guide/pstore-blk.rst | 17 ++++-- fs/pstore/blk.c | 93 +++++++++++++++++--------------- fs/pstore/zone.c | 8 ++- include/linux/pstore_blk.h | 38 +++++++++++++ include/linux/pstore_zone.h | 6 +++ 5 files changed, 114 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst index bef8c7436721..d45341e55e82 100644 --- a/Documentation/admin-guide/pstore-blk.rst +++ b/Documentation/admin-guide/pstore-blk.rst @@ -7,8 +7,8 @@ Introduction ------------ pstore block (pstore/blk) is an oops/panic logger that writes its logs to a -block device before the system crashes. You can get these log files by -mounting pstore filesystem like:: +block device and non-block device before the system crashes. You can get +these log files by mounting pstore filesystem like:: mount -t pstore pstore /sys/fs/pstore @@ -24,8 +24,8 @@ Configurations for user determine how pstore/blk works, such as pmsg_size, kmsg_size and so on. All of them support both Kconfig and module parameters, but module parameters have priority over Kconfig.
-Configurations for driver are all about block device, such as total_size -of block device and read/write operations. +Configurations for driver are all about block device and non-block device, +such as total_size of block device and read/write operations. Configurations for user ----------------------- @@ -152,6 +152,15 @@ driver uses ``register_pstore_blk`` to register to pstore/blk. .. kernel-doc:: fs/pstore/blk.c :identifiers: register_pstore_blk +A non-block device driver uses ``register_pstore_device`` with +``struct pstore_device_info`` to register to pstore/blk. + +.. kernel-doc:: fs/pstore/blk.c + :identifiers: register_pstore_device + +.. kernel-doc:: include/linux/pstore_blk.h + :identifiers: pstore_device_info + Compression and header ---------------------- diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 631bc27c8661..881b40ed8142 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -105,55 +105,22 @@ struct bdev_info { _##name_; \ }) -/** - * struct pstore_device_info - back-end pstore/blk driver structure. - * - * @total_size: The total size in bytes pstore/blk can use. It must be greater - * than 4096 and be multiple of 4096. - * @flags: Refer to macro starting with PSTORE_FLAGS defined in - * linux/pstore.h. It means what front-ends this device support. - * Zero means all backends for compatible. - * @read: The general read operation. Both of the function parameters - * @size and @offset are relative value to bock device (not the - * whole disk). - * On success, the number of bytes should be returned, others - * means error. - * @write: The same as @read, but the following error number: - * -EBUSY means try to write again later. - * -ENOMSG means to try next zone. - * @panic_write:The write operation only used for panic case. It's optional - * if you do not care panic log. The parameters are relative - * value to storage. - * On success, the number of bytes should be returned, others - * excluding -ENOMSG mean error. 
-ENOMSG means to try next zone. - */ -struct pstore_device_info { - unsigned long total_size; - unsigned int flags; - pstore_zone_read_op read; - pstore_zone_write_op write; - pstore_zone_write_op panic_write; -}; - -static int psblk_register_do(struct pstore_device_info *dev) +static int __register_pstore_device(struct pstore_device_info *dev) { int ret; + lockdep_assert_held(&pstore_blk_lock); + if (!dev || !dev->total_size || !dev->read || !dev->write) return -EINVAL; - mutex_lock(&pstore_blk_lock); - /* someone already registered before */ - if (pstore_zone_info) { - mutex_unlock(&pstore_blk_lock); + if (pstore_zone_info) return -EBUSY; - } + pstore_zone_info = kzalloc(sizeof(struct pstore_zone_info), GFP_KERNEL); - if (!pstore_zone_info) { - mutex_unlock(&pstore_blk_lock); + if (!pstore_zone_info) return -ENOMEM; - } /* zero means not limit on which backends to attempt to store. */ if (!dev->flags) @@ -179,6 +146,7 @@ static int psblk_register_do(struct pstore_device_info *dev) pstore_zone_info->max_reason = max_reason; pstore_zone_info->read = dev->read; pstore_zone_info->write = dev->write; + pstore_zone_info->erase = dev->erase; pstore_zone_info->panic_write = dev->panic_write; pstore_zone_info->name = KBUILD_MODNAME; pstore_zone_info->owner = THIS_MODULE; @@ -188,20 +156,51 @@ static int psblk_register_do(struct pstore_device_info *dev) kfree(pstore_zone_info); pstore_zone_info = NULL; } + return ret; +} +/** + * register_pstore_device() - register non-block device to pstore/blk + * + * @dev: non-block device information + * + * Return: + * * 0 - OK + * * Others - something error. 
+ */ +int register_pstore_device(struct pstore_device_info *dev) +{ + int ret; + + mutex_lock(&pstore_blk_lock); + ret = __register_pstore_device(dev); mutex_unlock(&pstore_blk_lock); + return ret; } +EXPORT_SYMBOL_GPL(register_pstore_device); -static void psblk_unregister_do(struct pstore_device_info *dev) +static void __unregister_pstore_device(struct pstore_device_info *dev) { - mutex_lock(&pstore_blk_lock); + lockdep_assert_held(&pstore_blk_lock); if (pstore_zone_info && pstore_zone_info->read == dev->read) { unregister_pstore_zone(pstore_zone_info); kfree(pstore_zone_info); pstore_zone_info = NULL; } +} + +/** + * unregister_pstore_device() - unregister non-block device from pstore/blk + * + * @dev: non-block device information + */ +void unregister_pstore_device(struct pstore_device_info *dev) +{ + mutex_lock(&pstore_blk_lock); + __unregister_pstore_device(dev); mutex_unlock(&pstore_blk_lock); } +EXPORT_SYMBOL_GPL(unregister_pstore_device); /** * psblk_get_bdev() - open block device @@ -396,9 +395,10 @@ static int __register_pstore_blk(struct pstore_blk_info *info) dev.flags = info->flags; dev.read = psblk_generic_blk_read; dev.write = psblk_generic_blk_write; + dev.erase = NULL; dev.panic_write = info->panic_write ? 
psblk_blk_panic_write : NULL; - ret = psblk_register_do(&dev); + ret = __register_pstore_device(&dev); if (ret) goto err_put_bdev; @@ -442,7 +442,7 @@ static void __unregister_pstore_blk(unsigned int major) lockdep_assert_held(&pstore_blk_lock); if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) { - psblk_unregister_do(&dev); + __unregister_pstore_device(&dev); psblk_put_bdev(psblk_bdev, holder); blkdev_panic_write = NULL; psblk_bdev = NULL; @@ -481,6 +481,13 @@ static void __exit pstore_blk_exit(void) mutex_lock(&pstore_blk_lock); if (psblk_bdev) __unregister_pstore_blk(MAJOR(psblk_bdev->bd_dev)); + else { + struct pstore_device_info dev = { }; + + if (pstore_zone_info) + dev.read = pstore_zone_info->read; + __unregister_pstore_device(&dev); + } mutex_unlock(&pstore_blk_lock); } module_exit(pstore_blk_exit); diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index add26b125984..819428dfa32f 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -660,15 +660,21 @@ static inline int psz_kmsg_erase(struct psz_context *cxt, struct psz_buffer *buffer = zone->buffer; struct psz_kmsg_header *hdr = (struct psz_kmsg_header *)buffer->data; + size_t size; if (unlikely(!psz_ok(zone))) return 0; + /* this zone is already updated, no need to erase */ if (record->count != hdr->counter) return 0; + size = buffer_datalen(zone) + sizeof(*zone->buffer); atomic_set(&zone->buffer->datalen, 0); - return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + if (cxt->pstore_zone_info->erase) + return cxt->pstore_zone_info->erase(size, zone->off); + else + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); } static inline int psz_record_erase(struct psz_context *cxt, diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 0c40774e71e0..61e914522b01 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -49,6 +49,44 @@ struct pstore_blk_info { int register_pstore_blk(struct pstore_blk_info *info); void unregister_pstore_blk(unsigned int major); +/** + 
* struct pstore_device_info - back-end pstore/blk driver structure. + * + * @total_size: The total size in bytes pstore/blk can use. It must be greater + * than 4096 and be multiple of 4096. + * @flags: Refer to macro starting with PSTORE_FLAGS defined in + * linux/pstore.h. It means what front-ends this device supports. + * Zero means all backends for compatibility. + * @read: The general read operation. Both of the function parameters + * @size and @offset are relative value to block device (not the + * whole disk). + * On success, the number of bytes should be returned, others + * mean error. + * @write: The same as @read, but the following error number: + * -EBUSY means try to write again later. + * -ENOMSG means to try next zone. + * @erase: The general erase operation for device with special removing + * job. Both of the function parameters @size and @offset are + * relative value to storage. + * Return 0 on success and others on failure. + * @panic_write:The write operation only used for panic case. It's optional + * if you do not care panic log. The parameters are relative + * value to storage. + * On success, the number of bytes should be returned, others + * excluding -ENOMSG mean error. -ENOMSG means to try next zone.
+ */ +struct pstore_device_info { + unsigned long total_size; + unsigned int flags; + pstore_zone_read_op read; + pstore_zone_write_op write; + pstore_zone_erase_op erase; + pstore_zone_write_op panic_write; +}; + +int register_pstore_device(struct pstore_device_info *dev); +void unregister_pstore_device(struct pstore_device_info *dev); + /** * struct pstore_blk_config - the pstore_blk backend configuration * diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index e79a18e41064..1e35eaa33e5e 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -7,6 +7,7 @@ typedef ssize_t (*pstore_zone_read_op)(char *, size_t, loff_t); typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); +typedef ssize_t (*pstore_zone_erase_op)(size_t, loff_t); /** * struct pstore_zone_info - pstore/zone back-end driver structure * @@ -27,6 +28,10 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @write: The same as @read, but the following error number: * -EBUSY means try to write again later. * -ENOMSG means to try next zone. + * @erase: The general erase operation for device with special removing + * job. Both of the function parameters @size and @offset are + * relative value to storage. + * Return 0 on success and others on failure. * @panic_write:The write operation only used for panic case. It's optional * if you do not care panic log. The parameters are relative * value to storage. @@ -45,6 +50,7 @@ struct pstore_zone_info { unsigned long ftrace_size; pstore_zone_read_op read; pstore_zone_write_op write; + pstore_zone_erase_op erase; pstore_zone_write_op panic_write; }; -- cgit v1.2.3