diff options
| -rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 16 | ||||
| -rw-r--r-- | arch/arm64/kernel/machine_kexec_file.c | 40 | ||||
| -rw-r--r-- | arch/arm64/mm/init.c | 5 | ||||
| -rw-r--r-- | arch/loongarch/kernel/machine_kexec_file.c | 40 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/kexec_ranges.h | 1 | ||||
| -rw-r--r-- | arch/powerpc/kexec/crash.c | 5 | ||||
| -rw-r--r-- | arch/powerpc/kexec/ranges.c | 101 | ||||
| -rw-r--r-- | arch/riscv/kernel/machine_kexec_file.c | 39 | ||||
| -rw-r--r-- | arch/riscv/mm/init.c | 5 | ||||
| -rw-r--r-- | arch/x86/kernel/crash.c | 89 | ||||
| -rw-r--r-- | drivers/of/fdt.c | 9 | ||||
| -rw-r--r-- | drivers/of/kexec.c | 9 | ||||
| -rw-r--r-- | include/linux/crash_core.h | 9 | ||||
| -rw-r--r-- | include/linux/crash_reserve.h | 4 | ||||
| -rw-r--r-- | include/linux/kexec.h | 2 | ||||
| -rw-r--r-- | kernel/crash_core.c | 89 | ||||
| -rw-r--r-- | kernel/kexec_file.c | 27 |
17 files changed, 208 insertions, 282 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index eff079f648b7..731d39b7a18e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1089,14 +1089,14 @@ Kernel parameters It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. crashkernel=size[KMG],cma - [KNL, X86, ppc] Reserve additional crash kernel memory from - CMA. This reservation is usable by the first system's - userspace memory and kernel movable allocations (memory - balloon, zswap). Pages allocated from this memory range - will not be included in the vmcore so this should not - be used if dumping of userspace memory is intended and - it has to be expected that some movable kernel pages - may be missing from the dump. + [KNL, X86, ARM64, RISCV, PPC] Reserve additional crash + kernel memory from CMA. This reservation is usable by + the first system's userspace memory and kernel movable + allocations (memory balloon, zswap). Pages allocated + from this memory range will not be included in the vmcore + so this should not be used if dumping of userspace memory + is intended and it has to be expected that some movable + kernel pages may be missing from the dump. A standard crashkernel reservation, as described above, is still needed to hold the crash kernel and initrd. diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index e31fabed378a..854d872dfd0f 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -40,46 +40,30 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_CRASH_DUMP -static int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - u64 i; + unsigned int nr_ranges = 2 + crashk_cma_cnt; /* for exclusion of crashkernel region */ phys_addr_t start, end; + u64 i; - nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + u64 i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } #endif @@ -109,7 +93,7 @@ int load_other_segments(struct kimage *image, void *headers; unsigned long headers_sz; if (image->type == KEXEC_TYPE_CRASH) { - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); if (ret) { pr_err("Preparing elf core header failed\n"); goto out_err; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 97987f850a33..227f58522dad 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -96,8 +96,8 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; static void __init arch_reserve_crashkernel(void) { + unsigned long long crash_base, crash_size, cma_size = 0; unsigned long long low_size = 0; - unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -106,11 +106,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index 5584b798ba46..5412aa9f3568 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -56,46 +56,30 @@ static void cmdline_add_initrd(struct kimage *image, unsigned long *cmdline_tmpl } #ifdef CONFIG_CRASH_DUMP - -static int prepare_elf_headers(void **addr, unsigned long *sz) +unsigned int arch_get_system_nr_ranges(void) { - int ret, nr_ranges; - uint64_t i; + int nr_ranges = 2; /* for exclusion of crashkernel region */ phys_addr_t start, end; - struct crash_mem *cmem; + uint64_t i; - nr_ranges = 2; /* for exclusion of crashkernel region */ for_each_mem_range(i, &start, &end) nr_ranges++; - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; + return nr_ranges; +} + +int arch_crash_populate_cmem(struct crash_mem *cmem) +{ + phys_addr_t start, end; + uint64_t i; - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; for_each_mem_range(i, &start, &end) { cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret < 0) - goto out; - - if (crashk_low_res.end) { - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); - if (ret < 0) - goto out; - } - - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return 0; } /* @@ -163,7 +147,7 @@ int load_other_segments(struct kimage *image, void *headers; unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); if (ret < 0) { pr_err("Preparing elf core header failed\n"); goto out_err; diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h index 14055896cbcb..8489e844b447 100644 --- a/arch/powerpc/include/asm/kexec_ranges.h +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -7,7 +7,6 @@ void sort_memory_ranges(struct crash_mem *mrngs, bool merge); struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); int get_exclude_memory_ranges(struct crash_mem **mem_ranges); int get_reserved_memory_ranges(struct crash_mem **mem_ranges); int get_crash_memory_ranges(struct crash_mem **mem_ranges); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index e6539f213b3d..60a917a6beaa 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -493,7 +493,7 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * struct crash_mem *cmem = NULL; struct kexec_segment *ksegment; void *ptr, *mem, *elfbuf = NULL; - unsigned long elfsz, memsz, base_addr, size; + unsigned long elfsz, memsz, base_addr, size, end; ksegment = &image->segment[image->elfcorehdr_index]; mem = (void *) ksegment->mem; @@ -512,7 +512,8 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify * if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { base_addr = PFN_PHYS(mn->start_pfn); size = mn->nr_pages * PAGE_SIZE; - ret = remove_mem_range(&cmem, base_addr, size); + end = base_addr + size - 1; + ret = arch_crash_exclude_mem_range(&cmem, base_addr, end); if (ret) { pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); goto out; diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 867135560e5c..e5fea23b191b 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -553,9 +553,9 @@ out: #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP -static int crash_exclude_mem_range_guarded(struct crash_mem **mem_ranges, - unsigned long long mstart, - unsigned long long mend) +int arch_crash_exclude_mem_range(struct crash_mem **mem_ranges, + unsigned long long mstart, + unsigned long long mend) { struct crash_mem *tmem = *mem_ranges; @@ -604,18 +604,10 @@ int get_crash_memory_ranges(struct crash_mem **mem_ranges) sort_memory_ranges(*mem_ranges, true); } - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_res.start, crashk_res.end); + ret = crash_exclude_core_ranges(mem_ranges); if (ret) goto out; - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range_guarded(mem_ranges, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - goto out; - } - /* * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL * regions are exported to save their context at the time of @@ -641,89 +633,4 @@ out: pr_err("Failed to setup crash memory ranges\n"); return ret; } - -/** - * remove_mem_range - Removes the given memory range from the range list. - * @mem_ranges: Range list to remove the memory range to. - * @base: Base address of the range to remove. - * @size: Size of the memory range to remove. - * - * (Re)allocates memory, if needed. - * - * Returns 0 on success, negative errno on error. - */ -int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) -{ - u64 end; - int ret = 0; - unsigned int i; - u64 mstart, mend; - struct crash_mem *mem_rngs = *mem_ranges; - - if (!size) - return 0; - - /* - * Memory range are stored as start and end address, use - * the same format to do remove operation. - */ - end = base + size - 1; - - for (i = 0; i < mem_rngs->nr_ranges; i++) { - mstart = mem_rngs->ranges[i].start; - mend = mem_rngs->ranges[i].end; - - /* - * Memory range to remove is not part of this range entry - * in the memory range list - */ - if (!(base >= mstart && end <= mend)) - continue; - - /* - * Memory range to remove is equivalent to this entry in the - * memory range list. Remove the range entry from the list. - */ - if (base == mstart && end == mend) { - for (; i < mem_rngs->nr_ranges - 1; i++) { - mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; - mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; - } - mem_rngs->nr_ranges--; - goto out; - } - /* - * Start address of the memory range to remove and the - * current memory range entry in the list is same. Just - * move the start address of the current memory range - * entry in the list to end + 1. - */ - else if (base == mstart) { - mem_rngs->ranges[i].start = end + 1; - goto out; - } - /* - * End address of the memory range to remove and the - * current memory range entry in the list is same. - * Just move the end address of the current memory - * range entry in the list to base - 1. - */ - else if (end == mend) { - mem_rngs->ranges[i].end = base - 1; - goto out; - } - /* - * Memory range to remove is not at the edge of current - * memory range entry. Split the current memory entry into - * two half. - */ - else { - size = mem_rngs->ranges[i].end - end + 1; - mem_rngs->ranges[i].end = base - 1; - ret = add_mem_range(mem_ranges, end + 1, size); - } - } -out: - return ret; -} #endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index 59d4bbc848a8..26cd2a8bd0cd 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -45,6 +45,15 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } +unsigned int arch_get_system_nr_ranges(void) +{ + unsigned int nr_ranges = 2 + crashk_cma_cnt; /* For exclusion of crashkernel region */ + + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + return nr_ranges; +} + static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_mem *cmem = arg; @@ -56,33 +65,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -static int prepare_elf_headers(void **addr, unsigned long *sz) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - unsigned int nr_ranges; - int ret; - - nr_ranges = 1; /* For exclusion of crashkernel region */ - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - - cmem = kmalloc_flex(*cmem, ranges, nr_ranges); - if (!cmem) - return -ENOMEM; - - cmem->max_nr_ranges = nr_ranges; - cmem->nr_ranges = 0; - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); - -out: - kfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, @@ -274,7 +259,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, if (image->type == KEXEC_TYPE_CRASH) { void *headers; unsigned long headers_sz; - ret = prepare_elf_headers(&headers, &headers_sz); + ret = crash_prepare_headers(true, &headers, &headers_sz, NULL); if (ret) { pr_err("Preparing elf core header failed\n"); goto out; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 800cb5c007d1..e80b8b03d93e 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1320,7 +1320,7 @@ static inline void setup_vm_final(void) */ static void __init arch_reserve_crashkernel(void) { - unsigned long long low_size = 0; + unsigned long long low_size = 0, cma_size = 0; unsigned long long crash_base, crash_size; bool high = false; int ret; @@ -1330,11 +1330,12 @@ static void __init arch_reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base, - &low_size, NULL, &high); + &low_size, &cma_size, &high); if (ret) return; reserve_crashkernel_generic(crash_size, crash_base, low_size, high); + reserve_crashkernel_cma(cma_size); } void __init paging_init(void) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 623d4474631a..e681ec9cf1dc 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -155,16 +155,8 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg) return 0; } -/* Gather all the required information to prepare elf headers for ram regions */ -static struct crash_mem *fill_up_crash_elf_data(void) +unsigned int arch_get_system_nr_ranges(void) { - unsigned int nr_ranges = 0; - struct crash_mem *cmem; - - walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); - if (!nr_ranges) - return NULL; - /* * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges * may cause range splits. So add extra slots here. @@ -179,49 +171,16 @@ static struct crash_mem *fill_up_crash_elf_data(void) * But in order to lest the low 1M could be changed in the future, * (e.g. [start, 1M]), add a extra slot. */ - nr_ranges += 3 + crashk_cma_cnt; - cmem = vzalloc(struct_size(cmem, ranges, nr_ranges)); - if (!cmem) - return NULL; - - cmem->max_nr_ranges = nr_ranges; + unsigned int nr_ranges = 3 + crashk_cma_cnt; - return cmem; + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + return nr_ranges; } -/* - * Look for any unwanted ranges between mstart, mend and remove them. This - * might lead to split and split ranges are put in cmem->ranges[] array - */ -static int elf_header_exclude_ranges(struct crash_mem *cmem) +int arch_crash_exclude_ranges(struct crash_mem *cmem) { - int ret = 0; - int i; - /* Exclude the low 1M because it is always reserved */ - ret = crash_exclude_mem_range(cmem, 0, SZ_1M - 1); - if (ret) - return ret; - - /* Exclude crashkernel region */ - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); - if (ret) - return ret; - - if (crashk_low_res.end) - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, - crashk_low_res.end); - if (ret) - return ret; - - for (i = 0; i < crashk_cma_cnt; ++i) { - ret = crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, - crashk_cma_ranges[i].end); - if (ret) - return ret; - } - - return 0; + return crash_exclude_mem_range(cmem, 0, SZ_1M - 1); } static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) @@ -235,35 +194,9 @@ static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) return 0; } -/* Prepare elf headers. Return addr and size */ -static int prepare_elf_headers(void **addr, unsigned long *sz, - unsigned long *nr_mem_ranges) +int arch_crash_populate_cmem(struct crash_mem *cmem) { - struct crash_mem *cmem; - int ret; - - cmem = fill_up_crash_elf_data(); - if (!cmem) - return -ENOMEM; - - ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); - if (ret) - goto out; - - /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(cmem); - if (ret) - goto out; - - /* Return the computed number of memory ranges, for hotplug usage */ - *nr_mem_ranges = cmem->nr_ranges; - - /* By default prepare 64bit headers */ - ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); - -out: - vfree(cmem); - return ret; + return walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); } #endif @@ -421,7 +354,8 @@ int crash_load_segments(struct kimage *image) .buf_max = ULONG_MAX, .top_down = false }; /* Prepare elf headers and add a segment */ - ret = prepare_elf_headers(&kbuf.buffer, &kbuf.bufsz, &pnum); + ret = crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &kbuf.buffer, + &kbuf.bufsz, &pnum); if (ret) return ret; @@ -514,7 +448,6 @@ unsigned int arch_crash_get_elfcorehdr_size(void) void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { void *elfbuf = NULL, *old_elfcorehdr; - unsigned long nr_mem_ranges; unsigned long mem, memsz; unsigned long elfsz = 0; @@ -532,7 +465,7 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) * Create the new elfcorehdr reflecting the changes to CPU and/or * memory resources. */ - if (prepare_elf_headers(&elfbuf, &elfsz, &nr_mem_ranges)) { + if (crash_prepare_headers(IS_ENABLED(CONFIG_X86_64), &elfbuf, &elfsz, NULL)) { pr_err("unable to create new elfcorehdr"); goto out; } diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 26f66046cc32..a64afc3ded3d 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -878,11 +878,12 @@ static unsigned long chosen_node_offset = -FDT_ERR_NOTFOUND; /* * The main usage of linux,usable-memory-range is for crash dump kernel. * Originally, the number of usable-memory regions is one. Now there may - * be two regions, low region and high region. - * To make compatibility with existing user-space and older kdump, the low - * region is always the last range of linux,usable-memory-range if exist. + * be 2 + CRASHK_CMA_RANGES_MAX regions, low region, high region and cma + * regions. To make compatibility with existing user-space and older kdump, + * the high and low region are always the first two ranges of + * linux,usable-memory-range if exist. */ -#define MAX_USABLE_RANGES 2 +#define MAX_USABLE_RANGES (2 + CRASHK_CMA_RANGES_MAX) /** * early_init_dt_check_for_usable_mem_range - Decode usable memory range diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index b6837e299e7f..029903b986cb 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -458,6 +458,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, if (ret) goto out; } + + for (int i = 0; i < crashk_cma_cnt; i++) { + ret = fdt_appendprop_addrrange(fdt, 0, chosen_node, + "linux,usable-memory-range", + crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end - crashk_cma_ranges[i].start + 1); + if (ret) + goto out; + } #endif } diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index c1dee3f971a9..bc087124cd78 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -59,6 +59,9 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mend); extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz); +extern int crash_prepare_headers(int need_kernel_map, void **addr, + unsigned long *sz, unsigned long *nr_mem_ranges); +extern int crash_exclude_core_ranges(struct crash_mem **cmem); struct kimage; struct kexec_segment; @@ -76,6 +79,12 @@ int kexec_should_crash(struct task_struct *p); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); +extern unsigned int arch_get_system_nr_ranges(void); +extern int arch_crash_populate_cmem(struct crash_mem *cmem); +extern int arch_crash_exclude_ranges(struct crash_mem *cmem); +extern int arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend); #else /* !CONFIG_CRASH_DUMP*/ struct pt_regs; diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index f0dc03d94ca2..30864d90d7f5 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -14,9 +14,11 @@ extern struct resource crashk_res; extern struct resource crashk_low_res; extern struct range crashk_cma_ranges[]; + +#define CRASHK_CMA_RANGES_MAX 4 #if defined(CONFIG_CMA) && defined(CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION) #define CRASHKERNEL_CMA -#define CRASHKERNEL_CMA_RANGES_MAX 4 +#define CRASHKERNEL_CMA_RANGES_MAX (CRASHK_CMA_RANGES_MAX) extern int crashk_cma_cnt; #else #define crashk_cma_cnt 0 diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 8a22bc9b8c6c..0af8ae4fdd08 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -13,7 +13,7 @@ #define IND_SOURCE (1 << IND_SOURCE_BIT) #define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) #include <linux/vmcore_info.h> #include <linux/crash_reserve.h> diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 4f21fc3b108b..2b36aa9fade0 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -168,9 +168,6 @@ static inline resource_size_t crash_resource_size(const struct resource *res) return !res->end ? 0 : resource_size(res); } - - - int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, void **addr, unsigned long *sz) { @@ -272,6 +269,92 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, return 0; } +static struct crash_mem *alloc_cmem(unsigned int nr_ranges) +{ + struct crash_mem *cmem; + + cmem = kvzalloc_flex(*cmem, ranges, nr_ranges); + if (!cmem) + return NULL; + + cmem->max_nr_ranges = nr_ranges; + return cmem; +} + +unsigned int __weak arch_get_system_nr_ranges(void) { return 0; } +int __weak arch_crash_populate_cmem(struct crash_mem *cmem) { return -1; } +int __weak arch_crash_exclude_ranges(struct crash_mem *cmem) { return 0; } + +int __weak arch_crash_exclude_mem_range(struct crash_mem **mem, + unsigned long long mstart, + unsigned long long mend) +{ + return crash_exclude_mem_range(*mem, mstart, mend); +} + +int crash_exclude_core_ranges(struct crash_mem **cmem) +{ + int ret, i; + + /* Exclude crashkernel region */ + ret = arch_crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + return ret; + + if (crashk_low_res.end) { + ret = arch_crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + return ret; + } + + for (i = 0; i < crashk_cma_cnt; ++i) { + ret = arch_crash_exclude_mem_range(cmem, crashk_cma_ranges[i].start, + crashk_cma_ranges[i].end); + if (ret) + return ret; + } + + return 0; +} + +int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz, + unsigned long *nr_mem_ranges) +{ + unsigned int max_nr_ranges; + struct crash_mem *cmem; + int ret; + + max_nr_ranges = arch_get_system_nr_ranges(); + if (!max_nr_ranges) + return -ENOMEM; + + cmem = alloc_cmem(max_nr_ranges); + if (!cmem) + return -ENOMEM; + + ret = arch_crash_populate_cmem(cmem); + if (ret) + goto out; + + ret = crash_exclude_core_ranges(&cmem); + if (ret) + goto out; + + ret = arch_crash_exclude_ranges(cmem); + if (ret) + goto out; + + /* Return the computed number of memory ranges, for hotplug usage */ + if (nr_mem_ranges) + *nr_mem_ranges = cmem->nr_ranges; + + ret = crash_prepare_elf64_headers(cmem, need_kernel_map, addr, sz); + +out: + kvfree(cmem); + return ret; +} + /** * crash_exclude_mem_range - exclude a mem range for existing ranges * @mem: mem->range contains an array of ranges sorted in ascending order diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 2bfbb2d144e6..59fb9d71e9d8 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -27,6 +27,7 @@ #include <linux/syscalls.h> #include <linux/vmalloc.h> #include <linux/dma-map-ops.h> +#include <linux/kexec_handover.h> #include "kexec_internal.h" #ifdef CONFIG_KEXEC_SIG @@ -798,6 +799,16 @@ int kexec_add_buffer(struct kexec_buf *kbuf) return 0; } +static bool kexec_only_cma_segments(struct kimage *image) +{ + for (int i = 0; i < image->nr_segments; i++) { + if (!image->segment_cma[i]) + return false; + } + + return true; +} + /* Calculate and store the digest of segments */ static int kexec_calculate_store_digests(struct kimage *image) { @@ -822,6 +833,21 @@ static int kexec_calculate_store_digests(struct kimage *image) sha256_init(&sctx); + /* + * If KHO is enabled, the destinations are located in KHO scratch. + * KHO scratch can only contain early boot allocations and movable + * allocations. That means there is no risk of memory corruption by + * uncancelled DMA. + * + * If all segments were loaded into contiguous memory, there will be no + * relocations at all, so also no risk of corruption. + */ + if (image->type != KEXEC_TYPE_CRASH && + (kho_is_enabled() || kexec_only_cma_segments(image))) { + pr_debug("disabling checksum verification in purgatory\n"); + goto skip_checksum; + } + for (j = i = 0; i < image->nr_segments; i++) { struct kexec_segment *ksegment; @@ -867,6 +893,7 @@ static int kexec_calculate_store_digests(struct kimage *image) j++; } +skip_checksum: sha256_final(&sctx, digest); ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", |
