diff options
author | Frank van der Linden <fvdl@google.com> | 2025-02-28 18:29:11 +0000 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2025-03-16 22:06:27 -0700 |
commit | d65917c42373f70159a3fc453f8f028fd665e04f (patch) | |
tree | cf9918411a58f1e645c9e8f65be33e6fa3ae13b0 /mm/sparse.c | |
parent | 243a75e236802614075511266c8d1834d4bcffe9 (diff) | |
download | lwn-d65917c42373f70159a3fc453f8f028fd665e04f.tar.gz lwn-d65917c42373f70159a3fc453f8f028fd665e04f.zip |
mm/sparse: allow for alternate vmemmap section init at boot
Add functions that are called just before the per-section memmap is
initialized and just before the memmap page structures are initialized.
They are called sparse_vmemmap_init_nid_early and
sparse_vmemmap_init_nid_late, respectively.
This allows for mm subsystems to add calls to initialize memmap and page
structures in a specific way, if using SPARSEMEM_VMEMMAP. Specifically,
hugetlb can pre-HVO bootmem allocated pages that way, so that no time and
resources are wasted on allocating vmemmap pages, only to free them later
(and possibly unnecessarily running the system out of memory in the
process).
Refactor some code and export a few convenience functions for external
use.
In sparse_init_nid, skip any sections that are already initialized, e.g.
they have been initialized by sparse_vmemmap_init_nid_early already.
The hugetlb code to use these functions will be added in a later commit.
Export section_map_size, as any alternate memmap init code will want to
use it.
The internal config option to enable this is SPARSEMEM_VMEMMAP_PREINIT,
which is selected if an architecture-specific option,
ARCH_WANT_HUGETLB_VMEMMAP_PREINIT, is set. In the future, if other
subsystems want to do preinit too, they can do it in a similar fashion.
The internal config option is there because a section flag is used, and
the number of flags available is architecture-dependent (see mmzone.h).
Architecures can decide if there is room for the flag when enabling
options that select SPARSEMEM_VMEMMAP_PREINIT.
Fortunately, as of right now, all sparse vmemmap using architectures do
have room.
Link: https://lkml.kernel.org/r/20250228182928.2645936-11-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/sparse.c')
-rw-r--r-- | mm/sparse.c | 87 |
1 files changed, 66 insertions, 21 deletions
diff --git a/mm/sparse.c b/mm/sparse.c index 133b033d0cba..ee0234a77c7f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -408,13 +408,13 @@ static void __init check_usemap_section_nr(int nid, #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_SPARSEMEM_VMEMMAP -static unsigned long __init section_map_size(void) +unsigned long __init section_map_size(void) { return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE); } #else -static unsigned long __init section_map_size(void) +unsigned long __init section_map_size(void) { return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION); } @@ -495,6 +495,44 @@ void __weak __meminit vmemmap_populate_print_last(void) { } +static void *sparse_usagebuf __meminitdata; +static void *sparse_usagebuf_end __meminitdata; + +/* + * Helper function that is used for generic section initialization, and + * can also be used by any hooks added above. + */ +void __init sparse_init_early_section(int nid, struct page *map, + unsigned long pnum, unsigned long flags) +{ + BUG_ON(!sparse_usagebuf || sparse_usagebuf >= sparse_usagebuf_end); + check_usemap_section_nr(nid, sparse_usagebuf); + sparse_init_one_section(__nr_to_section(pnum), pnum, map, + sparse_usagebuf, SECTION_IS_EARLY | flags); + sparse_usagebuf = (void *)sparse_usagebuf + mem_section_usage_size(); +} + +static int __init sparse_usage_init(int nid, unsigned long map_count) +{ + unsigned long size; + + size = mem_section_usage_size() * map_count; + sparse_usagebuf = sparse_early_usemaps_alloc_pgdat_section( + NODE_DATA(nid), size); + if (!sparse_usagebuf) { + sparse_usagebuf_end = NULL; + return -ENOMEM; + } + + sparse_usagebuf_end = sparse_usagebuf + size; + return 0; +} + +static void __init sparse_usage_fini(void) +{ + sparse_usagebuf = sparse_usagebuf_end = NULL; +} + /* * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end) * And number of present sections in this node is map_count. @@ -503,47 +541,54 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum_end, unsigned long map_count) { - struct mem_section_usage *usage; unsigned long pnum; struct page *map; + struct mem_section *ms; - usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), - mem_section_usage_size() * map_count); - if (!usage) { + if (sparse_usage_init(nid, map_count)) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } + sparse_buffer_init(map_count * section_map_size(), nid); + + sparse_vmemmap_init_nid_early(nid); + for_each_present_section_nr(pnum_begin, pnum) { unsigned long pfn = section_nr_to_pfn(pnum); if (pnum >= pnum_end) break; - map = __populate_section_memmap(pfn, PAGES_PER_SECTION, - nid, NULL, NULL); - if (!map) { - pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", - __func__, nid); - pnum_begin = pnum; - sparse_buffer_fini(); - goto failed; + ms = __nr_to_section(pnum); + if (!preinited_vmemmap_section(ms)) { + map = __populate_section_memmap(pfn, PAGES_PER_SECTION, + nid, NULL, NULL); + if (!map) { + pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.", + __func__, nid); + pnum_begin = pnum; + sparse_usage_fini(); + sparse_buffer_fini(); + goto failed; + } + sparse_init_early_section(nid, map, pnum, 0); } - check_usemap_section_nr(nid, usage); - sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage, - SECTION_IS_EARLY); - usage = (void *) usage + mem_section_usage_size(); } + sparse_usage_fini(); sparse_buffer_fini(); return; failed: - /* We failed to allocate, mark all the following pnums as not present */ + /* + * We failed to allocate, mark all the following pnums as not present, + * except the ones already initialized earlier. + */ for_each_present_section_nr(pnum_begin, pnum) { - struct mem_section *ms; - if (pnum >= pnum_end) break; ms = __nr_to_section(pnum); + if (!preinited_vmemmap_section(ms)) + ms->section_mem_map = 0; ms->section_mem_map = 0; } } |