 Documentation/admin-guide/mm/memory-hotplug.rst |  12 ++++
 mm/memory_hotplug.c                             | 120 ++++++++++++++++------
 2 files changed, 113 insertions(+), 19 deletions(-)
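
The patch below turns the memory_hotplug.memmap_on_memory module parameter
from a plain boolean into a tristate that also accepts "force". The
parameter keeps its 0444 permissions, so the mode cannot be changed at
runtime. As a usage sketch (assuming the standard sysfs layout for
built-in module parameters), the mode would be selected on the kernel
command line and read back through sysfs:

    memory_hotplug.memmap_on_memory=force          (kernel command line)
    $ cat /sys/module/memory_hotplug/parameters/memmap_on_memory
    force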
diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
index bd77841041af..2994958c7ce8 100644
--- a/Documentation/admin-guide/mm/memory-hotplug.rst
+++ b/Documentation/admin-guide/mm/memory-hotplug.rst
@@ -433,6 +433,18 @@ The following module parameters are currently defined:
                                  memory in a way that huge pages in bigger
                                  granularity cannot be formed on hotplugged
                                  memory.
+
+                                 With value "force" it could result in memory
+                                 wastage due to memmap size limitations. For
+                                 example, if the memmap for a memory block
+                                 requires 1 MiB, but the pageblock size is 2
+                                 MiB, 1 MiB of hotplugged memory will be wasted.
+                                 Note that there are still cases where the
+                                 feature cannot be enforced: for example, if the
+                                 memmap is smaller than a single page, or if the
+                                 architecture does not support the forced mode
+                                 in all configurations.
+
 ``online_policy``               read-write: Set the basic policy used for
                                  automatic zone selection when onlining memory
                                  blocks without specifying a target zone.
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 746cb7c08c64..76b813991bdc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -41,17 +41,83 @@
 #include "internal.h"
 #include "shuffle.h"
 
+enum {
+        MEMMAP_ON_MEMORY_DISABLE = 0,
+        MEMMAP_ON_MEMORY_ENABLE,
+        MEMMAP_ON_MEMORY_FORCE,
+};
+
+static int memmap_mode __read_mostly = MEMMAP_ON_MEMORY_DISABLE;
+
+static inline unsigned long memory_block_memmap_size(void)
+{
+        return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page);
+}
+
+static inline unsigned long memory_block_memmap_on_memory_pages(void)
+{
+        unsigned long nr_pages = PFN_UP(memory_block_memmap_size());
+
+        /*
+         * In "forced" memmap_on_memory mode, we add extra pages to align the
+         * vmemmap size to cover full pageblocks. That way, we can add memory
+         * even if the vmemmap size is not properly aligned, however, we
+         * might waste memory.
+         */
+        if (memmap_mode == MEMMAP_ON_MEMORY_FORCE)
+                return pageblock_align(nr_pages);
+        return nr_pages;
+}
+
 #ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
 /*
  * memory_hotplug.memmap_on_memory parameter
  */
-static bool memmap_on_memory __ro_after_init;
-module_param(memmap_on_memory, bool, 0444);
-MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
+static int set_memmap_mode(const char *val, const struct kernel_param *kp)
+{
+        int ret, mode;
+        bool enabled;
+
+        if (sysfs_streq(val, "force") || sysfs_streq(val, "FORCE")) {
+                mode = MEMMAP_ON_MEMORY_FORCE;
+        } else {
+                ret = kstrtobool(val, &enabled);
+                if (ret < 0)
+                        return ret;
+                if (enabled)
+                        mode = MEMMAP_ON_MEMORY_ENABLE;
+                else
+                        mode = MEMMAP_ON_MEMORY_DISABLE;
+        }
+        *((int *)kp->arg) = mode;
+        if (mode == MEMMAP_ON_MEMORY_FORCE) {
+                unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
+
+                pr_info_once("Memory hotplug will waste %ld pages in each memory block\n",
+                             memmap_pages - PFN_UP(memory_block_memmap_size()));
+        }
+        return 0;
+}
+
+static int get_memmap_mode(char *buffer, const struct kernel_param *kp)
+{
+        if (*((int *)kp->arg) == MEMMAP_ON_MEMORY_FORCE)
+                return sprintf(buffer, "force\n");
+        return param_get_bool(buffer, kp);
+}
+
+static const struct kernel_param_ops memmap_mode_ops = {
+        .set = set_memmap_mode,
+        .get = get_memmap_mode,
+};
+module_param_cb(memmap_on_memory, &memmap_mode_ops, &memmap_mode, 0444);
+MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug\n"
+                 "With value \"force\" it could result in memory wastage due "
+                 "to memmap size limitations (Y/N/force)");
 
 static inline bool mhp_memmap_on_memory(void)
 {
-        return memmap_on_memory;
+        return memmap_mode != MEMMAP_ON_MEMORY_DISABLE;
 }
 #else
 static inline bool mhp_memmap_on_memory(void)
@@ -1247,11 +1313,6 @@ static int online_memory_block(struct memory_block *mem, void *arg)
         return device_online(&mem->dev);
 }
 
-static inline unsigned long memory_block_memmap_size(void)
-{
-        return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page);
-}
-
 #ifndef arch_supports_memmap_on_memory
 static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
 {
@@ -1267,7 +1328,7 @@ static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
 static bool mhp_supports_memmap_on_memory(unsigned long size)
 {
         unsigned long vmemmap_size = memory_block_memmap_size();
-        unsigned long remaining_size = size - vmemmap_size;
+        unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
 
         /*
          * Besides having arch support and the feature enabled at runtime, we
@@ -1295,10 +1356,28 @@ static bool mhp_supports_memmap_on_memory(unsigned long size)
          * altmap as an alternative source of memory, and we do not exactly
          * populate a single PMD.
          */
-        return mhp_memmap_on_memory() &&
-               size == memory_block_size_bytes() &&
-               IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)) &&
-               arch_supports_memmap_on_memory(vmemmap_size);
+        if (!mhp_memmap_on_memory() || size != memory_block_size_bytes())
+                return false;
+
+        /*
+         * Make sure the vmemmap allocation is fully contained
+         * so that we always allocate vmemmap memory from altmap area.
+         */
+        if (!IS_ALIGNED(vmemmap_size, PAGE_SIZE))
+                return false;
+
+        /*
+         * start pfn should be pageblock_nr_pages aligned for correctly
+         * setting migrate types
+         */
+        if (!pageblock_aligned(memmap_pages))
+                return false;
+
+        if (memmap_pages == PHYS_PFN(memory_block_size_bytes()))
+                /* No effective hotplugged memory doesn't make sense. */
+                return false;
+
+        return arch_supports_memmap_on_memory(vmemmap_size);
 }
 
 /*
@@ -1311,7 +1390,10 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 {
         struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
         enum memblock_flags memblock_flags = MEMBLOCK_NONE;
-        struct vmem_altmap mhp_altmap = {};
+        struct vmem_altmap mhp_altmap = {
+                .base_pfn = PHYS_PFN(res->start),
+                .end_pfn = PHYS_PFN(res->end),
+        };
         struct memory_group *group = NULL;
         u64 start, size;
         bool new_node = false;
@@ -1356,8 +1438,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
          */
         if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
                 if (mhp_supports_memmap_on_memory(size)) {
-                        mhp_altmap.free = PHYS_PFN(size);
-                        mhp_altmap.base_pfn = PHYS_PFN(start);
+                        mhp_altmap.free = memory_block_memmap_on_memory_pages();
                         params.altmap = &mhp_altmap;
                 }
                 /* fallback to not using altmap */
@@ -1369,8 +1450,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
                 goto error;
 
         /* create memory block devices after memory was added */
-        ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
-                                          group);
+        ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
         if (ret) {
                 arch_remove_memory(start, size, NULL);
                 goto error;
@@ -2096,6 +2176,8 @@ static int __ref try_remove_memory(u64 start, u64 size)
                  * right thing if we used vmem_altmap when hot-adding
                  * the range.
                  */
+                mhp_altmap.base_pfn = PHYS_PFN(start);
+                mhp_altmap.free = nr_vmemmap_pages;
                 mhp_altmap.alloc = nr_vmemmap_pages;
                 altmap = &mhp_altmap;
         }
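
The documentation example above (a 1 MiB memmap padded out to a 2 MiB
pageblock) can be checked with a small standalone sketch. This is
illustrative userspace C, not kernel code: the 64 MiB block size,
64-byte struct page, 4 KiB page, and 512-page pageblock are assumed
values chosen to reproduce the example, and pfn_up()/pb_align() are
hypothetical stand-ins for the kernel's PFN_UP() and pageblock_align():

    #include <stdio.h>

    #define PAGE_SIZE        4096UL        /* base page size (assumed) */
    #define STRUCT_PAGE_SIZE 64UL          /* sizeof(struct page) (assumed) */
    #define BLOCK_BYTES      (64UL << 20)  /* memory_block_size_bytes() (assumed) */
    #define PAGEBLOCK_PAGES  512UL         /* pageblock_nr_pages: 2 MiB / 4 KiB */

    /* Stand-in for PFN_UP(): bytes rounded up to whole pages. */
    static unsigned long pfn_up(unsigned long bytes)
    {
            return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
    }

    /* Stand-in for pageblock_align(): pages rounded up to whole pageblocks. */
    static unsigned long pb_align(unsigned long pages)
    {
            return (pages + PAGEBLOCK_PAGES - 1) / PAGEBLOCK_PAGES * PAGEBLOCK_PAGES;
    }

    int main(void)
    {
            /* memory_block_memmap_size(): one struct page per base page. */
            unsigned long memmap_bytes = (BLOCK_BYTES / PAGE_SIZE) * STRUCT_PAGE_SIZE;
            unsigned long nr_pages = pfn_up(memmap_bytes);  /* 256 pages = 1 MiB */
            unsigned long forced = pb_align(nr_pages);      /* 512 pages = 2 MiB */

            printf("memmap %lu pages, force reserves %lu, waste %lu pages (%lu KiB)\n",
                   nr_pages, forced, forced - nr_pages,
                   (forced - nr_pages) * PAGE_SIZE / 1024);
            return 0;
    }

Run, this reports a 256-page (1 MiB) waste per 64 MiB block, matching
both the documentation text and the pr_info_once() message printed by
set_memmap_mode() when "force" is selected.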
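After the patch, mhp_supports_memmap_on_memory() rejects a request
unless four conditions hold: the range is exactly one memory block, the
memmap fills whole pages, the (possibly padded) memmap reservation is
pageblock aligned, and the reservation does not swallow the entire
block. A self-contained sketch of the same checks, under the same
illustrative assumptions as above (again hypothetical stand-ins, not
kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE        4096UL
    #define STRUCT_PAGE_SIZE 64UL
    #define BLOCK_BYTES      (64UL << 20)
    #define PAGEBLOCK_PAGES  512UL

    /* "force" mode: pad the memmap reservation to whole pageblocks. */
    static unsigned long memmap_on_memory_pages(void)
    {
            unsigned long bytes = (BLOCK_BYTES / PAGE_SIZE) * STRUCT_PAGE_SIZE;
            unsigned long pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;

            return (pages + PAGEBLOCK_PAGES - 1) / PAGEBLOCK_PAGES * PAGEBLOCK_PAGES;
    }

    static bool supports_memmap_on_memory(unsigned long size)
    {
            unsigned long vmemmap_size = (BLOCK_BYTES / PAGE_SIZE) * STRUCT_PAGE_SIZE;
            unsigned long memmap_pages = memmap_on_memory_pages();

            if (size != BLOCK_BYTES)             /* whole memory blocks only */
                    return false;
            if (vmemmap_size % PAGE_SIZE)        /* memmap must fill whole pages */
                    return false;
            if (memmap_pages % PAGEBLOCK_PAGES)  /* migratetypes are per pageblock */
                    return false;
            if (memmap_pages == BLOCK_BYTES / PAGE_SIZE)
                    return false;                /* memmap would eat the whole block */
            return true;
    }

    int main(void)
    {
            printf("64 MiB block eligible: %s\n",
                   supports_memmap_on_memory(BLOCK_BYTES) ? "yes" : "no");
            return 0;
    }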