summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2018-04-05 16:23:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 21:36:25 -0700
commita5c6d6509342785bef53bf9508e1842b303f1878 (patch)
treec741ae2db50b36dae555549172246676f9323f5f
parent31286a8484a85e8b4e91ddb0f5415aee8a416827 (diff)
downloadlwn-a5c6d6509342785bef53bf9508e1842b303f1878.tar.gz
lwn-a5c6d6509342785bef53bf9508e1842b303f1878.zip
mm, page_alloc: extend kernelcore and movablecore for percent
Both kernelcore= and movablecore= can be used to define the amount of ZONE_NORMAL and ZONE_MOVABLE on a system, respectively. This requires the system memory capacity to be known when specifying the command line, however. This introduces the ability to define both kernelcore= and movablecore= as a percentage of total system memory. This is convenient for systems software that wants to define the amount of ZONE_MOVABLE, for example, as a proportion of a system's memory rather than a hardcoded byte value. To define the percentage, the final character of the parameter should be a '%'. mhocko: "why is anyone using these options nowadays?" rientjes: : : Fragmentation of non-__GFP_MOVABLE pages due to low on memory : situations can pollute most pageblocks on the system, as much as 1GB of : slab being fragmented over 128GB of memory, for example. When the : amount of kernel memory is well bounded for certain systems, it is : better to aggressively reclaim from existing MIGRATE_UNMOVABLE : pageblocks rather than eagerly fallback to others. : : We have additional patches that help with this fragmentation if you're : interested, specifically kcompactd compaction of MIGRATE_UNMOVABLE : pageblocks triggered by fallback of non-__GFP_MOVABLE allocations and : draining of pcp lists back to the zone free area to prevent stranding. [rientjes@google.com: updates] Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802131700160.71590@chino.kir.corp.google.com Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802121622470.179479@chino.kir.corp.google.com Signed-off-by: David Rientjes <rientjes@google.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Mel Gorman <mgorman@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt54
-rw-r--r--mm/page_alloc.c43
2 files changed, 62 insertions, 35 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5ffe4c4121bd..7993021a1f6e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1840,30 +1840,29 @@
keepinitrd [HW,ARM]
kernelcore= [KNL,X86,IA-64,PPC]
- Format: nn[KMGTPE] | "mirror"
- This parameter
- specifies the amount of memory usable by the kernel
- for non-movable allocations. The requested amount is
- spread evenly throughout all nodes in the system. The
- remaining memory in each node is used for Movable
- pages. In the event, a node is too small to have both
- kernelcore and Movable pages, kernelcore pages will
- take priority and other nodes will have a larger number
- of Movable pages. The Movable zone is used for the
- allocation of pages that may be reclaimed or moved
- by the page migration subsystem. This means that
- HugeTLB pages may not be allocated from this zone.
- Note that allocations like PTEs-from-HighMem still
- use the HighMem zone if it exists, and the Normal
+ Format: nn[KMGTPE] | nn% | "mirror"
+ This parameter specifies the amount of memory usable by
+ the kernel for non-movable allocations. The requested
+ amount is spread evenly throughout all nodes in the
+ system as ZONE_NORMAL. The remaining memory is used for
+ movable memory in its own zone, ZONE_MOVABLE. In the
+ event, a node is too small to have both ZONE_NORMAL and
+ ZONE_MOVABLE, kernelcore memory will take priority and
+ other nodes will have a larger ZONE_MOVABLE.
+
+ ZONE_MOVABLE is used for the allocation of pages that
+ may be reclaimed or moved by the page migration
+ subsystem. Note that allocations like PTEs-from-HighMem
+ still use the HighMem zone if it exists, and the Normal
zone if it does not.
- Instead of specifying the amount of memory (nn[KMGTPE]),
- you can specify "mirror" option. In case "mirror"
+ It is possible to specify the exact amount of memory in
+ the form of "nn[KMGTPE]", a percentage of total system
+ memory in the form of "nn%", or "mirror". If "mirror"
option is specified, mirrored (reliable) memory is used
for non-movable allocations and remaining memory is used
- for Movable pages. nn[KMGTPE] and "mirror" are exclusive,
- so you can NOT specify nn[KMGTPE] and "mirror" at the same
- time.
+ for Movable pages. "nn[KMGTPE]", "nn%", and "mirror"
+ are exclusive, so you cannot specify multiple forms.
kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
Format: <Controller#>[,poll interval]
@@ -2377,13 +2376,14 @@
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
reporting absolute coordinates, such as tablets
- movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter
- is similar to kernelcore except it specifies the
- amount of memory used for migratable allocations.
- If both kernelcore and movablecore is specified,
- then kernelcore will be at *least* the specified
- value but may be more. If movablecore on its own
- is specified, the administrator must be careful
+ movablecore= [KNL,X86,IA-64,PPC]
+ Format: nn[KMGTPE] | nn%
+ This parameter is the complement to kernelcore=, it
+ specifies the amount of memory used for migratable
+ allocations. If both kernelcore and movablecore is
+ specified, then kernelcore will be at *least* the
+ specified value but may be more. If movablecore on its
+ own is specified, the administrator must be careful
that the amount of memory usable for all allocations
is not too small.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a3e2ba4f76bb..766ffb5fa94b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -273,7 +273,9 @@ static unsigned long __meminitdata dma_reserve;
static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
static unsigned long __initdata required_kernelcore;
+static unsigned long required_kernelcore_percent __initdata;
static unsigned long __initdata required_movablecore;
+static unsigned long required_movablecore_percent __initdata;
static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
static bool mirrored_kernelcore;
@@ -6571,7 +6573,18 @@ static void __init find_zone_movable_pfns_for_nodes(void)
}
/*
- * If movablecore=nn[KMG] was specified, calculate what size of
+ * If kernelcore=nn% or movablecore=nn% was specified, calculate the
+ * amount of necessary memory.
+ */
+ if (required_kernelcore_percent)
+ required_kernelcore = (totalpages * 100 * required_kernelcore_percent) /
+ 10000UL;
+ if (required_movablecore_percent)
+ required_movablecore = (totalpages * 100 * required_movablecore_percent) /
+ 10000UL;
+
+ /*
+ * If movablecore= was specified, calculate what size of
* kernelcore that corresponds so that memory usable for
* any allocation type is evenly spread. If both kernelcore
* and movablecore are specified, then the value of kernelcore
@@ -6811,18 +6824,30 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
zero_resv_unavail();
}
-static int __init cmdline_parse_core(char *p, unsigned long *core)
+static int __init cmdline_parse_core(char *p, unsigned long *core,
+ unsigned long *percent)
{
unsigned long long coremem;
+ char *endptr;
+
if (!p)
return -EINVAL;
- coremem = memparse(p, &p);
- *core = coremem >> PAGE_SHIFT;
+ /* Value may be a percentage of total memory, otherwise bytes */
+ coremem = simple_strtoull(p, &endptr, 0);
+ if (*endptr == '%') {
+ /* Paranoid check for percent values greater than 100 */
+ WARN_ON(coremem > 100);
- /* Paranoid check that UL is enough for the coremem value */
- WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
+ *percent = coremem;
+ } else {
+ coremem = memparse(p, &p);
+ /* Paranoid check that UL is enough for the coremem value */
+ WARN_ON((coremem >> PAGE_SHIFT) > ULONG_MAX);
+ *core = coremem >> PAGE_SHIFT;
+ *percent = 0UL;
+ }
return 0;
}
@@ -6838,7 +6863,8 @@ static int __init cmdline_parse_kernelcore(char *p)
return 0;
}
- return cmdline_parse_core(p, &required_kernelcore);
+ return cmdline_parse_core(p, &required_kernelcore,
+ &required_kernelcore_percent);
}
/*
@@ -6847,7 +6873,8 @@ static int __init cmdline_parse_kernelcore(char *p)
*/
static int __init cmdline_parse_movablecore(char *p)
{
- return cmdline_parse_core(p, &required_movablecore);
+ return cmdline_parse_core(p, &required_movablecore,
+ &required_movablecore_percent);
}
early_param("kernelcore", cmdline_parse_kernelcore);