mm: introduce kv[mz]alloc helpers

Patch series "kvmalloc", v5. There are many open coded kmalloc with vmalloc fallback instances in the tree. Most of them are not careful enough or simply do not care about the underlying semantic of the kmalloc/page allocator which means that a) some vmalloc fallbacks are basically unreachable because the kmalloc part will keep retrying until it succeeds b) the page allocator can invoke a really disruptive steps like the OOM killer to move forward which doesn't sound appropriate when we consider that the vmalloc fallback is available. As it can be seen implementing kvmalloc requires quite an intimate knowledge if the page allocator and the memory reclaim internals which strongly suggests that a helper should be implemented in the memory subsystem proper. Most callers, I could find, have been converted to use the helper instead. This is patch 6. There are some more relying on __GFP_REPEAT in the networking stack which I have converted as well and Eric Dumazet was not opposed [2] to convert them as well. [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com This patch (of 9): Using kmalloc with the vmalloc fallback for larger allocations is a common pattern in the kernel code. Yet we do not have any common helper for that and so users have invented their own helpers. Some of them are really creative when doing so. Let's just add kv[mz]alloc and make sure it is implemented properly. This implementation makes sure to not make a large memory pressure for > PAGE_SZE requests (__GFP_NORETRY) and also to not warn about allocation failures. This also rules out the OOM killer as the vmalloc is a more approapriate fallback than a disruptive user visible action. This patch also changes some existing users and removes helpers which are specific for them. In some cases this is not possible (e.g. ext4_kvmalloc, libcfs_kvzalloc) because those seems to be broken and require GFP_NO{FS,IO} context which is not vmalloc compatible in general (note that the page table allocation is GFP_KERNEL). Those need to be fixed separately. While we are at it, document that __vmalloc{_node} about unsupported gfp mask because there seems to be a lot of confusion out there. kvmalloc_node will warn about GFP_KERNEL incompatible (which are not superset) flags to catch new abusers. Existing ones would have to die slowly. [sfr@canb.auug.org.au: f2fs fixup] Link: http://lkml.kernel.org/r/20170320163735.332e64b7@canb.auug.org.au Link: http://lkml.kernel.org/r/20170306103032.2540-2-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Reviewed-by: Andreas Dilger <adilger@dilger.ca> [ext4 part] Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: John Hubbard <jhubbard@nvidia.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Michal Hocko <mhocko@suse.com> 2017-05-08 15:57:09 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-08 17:15:12 -0700
commit: a7c3e901a46ff54c016d040847eda598a9e3e653 (patch)
tree: d149d70d420ff19586daa827db47a2e26a5598fe /fs
parent: 60f3e00d25b44e3aa51846590d1e10f408466a83 (diff)
download: lwn-a7c3e901a46ff54c016d040847eda598a9e3e653.tar.gz
lwn-a7c3e901a46ff54c016d040847eda598a9e3e653.zip
7 files changed, 16 insertions, 50 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 354dc1a894c2..b60698c104fd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2393,7 +2393,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
 		return 0;
 
 	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
-	new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
+	new_groupinfo = kvzalloc(size, GFP_KERNEL);
 	if (!new_groupinfo) {
 		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
 		return -ENOMEM;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a9c72e39a4ee..b2c74644d5de 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2153,7 +2153,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
 		return 0;
 
 	size = roundup_pow_of_two(size * sizeof(struct flex_groups));
-	new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+	new_groups = kvzalloc(size, GFP_KERNEL);
 	if (!new_groups) {
 		ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
 			 size / (int) sizeof(struct flex_groups));
@@ -3887,7 +3887,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			goto failed_mount;
 		}
 	}
-	sbi->s_group_desc = ext4_kvmalloc(db_count *
+	sbi->s_group_desc = kvmalloc(db_count *
 					  sizeof(struct buffer_head *),
 					  GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0a6e115562f6..1fc17a1fc5d0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2005,26 +2005,6 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
 	return kmalloc(size, flags);
 }
 
-static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
-{
-	void *ret;
-
-	ret = kmalloc(size, flags | __GFP_NOWARN);
-	if (!ret)
-		ret = __vmalloc(size, flags, PAGE_KERNEL);
-	return ret;
-}
-
-static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
-{
-	void *ret;
-
-	ret = kzalloc(size, flags | __GFP_NOWARN);
-	if (!ret)
-		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
-	return ret;
-}
-
 #define get_inode_mode(i) \
 	((is_inode_flag_set(i, FI_ACL_MODE)) ? \
 	 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5f7317875a67..0849af78381f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1012,11 +1012,11 @@ static int __exchange_data_block(struct inode *src_inode,
 	while (len) {
 		olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
 
-		src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
+		src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
 		if (!src_blkaddr)
 			return -ENOMEM;
 
-		do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL);
+		do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL);
 		if (!do_replace) {
 			kvfree(src_blkaddr);
 			return -ENOMEM;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 481aa8dc79f4..0ea1dca8a0e2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2621,17 +2621,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
-	nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
+	nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks *
 					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
 	if (!nm_i->free_nid_bitmap)
 		return -ENOMEM;
 
-	nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
+	nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8,
 								GFP_KERNEL);
 	if (!nm_i->nat_block_bitmap)
 		return -ENOMEM;
 
-	nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+	nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks *
 					sizeof(unsigned short), GFP_KERNEL);
 	if (!nm_i->free_nid_count)
 		return -ENOMEM;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 29ef7088c558..13806f642ab5 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2501,13 +2501,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 
 	SM_I(sbi)->sit_info = sit_i;
 
-	sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
+	sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) *
 					sizeof(struct seg_entry), GFP_KERNEL);
 	if (!sit_i->sentries)
 		return -ENOMEM;
 
 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
+	sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
 	if (!sit_i->dirty_sentries_bitmap)
 		return -ENOMEM;
 
@@ -2540,7 +2540,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 
 	if (sbi->segs_per_sec > 1) {
-		sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
+		sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) *
 					sizeof(struct sec_entry), GFP_KERNEL);
 		if (!sit_i->sec_entries)
 			return -ENOMEM;
@@ -2591,12 +2591,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
 	SM_I(sbi)->free_info = free_i;
 
 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
-	free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
+	free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL);
 	if (!free_i->free_segmap)
 		return -ENOMEM;
 
 	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
-	free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
+	free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL);
 	if (!free_i->free_secmap)
 		return -ENOMEM;
 
@@ -2764,7 +2764,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi)
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
 
-	dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
+	dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL);
 	if (!dirty_i->victim_secmap)
 		return -ENOMEM;
 	return 0;
@@ -2786,7 +2786,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
 
 	for (i = 0; i < NR_DIRTY_TYPE; i++) {
-		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
+		dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL);
 		if (!dirty_i->dirty_segmap[i])
 			return -ENOMEM;
 	}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index ca69fb99e41a..dc7c2be963ed 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -25,21 +25,7 @@ static void seq_set_overflow(struct seq_file *m)
 
 static void *seq_buf_alloc(unsigned long size)
 {
-	void *buf;
-	gfp_t gfp = GFP_KERNEL;
-
-	/*
-	 * For high order allocations, use __GFP_NORETRY to avoid oom-killing -
-	 * it's better to fall back to vmalloc() than to kill things.  For small
-	 * allocations, just use GFP_KERNEL which will oom kill, thus no need
-	 * for vmalloc fallback.
-	 */
-	if (size > PAGE_SIZE)
-		gfp |= __GFP_NORETRY | __GFP_NOWARN;
-	buf = kmalloc(size, gfp);
-	if (!buf && size > PAGE_SIZE)
-		buf = vmalloc(size);
-	return buf;
+	return kvmalloc(size, GFP_KERNEL);
 }
 
 /**
author	Michal Hocko <mhocko@suse.com>	2017-05-08 15:57:09 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-08 17:15:12 -0700
commit	a7c3e901a46ff54c016d040847eda598a9e3e653 (patch)
tree	d149d70d420ff19586daa827db47a2e26a5598fe /fs
parent	60f3e00d25b44e3aa51846590d1e10f408466a83 (diff)
download	lwn-a7c3e901a46ff54c016d040847eda598a9e3e653.tar.gz lwn-a7c3e901a46ff54c016d040847eda598a9e3e653.zip