From 3f6d5e6a468d02676244b868b210433831846127 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Jan 2024 19:00:24 -0500 Subject: mm: introduce memalloc_flags_{save,restore} Our proliferation of memalloc_*_{save,restore} APIs is getting a bit silly, this adds a generic version and converts the existing save/restore functions to wrappers. Signed-off-by: Kent Overstreet Cc: Vlastimil Babka Cc: Matthew Wilcox Cc: Michal Hocko Cc: Darrick J. Wong Cc: linux-mm@kvack.org Acked-by: Vlastimil Babka --- include/linux/sched/mm.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 9a19f1b42f64..f00d7ecc2adf 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -306,6 +306,24 @@ static inline void might_alloc(gfp_t gfp_mask) might_sleep_if(gfpflags_allow_blocking(gfp_mask)); } +/** + * memalloc_flags_save - Add a PF_* flag to current->flags, save old value + * + * This allows PF_* flags to be conveniently added, irrespective of current + * value, and then the old version restored with memalloc_flags_restore(). + */ +static inline unsigned memalloc_flags_save(unsigned flags) +{ + unsigned oldflags = ~current->flags & flags; + current->flags |= flags; + return oldflags; +} + +static inline void memalloc_flags_restore(unsigned flags) +{ + current->flags &= ~flags; +} + /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * @@ -319,9 +337,7 @@ static inline void might_alloc(gfp_t gfp_mask) */ static inline unsigned int memalloc_noio_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** @@ -334,7 +350,7 @@ static inline unsigned int memalloc_noio_save(void) */ static inline void memalloc_noio_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; + memalloc_flags_restore(flags); } /** @@ -350,9 +366,7 @@ static inline void memalloc_noio_restore(unsigned int flags) */ static inline unsigned int memalloc_nofs_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOFS; - current->flags |= PF_MEMALLOC_NOFS; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** @@ -365,32 +379,27 @@ static inline unsigned int memalloc_nofs_save(void) */ static inline void memalloc_nofs_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; + memalloc_flags_restore(flags); } static inline unsigned int memalloc_noreclaim_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - return flags; + return memalloc_flags_save(PF_MEMALLOC); } static inline void memalloc_noreclaim_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC) | flags; + memalloc_flags_restore(flags); } static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_PIN; - - current->flags |= PF_MEMALLOC_PIN; - return flags; + return memalloc_flags_save(PF_MEMALLOC_PIN); } static inline void memalloc_pin_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; + memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG -- cgit v1.2.3 From eab0af905bfc3e9c05da2ca163d76a1513159aa4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Jan 2024 19:00:24 -0500 Subject: mm: introduce PF_MEMALLOC_NORECLAIM, PF_MEMALLOC_NOWARN Introduce PF_MEMALLOC_* equivalents of some GFP_ flags: PF_MEMALLOC_NORECLAIM -> GFP_NOWAIT PF_MEMALLOC_NOWARN -> __GFP_NOWARN Cc: Vlastimil Babka Cc: Matthew Wilcox Cc: Michal Hocko Cc: Darrick J. Wong Cc: linux-mm@kvack.org Signed-off-by: Kent Overstreet --- include/linux/sched.h | 4 ++-- include/linux/sched/mm.h | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ffe8f618ab86..192e2c892040 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1636,8 +1636,8 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF__HOLE__00800000 0x00800000 -#define PF__HOLE__01000000 0x01000000 +#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ +#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index f00d7ecc2adf..c29059a76052 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -236,16 +236,25 @@ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | + PF_MEMALLOC_NOFS | + PF_MEMALLOC_NORECLAIM | + PF_MEMALLOC_NOWARN | + PF_MEMALLOC_PIN))) { /* - * NOIO implies both NOIO and NOFS and it is a weaker context - * so always make sure it makes precedence + * Stronger flags before weaker flags: + * NORECLAIM implies NOIO, which in turn implies NOFS */ - if (pflags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NORECLAIM) + flags &= ~__GFP_DIRECT_RECLAIM; + else if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; + if (pflags & PF_MEMALLOC_NOWARN) + flags |= __GFP_NOWARN; + if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } -- cgit v1.2.3 From 0225bdfafd818f895fa4a4512f124a1614e011e2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 Feb 2024 06:28:41 -0500 Subject: mempool: kvmalloc pool Add mempool_init_kvmalloc_pool() and mempool_create_kvmalloc_pool(), which wrap kvmalloc() instead of kmalloc() - kmalloc() with a vmalloc() fallback. This is part of a bcachefs cleanup - dropping an internal kvpmalloc() helper (which predates kvmalloc()) along with mempool helpers; this replaces the bcachefs-private kvpmalloc_pool. Signed-off-by: Kent Overstreet Cc: linux-mm@kvack.org --- include/linux/mempool.h | 13 +++++++++++++ mm/mempool.c | 13 +++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7be1e32e6d42..16c5cc807ff6 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -95,6 +95,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) (void *) size); } +void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data); +void mempool_kvfree(void *element, void *pool_data); + +static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size) +{ + return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); +} + +static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size) +{ + return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); +} + /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that * allocates pages of the order specified by pool_data diff --git a/mm/mempool.c b/mm/mempool.c index dbbf0e9fb424..076c736f5f1f 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -590,6 +590,19 @@ void mempool_kfree(void *element, void *pool_data) } EXPORT_SYMBOL(mempool_kfree); +void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data) +{ + size_t size = (size_t)pool_data; + return kvmalloc(size, gfp_mask); +} +EXPORT_SYMBOL(mempool_kvmalloc); + +void mempool_kvfree(void *element, void *pool_data) +{ + kvfree(element); +} +EXPORT_SYMBOL(mempool_kvfree); + /* * A simple mempool-backed page allocator that allocates pages * of the order specified by pool_data. -- cgit v1.2.3 From 66a67c860cce3643248f7e80ee095b946829a342 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 28 Feb 2024 18:28:48 -0500 Subject: fs: file_remove_privs_flags() Rename and export __file_remove_privs(); for a buffered write path that doesn't take the inode lock we need to be able to check if the operation needs to do work first. Signed-off-by: Kent Overstreet Cc: Alexander Viro Cc: Christian Brauner --- fs/inode.c | 7 ++++--- include/linux/fs.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 91048c4c9c9e..b465afdbfcef 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2031,7 +2031,7 @@ static int __remove_privs(struct mnt_idmap *idmap, return notify_change(idmap, dentry, &newattrs, NULL); } -static int __file_remove_privs(struct file *file, unsigned int flags) +int file_remove_privs_flags(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); @@ -2056,6 +2056,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags) inode_has_no_xattr(inode); return error; } +EXPORT_SYMBOL_GPL(file_remove_privs_flags); /** * file_remove_privs - remove special file privileges (suid, capabilities) @@ -2068,7 +2069,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags) */ int file_remove_privs(struct file *file) { - return __file_remove_privs(file, 0); + return file_remove_privs_flags(file, 0); } EXPORT_SYMBOL(file_remove_privs); @@ -2161,7 +2162,7 @@ static int file_modified_flags(struct file *file, int flags) * Clear the security bits if the process is not being run by root. * This keeps people from modifying setuid and setgid binaries. */ - ret = __file_remove_privs(file, flags); + ret = file_remove_privs_flags(file, flags); if (ret) return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1fbc72c5f112..14ea66b62823 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3004,6 +3004,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); +extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); -- cgit v1.2.3 From 3a319a2476d27e0b6c3cac3ebf6e3d0b665a06e5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 7 Mar 2024 22:32:06 -0500 Subject: lib/generic-radix-tree.c: Make nodes more reasonably sized this code originally used the page allocator directly, but most code shouldn't do that - PAGE_SIZE varies with architecture, and slab is faster. 4k is also on the large side for typical usage, 512 bytes is a better choice for typical usage that might be somewhat sparse. Signed-off-by: Kent Overstreet --- include/linux/generic-radix-tree.h | 29 ++++++++++++++++------------- lib/generic-radix-tree.c | 35 ++++++++++++----------------------- 2 files changed, 28 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 847413164738..f3512fddf3d7 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -5,7 +5,7 @@ * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to - * PAGE_SIZE. + * GENRADIX_NODE_SIZE. * * A genradix is defined with the type it will store, like so: * @@ -45,12 +45,15 @@ struct genradix_root; +#define GENRADIX_NODE_SHIFT 9 +#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) + struct __genradix { struct genradix_root *root; }; /* - * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: + * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: */ #define __GENRADIX_INITIALIZER \ @@ -101,14 +104,14 @@ void __genradix_free(struct __genradix *); static inline size_t __idx_to_offset(size_t idx, size_t obj_size) { if (__builtin_constant_p(obj_size)) - BUILD_BUG_ON(obj_size > PAGE_SIZE); + BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE); else - BUG_ON(obj_size > PAGE_SIZE); + BUG_ON(obj_size > GENRADIX_NODE_SIZE); if (!is_power_of_2(obj_size)) { - size_t objs_per_page = PAGE_SIZE / obj_size; + size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size; - return (idx / objs_per_page) * PAGE_SIZE + + return (idx / objs_per_page) * GENRADIX_NODE_SIZE + (idx % objs_per_page) * obj_size; } else { return idx * obj_size; @@ -118,9 +121,9 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) #define __genradix_objs_per_page(_radix) \ - (PAGE_SIZE / sizeof((_radix)->type[0])) + (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0])) #define __genradix_page_remainder(_radix) \ - (PAGE_SIZE % sizeof((_radix)->type[0])) + (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0])) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) @@ -217,8 +220,8 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, iter->offset += obj_size; if (!is_power_of_2(obj_size) && - (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) - iter->offset = round_up(iter->offset, PAGE_SIZE); + (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE) + iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE); iter->pos++; } @@ -235,8 +238,8 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter, return; } - if ((iter->offset & (PAGE_SIZE - 1)) == 0) - iter->offset -= PAGE_SIZE % obj_size; + if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0) + iter->offset -= GENRADIX_NODE_SIZE % obj_size; iter->offset -= obj_size; iter->pos--; @@ -263,7 +266,7 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter, genradix_for_each_from(_radix, _iter, _p, 0) #define genradix_last_pos(_radix) \ - (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1) + (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1) /** * genradix_for_each_reverse - iterate over entry in a genradix, reverse order diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index 41f1bcdc4488..aaefb9b678c8 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -5,7 +5,7 @@ #include #include -#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) #define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) struct genradix_node { @@ -14,13 +14,13 @@ struct genradix_node { struct genradix_node *children[GENRADIX_ARY]; /* Leaf: */ - u8 data[PAGE_SIZE]; + u8 data[GENRADIX_NODE_SIZE]; }; }; static inline int genradix_depth_shift(unsigned depth) { - return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth; + return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; } /* @@ -33,7 +33,7 @@ static inline size_t genradix_depth_size(unsigned depth) /* depth that's needed for a genradix that can address up to ULONG_MAX: */ #define GENRADIX_MAX_DEPTH \ - DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT) + DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) #define GENRADIX_DEPTH_MASK \ ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) @@ -79,23 +79,12 @@ EXPORT_SYMBOL(__genradix_ptr); static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) { - struct genradix_node *node; - - node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO); - - /* - * We're using pages (not slab allocations) directly for kernel data - * structures, so we need to explicitly inform kmemleak of them in order - * to avoid false positive memory leak reports. - */ - kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask); - return node; + return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); } static inline void genradix_free_node(struct genradix_node *node) { - kmemleak_free(node); - free_page((unsigned long)node); + kfree(node); } /* @@ -200,7 +189,7 @@ restart: i++; iter->offset = round_down(iter->offset + objs_per_ptr, objs_per_ptr); - iter->pos = (iter->offset >> PAGE_SHIFT) * + iter->pos = (iter->offset >> GENRADIX_NODE_SHIFT) * objs_per_page; if (i == GENRADIX_ARY) goto restart; @@ -209,7 +198,7 @@ restart: n = n->children[i]; } - return &n->data[iter->offset & (PAGE_SIZE - 1)]; + return &n->data[iter->offset & (GENRADIX_NODE_SIZE - 1)]; } EXPORT_SYMBOL(__genradix_iter_peek); @@ -235,7 +224,7 @@ restart: if (ilog2(iter->offset) >= genradix_depth_shift(level)) { iter->offset = genradix_depth_size(level); - iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; + iter->pos = (iter->offset >> GENRADIX_NODE_SHIFT) * objs_per_page; iter->offset -= obj_size_plus_page_remainder; iter->pos--; @@ -251,7 +240,7 @@ restart: size_t objs_per_ptr = genradix_depth_size(level); iter->offset = round_down(iter->offset, objs_per_ptr); - iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; + iter->pos = (iter->offset >> GENRADIX_NODE_SHIFT) * objs_per_page; if (!iter->offset) return NULL; @@ -267,7 +256,7 @@ restart: n = n->children[i]; } - return &n->data[iter->offset & (PAGE_SIZE - 1)]; + return &n->data[iter->offset & (GENRADIX_NODE_SIZE - 1)]; } EXPORT_SYMBOL(__genradix_iter_peek_prev); @@ -289,7 +278,7 @@ int __genradix_prealloc(struct __genradix *radix, size_t size, { size_t offset; - for (offset = 0; offset < size; offset += PAGE_SIZE) + for (offset = 0; offset < size; offset += GENRADIX_NODE_SIZE) if (!__genradix_ptr_alloc(radix, offset, gfp_mask)) return -ENOMEM; -- cgit v1.2.3