From 6b1ead5985bf73b7dd4453f5cd3b8690a9c52cd5 Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Mon, 12 Dec 2022 13:56:57 +0800 Subject: lib/test_vmalloc.c: add parameter use_huge for fix_size_alloc_test Add a parameter `use_huge' for fix_size_alloc_test(), which can be used to test allocation via vmalloc_huge for both functionality and performance. Link: https://lkml.kernel.org/r/20221212055657.698420-1-panqinglin2020@iscas.ac.cn Signed-off-by: Qinglin Pan Cc: "Uladzislau Rezki (Sony)" Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index f90d2c27675b..de4ee0d50906 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -38,6 +38,9 @@ __param(int, test_loop_count, 1000000, __param(int, nr_pages, 0, "Set number of pages for fix_size_alloc_test(default: 1)"); +__param(bool, use_huge, false, + "Use vmalloc_huge in fix_size_alloc_test"); + __param(int, run_test_mask, INT_MAX, "Set tests specified in the mask.\n\n" "\t\tid: 1, name: fix_size_alloc_test\n" @@ -264,7 +267,10 @@ static int fix_size_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); + if (use_huge) + ptr = vmalloc_huge((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE, GFP_KERNEL); + else + ptr = vmalloc((nr_pages > 0 ? nr_pages:1) * PAGE_SIZE); if (!ptr) return -1; -- cgit v1.2.3 From 831978e37e93bd3e36612917a4b193278950daff Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:52 +0800 Subject: maple_tree: remove extra space and blank line Patch series "Clean up and refinement for maple tree", v2. This patchset cleans up and refines some maple tree code. A few small changes make the code easier to understand and improve readability. This patch (of 7): These extra spaces and blank lines are unnecessary, so drop them. Link: https://lkml.kernel.org/r/20221221060058.609003-1-vernon2gm@gmail.com Link: https://lkml.kernel.org/r/20221221060058.609003-2-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 -- lib/maple_tree.c | 14 ++++---------- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e594db58a0f1..4ee5a969441c 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -517,7 +517,6 @@ static inline void mas_reset(struct ma_state *mas) * entry. * * Note: may return the zero entry.
- * */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) @@ -639,7 +638,6 @@ static inline void mt_set_in_rcu(struct maple_tree *mt) } static inline unsigned int mt_height(const struct maple_tree *mt) - { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 26e2045d3cda..975358bec754 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -183,7 +183,6 @@ static void ma_free_rcu(struct maple_node *node) call_rcu(&node->rcu, mt_free_rcu); } - static void mas_set_height(struct ma_state *mas) { unsigned int new_flags = mas->tree->ma_flags; @@ -468,7 +467,7 @@ static inline void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent, unsigned char slot) { - unsigned long val = (unsigned long) parent; + unsigned long val = (unsigned long)parent; unsigned long shift; unsigned long type; enum maple_type p_type = mte_node_type(parent); @@ -502,7 +501,7 @@ void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent, */ static inline unsigned int mte_parent_slot(const struct maple_enode *enode) { - unsigned long val = (unsigned long) mte_to_node(enode)->parent; + unsigned long val = (unsigned long)mte_to_node(enode)->parent; /* Root. */ if (val & 1) @@ -1278,7 +1277,6 @@ nomem_one: mas->alloc->total = success; mas_set_err(mas, -ENOMEM); return; - } /* @@ -2946,7 +2944,7 @@ next: mas->min = prev_min; mas->max = prev_max; mas->node = last; - return (void *) next; + return (void *)next; dead_node: mas_reset(mas); @@ -3466,7 +3464,6 @@ static inline bool mas_push_data(struct ma_state *mas, int height, */ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) { - struct maple_subtree_state mast; int height = 0; unsigned char mid_split, split = 0; @@ -3892,7 +3889,7 @@ next: goto dead_node; } while (!ma_is_leaf(type)); - return (void *) next; + return (void *)next; dead_node: mas_reset(mas); @@ -4710,7 +4707,6 @@ found: static inline void mas_rewalk(struct ma_state *mas, unsigned long index) { - retry: mas_set(mas, index); mas_state_walk(mas); @@ -4718,7 +4714,6 @@ retry: goto retry; return; - } /* @@ -5620,7 +5615,6 @@ static void mas_wr_store_setup(struct ma_wr_state *wr_mas) mas_reset(wr_mas->mas); } } - } /* Interface */ -- cgit v1.2.3 From d56c593c8e128c42dc81707c07cbd5af41862214 Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:53 +0800 Subject: maple_tree: remove extra return statement For functions with a return type of void, it is unnecessary to add a return statement at the end of the function, so drop it. Link: https://lkml.kernel.org/r/20221221060058.609003-3-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R.
Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 975358bec754..fc70ae9850b1 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1276,7 +1276,6 @@ nomem_one: if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) mas->alloc->total = success; mas_set_err(mas, -ENOMEM); - return; } /* @@ -4712,8 +4711,6 @@ retry: mas_state_walk(mas); if (mas_is_start(mas)) goto retry; - - return; } /* -- cgit v1.2.3 From bd592703b81a95473f6a01fe731beccd0992236e Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:54 +0800 Subject: maple_tree: use mt_node_max() instead of direct operations mt_max[] Use mt_node_max() to get the maximum number of slots for a node, rather than operating on mt_max[] directly, which improves portability. Link: https://lkml.kernel.org/r/20221221060058.609003-4-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index fc70ae9850b1..506b8fdb5f1b 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -6725,7 +6725,7 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry, if (i < (MAPLE_RANGE64_SLOTS - 1)) last = node->pivot[i]; - else if (!node->slot[i] && max != mt_max[mte_node_type(entry)]) + else if (!node->slot[i] && max != mt_node_max(entry)) break; if (last == 0 && i > 0) break; @@ -6832,7 +6832,7 @@ void mt_dump(const struct maple_tree *mt) if (!xa_is_node(entry)) mt_dump_entry(entry, 0, 0, 0); else if (entry) - mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0); + mt_dump_node(mt, entry, 0, mt_node_max(entry), 0); } EXPORT_SYMBOL_GPL(mt_dump); -- cgit v1.2.3 From 84fd3e1ee395649ac45b7317d44c10b33d0dca79 Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:55 +0800 Subject: maple_tree: use macro MA_ROOT_PARENT instead of number When checking whether node->parent is the parent of the root node, using the macro MA_ROOT_PARENT is easier to understand and improves readability. Link: https://lkml.kernel.org/r/20221221060058.609003-5-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 506b8fdb5f1b..4b0575b60a11 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -503,8 +503,7 @@ static inline unsigned int mte_parent_slot(const struct maple_enode *enode) { unsigned long val = (unsigned long)mte_to_node(enode)->parent; - /* Root. */ - if (val & 1) + if (val & MA_ROOT_PARENT) return 0; /* -- cgit v1.2.3 From 46b345848261009477552d654cb2f65000c30e4d Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:57 +0800 Subject: maple_tree: refine ma_state init from mas_start() If mas->node is an MAS_START, there are three cases, and they all assign different values to mas->node and mas->offset. So there is no need to set them to a default value before updating. Update them directly to make the code easier to understand and improve readability. Link: https://lkml.kernel.org/r/20221221060058.609003-7-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R.
Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 4b0575b60a11..d4554c11ec15 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1329,7 +1329,7 @@ static void mas_node_count(struct ma_state *mas, int count) * mas_start() - Sets up maple state for operations. * @mas: The maple state. * - * If mas->node == MAS_START, then set the min, max, depth, and offset to + * If mas->node == MAS_START, then set the min, max and depth to * defaults. * * Return: @@ -1343,22 +1343,22 @@ static inline struct maple_enode *mas_start(struct ma_state *mas) if (likely(mas_is_start(mas))) { struct maple_enode *root; - mas->node = MAS_NONE; mas->min = 0; mas->max = ULONG_MAX; mas->depth = 0; - mas->offset = 0; root = mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 1; mas->node = mte_safe_root(root); + mas->offset = 0; return NULL; } /* empty tree */ if (unlikely(!root)) { + mas->node = MAS_NONE; mas->offset = MAPLE_NODE_SLOTS; return NULL; } -- cgit v1.2.3 From e11cb683b2ebc6699bc0ca200442f1b80a51553f Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:58 +0800 Subject: maple_tree: refine mab_calc_split function Invert the conditional judgment of the mid_split, to focus the return statement in the last statement, which is easier to understand and for better readability. Link: https://lkml.kernel.org/r/20221221060058.609003-8-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index d4554c11ec15..94f0053ec3e0 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1882,10 +1882,9 @@ static inline int mab_calc_split(struct ma_state *mas, /* Avoid ending a node on a NULL entry */ split = mab_no_null_split(bn, split, slot_count); - if (!(*mid_split)) - return split; - *mid_split = mab_no_null_split(bn, *mid_split, slot_count); + if (unlikely(*mid_split)) + *mid_split = mab_no_null_split(bn, *mid_split, slot_count); return split; } -- cgit v1.2.3 From 541e06b772c1aaffb3b6a245ccface36d7107af2 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Thu, 5 Jan 2023 16:05:34 +0000 Subject: maple_tree: remove GFP_ZERO from kmem_cache_alloc() and kmem_cache_alloc_bulk() Preallocations are common in the VMA code to avoid allocating under certain locking conditions. The preallocations must also cover the worst-case scenario. Removing the GFP_ZERO flag from the kmem_cache_alloc() (and bulk variant) calls will reduce the amount of time spent zeroing memory that may not be used. Only zero out the necessary area to keep track of the allocations in the maple state. Zero the entire node prior to using it in the tree. This required internal changes to node counting on allocation, so the test code is also updated. 
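[Editor's illustration, not part of this patch: a minimal sketch of the allocation pattern the change describes, assuming a hypothetical kmem_cache user. Allocate without __GFP_ZERO, clear only the bookkeeping that must be valid immediately, and defer zeroing the whole object until it is actually placed in the tree. All names below (demo_node, demo_alloc, demo_insert) are hypothetical and do not appear in the kernel.]

#include <linux/slab.h>
#include <linux/string.h>

/* Hypothetical object: a small header that must be valid right away,
 * plus a payload that only matters if the node is actually used. */
struct demo_node {
	unsigned long tracking;
	unsigned long payload[31];
};

static struct demo_node *demo_alloc(struct kmem_cache *cache, gfp_t gfp)
{
	/* No __GFP_ZERO: skip zeroing memory that may never be used. */
	struct demo_node *node = kmem_cache_alloc(cache, gfp);

	if (!node)
		return NULL;

	/* Zero only the field the allocator state needs to track now. */
	node->tracking = 0;
	return node;
}

static void demo_insert(struct demo_node *node)
{
	/* Zero the entire node only right before it goes into the tree. */
	memset(node, 0, sizeof(*node));
	/* ... link the node into the data structure here ... */
}

The saving comes from preallocated nodes that end up being freed unused: with __GFP_ZERO they were zeroed in full for nothing.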
This restores some micro-benchmark performance: up to +9% in mmtests mmap1 by my testing +10% to +20% in mmap, mmapaddr, mmapmany tests reported by Red Hat Link: https://bugzilla.redhat.com/show_bug.cgi?id=2149636 Link: https://lkml.kernel.org/r/20230105160427.2988454-1-Liam.Howlett@oracle.com Signed-off-by: Liam Howlett Reported-by: Jirka Hladky Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- lib/maple_tree.c | 80 +++++++++++++++++++++------------------- tools/testing/radix-tree/maple.c | 18 ++++----- 2 files changed, 52 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 94f0053ec3e0..8db3c336d19f 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -149,13 +149,12 @@ struct maple_subtree_state { /* Functions */ static inline struct maple_node *mt_alloc_one(gfp_t gfp) { - return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO); + return kmem_cache_alloc(maple_node_cache, gfp); } static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) { - return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size, - nodes); + return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); } static inline void mt_free_bulk(size_t size, void __rcu **nodes) @@ -1125,9 +1124,10 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas) { struct maple_alloc *ret, *node = mas->alloc; unsigned long total = mas_allocated(mas); + unsigned int req = mas_alloc_req(mas); /* nothing or a request pending. */ - if (unlikely(!total)) + if (WARN_ON(!total)) return NULL; if (total == 1) { @@ -1137,27 +1137,25 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas) goto single_node; } - if (!node->node_count) { + if (node->node_count == 1) { /* Single allocation in this node. 
*/ mas->alloc = node->slot[0]; - node->slot[0] = NULL; mas->alloc->total = node->total - 1; ret = node; goto new_head; } - node->total--; - ret = node->slot[node->node_count]; - node->slot[node->node_count--] = NULL; + ret = node->slot[--node->node_count]; + node->slot[node->node_count] = NULL; single_node: new_head: - ret->total = 0; - ret->node_count = 0; - if (ret->request_count) { - mas_set_alloc_req(mas, ret->request_count + 1); - ret->request_count = 0; + if (req) { + req++; + mas_set_alloc_req(mas, req); } + + memset(ret, 0, sizeof(*ret)); return (struct maple_node *)ret; } @@ -1176,21 +1174,20 @@ static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) unsigned long count; unsigned int requested = mas_alloc_req(mas); - memset(reuse, 0, sizeof(*reuse)); count = mas_allocated(mas); - if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) { - if (head->slot[0]) - head->node_count++; - head->slot[head->node_count] = reuse; + reuse->request_count = 0; + reuse->node_count = 0; + if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) { + head->slot[head->node_count++] = reuse; head->total++; goto done; } reuse->total = 1; if ((head) && !((unsigned long)head & 0x1)) { - head->request_count = 0; reuse->slot[0] = head; + reuse->node_count = 1; reuse->total += head->total; } @@ -1209,7 +1206,6 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) { struct maple_alloc *node; unsigned long allocated = mas_allocated(mas); - unsigned long success = allocated; unsigned int requested = mas_alloc_req(mas); unsigned int count; void **slots = NULL; @@ -1225,24 +1221,29 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) WARN_ON(!allocated); } - if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) { + if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { node = (struct maple_alloc *)mt_alloc_one(gfp); if (!node) goto nomem_one; - if (allocated) + if (allocated) { node->slot[0] = mas->alloc; + node->node_count = 1; + } else { + node->node_count = 0; + } - success++; mas->alloc = node; + node->total = ++allocated; requested--; } node = mas->alloc; + node->request_count = 0; while (requested) { max_req = MAPLE_ALLOC_SLOTS; - if (node->slot[0]) { - unsigned int offset = node->node_count + 1; + if (node->node_count) { + unsigned int offset = node->node_count; slots = (void **)&node->slot[offset]; max_req -= offset; @@ -1256,15 +1257,13 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) goto nomem_bulk; node->node_count += count; - /* zero indexed. 
*/ - if (slots == (void **)&node->slot) - node->node_count--; - - success += count; + allocated += count; node = node->slot[0]; + node->node_count = 0; + node->request_count = 0; requested -= count; } - mas->alloc->total = success; + mas->alloc->total = allocated; return; nomem_bulk: @@ -1273,7 +1272,7 @@ nomem_bulk: nomem_one: mas_set_alloc_req(mas, requested); if (mas->alloc && !(((unsigned long)mas->alloc & 0x1))) - mas->alloc->total = success; + mas->alloc->total = allocated; mas_set_err(mas, -ENOMEM); } @@ -5734,6 +5733,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) void mas_destroy(struct ma_state *mas) { struct maple_alloc *node; + unsigned long total; /* * When using mas_for_each() to insert an expected number of elements, @@ -5756,14 +5756,20 @@ void mas_destroy(struct ma_state *mas) } mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); - while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) { + total = mas_allocated(mas); + while (total) { node = mas->alloc; mas->alloc = node->slot[0]; - if (node->node_count > 0) - mt_free_bulk(node->node_count, - (void __rcu **)&node->slot[1]); + if (node->node_count > 1) { + size_t count = node->node_count - 1; + + mt_free_bulk(count, (void __rcu **)&node->slot[1]); + total -= count; + } kmem_cache_free(maple_node_cache, node); + total--; } + mas->alloc = NULL; } EXPORT_SYMBOL_GPL(mas_destroy); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 81fa7ec2e66a..1f36bc1c5d36 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -173,11 +173,11 @@ static noinline void check_new_node(struct maple_tree *mt) if (!MAPLE_32BIT) { if (i >= 35) - e = i - 35; + e = i - 34; else if (i >= 5) - e = i - 5; + e = i - 4; else if (i >= 2) - e = i - 2; + e = i - 1; } else { if (i >= 4) e = i - 4; @@ -305,17 +305,17 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); mn = mas_pop_node(&mas); /* get the next node. 
*/ MT_BUG_ON(mt, mn == NULL); MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 2); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); mas_push_node(&mas, mn); MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); /* Check the limit of pop/push/pop */ mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */ @@ -323,14 +323,14 @@ static noinline void check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); MT_BUG_ON(mt, mas_alloc_req(&mas)); - MT_BUG_ON(mt, mas.alloc->node_count); + MT_BUG_ON(mt, mas.alloc->node_count != 1); MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); mn = mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); + MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas.alloc->node_count); + MT_BUG_ON(mt, mas.alloc->node_count != 1); MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); mn = mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); -- cgit v1.2.3 From c5d5546ea06512accc894cd19265c7041a6ac81a Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Tue, 10 Jan 2023 23:42:11 +0800 Subject: maple_tree: remove the parameter entry of mas_preallocate The parameter entry of mas_preallocate is not used, so drop it. Link: https://lkml.kernel.org/r/20230110154211.1758562-1-vernon2gm@gmail.com Signed-off-by: Vernon Yang Cc: Liam Howlett Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 +- lib/maple_tree.c | 3 +-- mm/mmap.c | 16 ++++++++-------- mm/nommu.c | 8 ++++---- tools/testing/radix-tree/maple.c | 32 ++++++++++++++++---------------- 5 files changed, 30 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 815a27661517..a7bf58fd7cc6 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -455,7 +455,7 @@ int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); -int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_preallocate(struct ma_state *mas, gfp_t gfp); bool mas_is_err(struct ma_state *mas); bool mas_nomem(struct ma_state *mas, gfp_t gfp); diff --git a/lib/maple_tree.c b/lib/maple_tree.c index baff62a012e1..5be99550e36d 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5700,12 +5700,11 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); /** * mas_preallocate() - Preallocate enough nodes for a store operation * @mas: The maple state - * @entry: The entry that will be stored * @gfp: The GFP_FLAGS to use for allocations. * * Return: 0 on success, -ENOMEM if memory could not be allocated. 
*/ -int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) +int mas_preallocate(struct ma_state *mas, gfp_t gfp) { int ret; diff --git a/mm/mmap.c b/mm/mmap.c index 425a9349e610..4fe29b8f99b0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -472,7 +472,7 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma) MA_STATE(mas, &mm->mm_mt, 0, 0); struct address_space *mapping = NULL; - if (mas_preallocate(&mas, vma, GFP_KERNEL)) + if (mas_preallocate(&mas, GFP_KERNEL)) return -ENOMEM; if (vma->vm_file) { @@ -538,7 +538,7 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma, /* Only handles expanding */ VM_BUG_ON(vma->vm_start < start || vma->vm_end > end); - if (mas_preallocate(mas, vma, GFP_KERNEL)) + if (mas_preallocate(mas, GFP_KERNEL)) goto nomem; vma_adjust_trans_huge(vma, start, end, 0); @@ -712,7 +712,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, } } - if (mas_preallocate(&mas, vma, GFP_KERNEL)) + if (mas_preallocate(&mas, GFP_KERNEL)) return -ENOMEM; vma_adjust_trans_huge(orig_vma, start, end, adjust_next); @@ -1938,7 +1938,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* Check that both stack segments have the same anon_vma? */ } - if (mas_preallocate(&mas, vma, GFP_KERNEL)) + if (mas_preallocate(&mas, GFP_KERNEL)) return -ENOMEM; /* We must make sure the anon_vma is allocated. */ @@ -2019,7 +2019,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) return -ENOMEM; } - if (mas_preallocate(&mas, vma, GFP_KERNEL)) + if (mas_preallocate(&mas, GFP_KERNEL)) return -ENOMEM; /* We must make sure the anon_vma is allocated. */ @@ -2311,7 +2311,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); mt_set_external_lock(&mt_detach, &mm->mmap_lock); - if (mas_preallocate(mas, vma, GFP_KERNEL)) + if (mas_preallocate(mas, GFP_KERNEL)) return -ENOMEM; mas->last = end - 1; @@ -2680,7 +2680,7 @@ cannot_expand: goto free_vma; } - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { + if (mas_preallocate(&mas, GFP_KERNEL)) { error = -ENOMEM; if (file) goto close_and_free_vma; @@ -2953,7 +2953,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, can_vma_merge_after(vma, flags, NULL, NULL, addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { mas_set_range(mas, vma->vm_start, addr + len - 1); - if (mas_preallocate(mas, vma, GFP_KERNEL)) + if (mas_preallocate(mas, GFP_KERNEL)) goto unacct_fail; vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); diff --git a/mm/nommu.c b/mm/nommu.c index df1711acdf5b..0481922fe66e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -602,7 +602,7 @@ static int add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end); - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { + if (mas_preallocate(&mas, GFP_KERNEL)) { pr_warn("Allocation of vma tree for process %d failed\n", current->pid); return -ENOMEM; @@ -633,7 +633,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vma) { MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0); - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { + if (mas_preallocate(&mas, GFP_KERNEL)) { pr_warn("Allocation of vma tree for process %d failed\n", current->pid); return -ENOMEM; @@ -1091,7 +1091,7 @@ unsigned long do_mmap(struct file *file, if (!vma) goto error_getting_vma; - if (mas_preallocate(&mas, vma, GFP_KERNEL)) + if (mas_preallocate(&mas, GFP_KERNEL)) goto error_maple_preallocate; region->vm_usage = 
1; @@ -1369,7 +1369,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (!new) goto err_vma_dup; - if (mas_preallocate(&mas, vma, GFP_KERNEL)) { + if (mas_preallocate(&mas, GFP_KERNEL)) { pr_warn("Allocation of vma tree for process %d failed\n", current->pid); goto err_mas_preallocate; diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 1f36bc1c5d36..958ee9bdb316 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35342,7 +35342,7 @@ static noinline void check_prealloc(struct maple_tree *mt) for (i = 0; i <= max; i++) mtree_test_store_range(mt, i * 10, i * 10 + 5, &i); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35351,18 +35351,18 @@ static noinline void check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35370,25 +35370,25 @@ static noinline void check_prealloc(struct maple_tree *mt) mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35397,12 +35397,12 @@ static noinline void check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mas_push_node(&mas, mn); MT_BUG_ON(mt, mas_allocated(&mas) != allocated); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35410,21 +35410,21 @@ static noinline void check_prealloc(struct 
maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mas_store_prealloc(&mas, ptr); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35432,14 +35432,14 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); mas_destroy(&mas); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35447,7 +35447,7 @@ static noinline void check_prealloc(struct maple_tree *mt) mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0); + MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); -- cgit v1.2.3 From f942b0f0528d1198b94b8211c84d4f28a654c0ff Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 11 Jan 2023 21:53:48 +0800 Subject: maple_tree: fix comment of mte_destroy_walk The parameter name of maple tree is mt, make the comment be mt instead of mn, and the separator between the parameter name and the description to be : instead of -. Link: https://lkml.kernel.org/r/20230111135348.803181-1-vernon2gm@gmail.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Vernon Yang Cc: Liam R. Howlett Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- lib/maple_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 5be99550e36d..1c5d3b640a24 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5579,8 +5579,8 @@ free_leaf: /* * mte_destroy_walk() - Free a tree or sub-tree. - * @enode - the encoded maple node (maple_enode) to start - * @mn - the tree to free - needed for node types. + * @enode: the encoded maple node (maple_enode) to start + * @mt: the tree to free - needed for node types. * * Must hold the write lock. */ -- cgit v1.2.3 From 2973d8229b78d3f148e0c45916a1e8b237dc6167 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Jan 2023 11:12:17 +0000 Subject: mm: discard __GFP_ATOMIC __GFP_ATOMIC serves little purpose. Its main effect is to set ALLOC_HARDER which adds a few little boosts to increase the chance of an allocation succeeding, one of which is to lower the water-mark at which it will succeed. 
It is *always* paired with __GFP_HIGH which sets ALLOC_HIGH which also adjusts this watermark. It is probable that other users of __GFP_HIGH should benefit from the other little bonuses that __GFP_ATOMIC gets. __GFP_ATOMIC also gives a warning if used with __GFP_DIRECT_RECLAIM. There is little point to this. We already get a might_sleep() warning if __GFP_DIRECT_RECLAIM is set. __GFP_ATOMIC allows the "watermark_boost" to be side-stepped. It is probable that testing ALLOC_HARDER is a better fit here. __GFP_ATOMIC is used by tegra-smmu.c to check if the allocation might sleep. This should test __GFP_DIRECT_RECLAIM instead. This patch: - removes __GFP_ATOMIC - allows __GFP_HIGH allocations to ignore watermark boosting as well as GFP_ATOMIC requests. - makes other adjustments as suggested by the above. The net result is not change to GFP_ATOMIC allocations. Other allocations that use __GFP_HIGH will benefit from a few different extra privileges. This affects: xen, dm, md, ntfs3 the vermillion frame buffer hibernation ksm swap all of which likely produce more benefit than cost if these selected allocation are more likely to succeed quickly. [mgorman: Minor adjustments to rework on top of a series] Link: https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name Link: https://lkml.kernel.org/r/20230113111217.14134-7-mgorman@techsingularity.net Signed-off-by: NeilBrown Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Thierry Reding Signed-off-by: Andrew Morton --- Documentation/mm/balance.rst | 2 +- drivers/iommu/tegra-smmu.c | 4 ++-- include/linux/gfp_types.h | 12 ++++-------- include/trace/events/mmflags.h | 1 - lib/test_printf.c | 8 ++++---- mm/internal.h | 2 +- mm/page_alloc.c | 13 +++---------- tools/perf/builtin-kmem.c | 1 - 8 files changed, 15 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/Documentation/mm/balance.rst b/Documentation/mm/balance.rst index 6a1fadf3e173..e38e9d83c1c7 100644 --- a/Documentation/mm/balance.rst +++ b/Documentation/mm/balance.rst @@ -6,7 +6,7 @@ Memory Balancing Started Jan 2000 by Kanoj Sarcar -Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as +Memory balancing is needed for !__GFP_HIGH and !__GFP_KSWAPD_RECLAIM as well as for non __GFP_IO allocations. The first reason why a caller may avoid reclaim is that the caller can not diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5b1af40221ec..af8d0e685260 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -671,12 +671,12 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as, * allocate page in a sleeping context if GFP flags permit. Hence * spinlock needs to be unlocked and re-locked after allocation. 
*/ - if (!(gfp & __GFP_ATOMIC)) + if (gfpflags_allow_blocking(gfp)) spin_unlock_irqrestore(&as->lock, *flags); page = alloc_page(gfp | __GFP_DMA | __GFP_ZERO); - if (!(gfp & __GFP_ATOMIC)) + if (gfpflags_allow_blocking(gfp)) spin_lock_irqsave(&as->lock, *flags); /* diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index d88c46ca82e1..5088637fe5c2 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -31,7 +31,7 @@ typedef unsigned int __bitwise gfp_t; #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u #define ___GFP_ZERO 0x100u -#define ___GFP_ATOMIC 0x200u +/* 0x200u unused */ #define ___GFP_DIRECT_RECLAIM 0x400u #define ___GFP_KSWAPD_RECLAIM 0x800u #define ___GFP_WRITE 0x1000u @@ -116,11 +116,8 @@ typedef unsigned int __bitwise gfp_t; * * %__GFP_HIGH indicates that the caller is high-priority and that granting * the request is necessary before the system can make forward progress. - * For example, creating an IO context to clean pages. - * - * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is - * high priority. Users are typically interrupt handlers. This may be - * used in conjunction with %__GFP_HIGH + * For example creating an IO context to clean pages and requests + * from atomic context. * * %__GFP_MEMALLOC allows access to all memory. This should only be used when * the caller guarantees the allocation will allow more memory to be freed @@ -135,7 +132,6 @@ typedef unsigned int __bitwise gfp_t; * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. * This takes precedence over the %__GFP_MEMALLOC flag if both are set. */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) @@ -329,7 +325,7 @@ typedef unsigned int __bitwise gfp_t; * version does not attempt reclaim/compaction at all and is by default used * in page fault path, while the non-light is used by khugepaged. 
*/ -#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 412b5a46374c..9db52bc4ce19 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -31,7 +31,6 @@ gfpflag_string(__GFP_HIGHMEM), \ gfpflag_string(GFP_DMA32), \ gfpflag_string(__GFP_HIGH), \ - gfpflag_string(__GFP_ATOMIC), \ gfpflag_string(__GFP_IO), \ gfpflag_string(__GFP_FS), \ gfpflag_string(__GFP_NOWARN), \ diff --git a/lib/test_printf.c b/lib/test_printf.c index d34dc636b81c..46b4e6c414a3 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -674,17 +674,17 @@ flags(void) gfp = GFP_ATOMIC|__GFP_DMA; test("GFP_ATOMIC|GFP_DMA", "%pGg", &gfp); - gfp = __GFP_ATOMIC; - test("__GFP_ATOMIC", "%pGg", &gfp); + gfp = __GFP_HIGH; + test("__GFP_HIGH", "%pGg", &gfp); /* Any flags not translated by the table should remain numeric */ gfp = ~__GFP_BITS_MASK; snprintf(cmp_buffer, BUF_SIZE, "%#lx", (unsigned long) gfp); test(cmp_buffer, "%pGg", &gfp); - snprintf(cmp_buffer, BUF_SIZE, "__GFP_ATOMIC|%#lx", + snprintf(cmp_buffer, BUF_SIZE, "__GFP_HIGH|%#lx", (unsigned long) gfp); - gfp |= __GFP_ATOMIC; + gfp |= __GFP_HIGH; test(cmp_buffer, "%pGg", &gfp); kfree(cmp_buffer); diff --git a/mm/internal.h b/mm/internal.h index b0b88a95347f..2d09a7a0600a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,7 +24,7 @@ struct folio_batch; #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ - __GFP_ATOMIC|__GFP_NOLOCKDEP) + __GFP_NOLOCKDEP) /* The GFP flags allowed during early boot */ #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 18ca33a1945d..0cfad30fb44c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4105,13 +4105,14 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, if (__zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags, free_pages)) return true; + /* - * Ignore watermark boosting for GFP_ATOMIC order-0 allocations + * Ignore watermark boosting for __GFP_HIGH order-0 allocations * when checking the min watermark. The min watermark is the * point where boosting is ignored so that kswapd is woken up * when below the low watermark. */ - if (unlikely(!order && (gfp_mask & __GFP_ATOMIC) && z->watermark_boost + if (unlikely(!order && (alloc_flags & ALLOC_MIN_RESERVE) && z->watermark_boost && ((alloc_flags & ALLOC_WMARK_MASK) == WMARK_MIN))) { mark = z->_watermark[WMARK_MIN]; return __zone_watermark_ok(z, order, mark, highest_zoneidx, @@ -5076,14 +5077,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int zonelist_iter_cookie; int reserve_flags; - /* - * We also sanity check to catch abuse of atomic reserves being used by - * callers that are not in atomic context. 
- */ - if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) == - (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) - gfp_mask &= ~__GFP_ATOMIC; - restart: compaction_retries = 0; no_progress_loops = 0; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 8ae0a1535293..f3029742b800 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -653,7 +653,6 @@ static const struct { { "__GFP_HIGHMEM", "HM" }, { "GFP_DMA32", "D32" }, { "__GFP_HIGH", "H" }, - { "__GFP_ATOMIC", "_A" }, { "__GFP_IO", "I" }, { "__GFP_FS", "F" }, { "__GFP_NOWARN", "NWR" }, -- cgit v1.2.3 From b2db9ef2c0926ba86898136704cdc757c7a5a60a Mon Sep 17 00:00:00 2001 From: Zhaoyang Huang Date: Thu, 19 Jan 2023 09:22:24 +0800 Subject: mm: move KMEMLEAK's Kconfig items from lib to mm Have the kmemleak's source code and Kconfig items be in the same directory. Link: https://lkml.kernel.org/r/1674091345-14799-1-git-send-email-zhaoyang.huang@unisoc.com Signed-off-by: Zhaoyang Huang Acked-by: Mike Rapoport (IBM) Acked-by: Vlastimil Babka Cc: ke.wang Cc: Mirsad Goran Todorovac Cc: Nathan Chancellor Cc: Peter Zijlstra (Intel) Cc: Catalin Marinas Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 71 ------------------------------------------------------ mm/Kconfig.debug | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 139758854ce6..958087475edb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -743,77 +743,6 @@ config SHRINKER_DEBUG visibility into the kernel memory shrinkers subsystem. Disable it to avoid an extra memory footprint. -config HAVE_DEBUG_KMEMLEAK - bool - -config DEBUG_KMEMLEAK - bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK - select DEBUG_FS - select STACKTRACE if STACKTRACE_SUPPORT - select KALLSYMS - select CRC32 - select STACKDEPOT - select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF - help - Say Y here if you want to enable the memory leak - detector. The memory allocation/freeing is traced in a way - similar to the Boehm's conservative garbage collector, the - difference being that the orphan objects are not freed but - only shown in /sys/kernel/debug/kmemleak. Enabling this - feature will introduce an overhead to memory - allocations. See Documentation/dev-tools/kmemleak.rst for more - details. - - Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances - of finding leaks due to the slab objects poisoning. - - In order to access the kmemleak file, debugfs needs to be - mounted (usually at /sys/kernel/debug). - -config DEBUG_KMEMLEAK_MEM_POOL_SIZE - int "Kmemleak memory pool size" - depends on DEBUG_KMEMLEAK - range 200 1000000 - default 16000 - help - Kmemleak must track all the memory allocations to avoid - reporting false positives. Since memory may be allocated or - freed before kmemleak is fully initialised, use a static pool - of metadata objects to track such callbacks. After kmemleak is - fully initialised, this memory pool acts as an emergency one - if slab allocations fail. - -config DEBUG_KMEMLEAK_TEST - tristate "Simple test for the kernel memory leak detector" - depends on DEBUG_KMEMLEAK && m - help - This option enables a module that explicitly leaks memory. - - If unsure, say N. - -config DEBUG_KMEMLEAK_DEFAULT_OFF - bool "Default kmemleak to off" - depends on DEBUG_KMEMLEAK - help - Say Y here to disable kmemleak by default. 
It can then be enabled - on the command line via kmemleak=on. - -config DEBUG_KMEMLEAK_AUTO_SCAN - bool "Enable kmemleak auto scan thread on boot up" - default y - depends on DEBUG_KMEMLEAK - help - Depending on the cpu, kmemleak scan may be cpu intensive and can - stall user tasks at times. This option enables/disables automatic - kmemleak scan at boot up. - - Say N here to disable kmemleak auto scan thread to stop automatic - scanning. Disabling this option disables automatic reporting of - memory leaks. - - If unsure, say Y. - config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL && !IA64 diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index d62f48131952..c3547a373c9c 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -207,3 +207,75 @@ config PTDUMP_DEBUGFS kernel. If in doubt, say N. + +config HAVE_DEBUG_KMEMLEAK + bool + +config DEBUG_KMEMLEAK + bool "Kernel memory leak detector" + depends on DEBUG_KERNEL && HAVE_DEBUG_KMEMLEAK + select DEBUG_FS + select STACKTRACE if STACKTRACE_SUPPORT + select KALLSYMS + select CRC32 + select STACKDEPOT + select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF + help + Say Y here if you want to enable the memory leak + detector. The memory allocation/freeing is traced in a way + similar to the Boehm's conservative garbage collector, the + difference being that the orphan objects are not freed but + only shown in /sys/kernel/debug/kmemleak. Enabling this + feature will introduce an overhead to memory + allocations. See Documentation/dev-tools/kmemleak.rst for more + details. + + Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances + of finding leaks due to the slab objects poisoning. + + In order to access the kmemleak file, debugfs needs to be + mounted (usually at /sys/kernel/debug). + +config DEBUG_KMEMLEAK_MEM_POOL_SIZE + int "Kmemleak memory pool size" + depends on DEBUG_KMEMLEAK + range 200 1000000 + default 16000 + help + Kmemleak must track all the memory allocations to avoid + reporting false positives. Since memory may be allocated or + freed before kmemleak is fully initialised, use a static pool + of metadata objects to track such callbacks. After kmemleak is + fully initialised, this memory pool acts as an emergency one + if slab allocations fail. + +config DEBUG_KMEMLEAK_TEST + tristate "Simple test for the kernel memory leak detector" + depends on DEBUG_KMEMLEAK && m + help + This option enables a module that explicitly leaks memory. + + If unsure, say N. + +config DEBUG_KMEMLEAK_DEFAULT_OFF + bool "Default kmemleak to off" + depends on DEBUG_KMEMLEAK + help + Say Y here to disable kmemleak by default. It can then be enabled + on the command line via kmemleak=on. + +config DEBUG_KMEMLEAK_AUTO_SCAN + bool "Enable kmemleak auto scan thread on boot up" + default y + depends on DEBUG_KMEMLEAK + help + Depending on the cpu, kmemleak scan may be cpu intensive and can + stall user tasks at times. This option enables/disables automatic + kmemleak scan at boot up. + + Say N here to disable kmemleak auto scan thread to stop automatic + scanning. Disabling this option disables automatic reporting of + memory leaks. + + If unsure, say Y. + -- cgit v1.2.3 From 65be6f058b0eba98dc6c6f197ea9f62c9b6a519f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:03 -0500 Subject: maple_tree: fix potential rcu issue Ensure the node isn't dead after reading the node end. Link: https://lkml.kernel.org/r/20230120162650.984577-3-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 1c5d3b640a24..7e3cf5b7e68b 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4655,13 +4655,13 @@ static inline void *mas_next_nentry(struct ma_state *mas, pivots = ma_pivots(node, type); slots = ma_slots(node, type); mas->index = mas_safe_min(mas, pivots, mas->offset); + count = ma_data_end(node, type, pivots, mas->max); if (ma_dead_node(node)) return NULL; if (mas->index > max) return NULL; - count = ma_data_end(node, type, pivots, mas->max); if (mas->offset > count) return NULL; -- cgit v1.2.3 From 50e81c82ad947045c7ed26ddc9acb17276b653b6 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:04 -0500 Subject: maple_tree: reduce user error potential When iterating, a user may operate on the tree and cause the maple state to be altered and left in an unintuitive state. Detect this scenario and correct it by setting to the limit and invalidating the state. Link: https://lkml.kernel.org/r/20230120162650.984577-4-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/maple_tree.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 7e3cf5b7e68b..5804c5997598 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4732,6 +4732,11 @@ static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) unsigned long last; enum maple_type mt; + if (mas->index > limit) { + mas->index = mas->last = limit; + mas_pause(mas); + return NULL; + } last = mas->last; retry: offset = mas->offset; @@ -4838,6 +4843,11 @@ static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min) { void *entry; + if (mas->index < min) { + mas->index = mas->last = min; + mas_pause(mas); + return NULL; + } retry: while (likely(!mas_is_none(mas))) { entry = mas_prev_nentry(mas, min, mas->index); -- cgit v1.2.3 From 5159d64b335401fa83f18c27e2267f1eafc41bd3 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:05 -0500 Subject: test_maple_tree: test modifications while iterating Add a testcase to ensure the iterator detects bad states on modifications and does what the user expects Link: https://lkml.kernel.org/r/20230120162650.984577-5-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'lib') diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index ec847bf4dcb4..3d19b1f78d71 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -1709,6 +1709,74 @@ static noinline void check_forking(struct maple_tree *mt) mtree_destroy(&newmt); } +static noinline void check_iteration(struct maple_tree *mt) +{ + int i, nr_entries = 125; + void *val; + MA_STATE(mas, mt, 0, 0); + + for (i = 0; i <= nr_entries; i++) + mtree_store_range(mt, i * 10, i * 10 + 9, + xa_mk_value(i), GFP_KERNEL); + + mt_set_non_kernel(99999); + + i = 0; + mas_lock(&mas); + mas_for_each(&mas, val, 925) { + MT_BUG_ON(mt, mas.index != i * 10); + MT_BUG_ON(mt, mas.last != i * 10 + 9); + /* Overwrite end of entry 92 */ + if (i == 92) { + mas.index = 925; + mas.last = 929; + mas_store(&mas, val); + } + i++; + } + /* Ensure mas_find() gets the next value */ + val = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, val != xa_mk_value(i)); + + mas_set(&mas, 0); + i = 0; + mas_for_each(&mas, val, 785) { + MT_BUG_ON(mt, mas.index != i * 10); + MT_BUG_ON(mt, mas.last != i * 10 + 9); + /* Overwrite start of entry 78 */ + if (i == 78) { + mas.index = 780; + mas.last = 785; + mas_store(&mas, val); + } else { + i++; + } + } + val = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, val != xa_mk_value(i)); + + mas_set(&mas, 0); + i = 0; + mas_for_each(&mas, val, 765) { + MT_BUG_ON(mt, mas.index != i * 10); + MT_BUG_ON(mt, mas.last != i * 10 + 9); + /* Overwrite end of entry 76 and advance to the end */ + if (i == 76) { + mas.index = 760; + mas.last = 765; + mas_store(&mas, val); + mas_next(&mas, ULONG_MAX); + } + i++; + } + /* Make sure the next find returns the one after 765, 766-769 */ + val = mas_find(&mas, ULONG_MAX); + MT_BUG_ON(mt, val != xa_mk_value(76)); + mas_unlock(&mas); + mas_destroy(&mas); + mt_set_non_kernel(0); +} + static noinline void check_mas_store_gfp(struct maple_tree *mt) { @@ -2659,6 +2727,10 @@ static int maple_tree_seed(void) goto skip; #endif + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_iteration(&tree); + mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_forking(&tree); mtree_destroy(&tree); -- cgit v1.2.3 From 1202700c3f8cc5f7e4646c3cf05ee6f7c8bc6ccf Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:06 -0500 Subject: maple_tree: fix handle of invalidated state in mas_wr_store_setup() If an invalidated maple state is encountered during write, reset the maple state to MAS_START. This will result in a re-walk of the tree to the correct location for the write. Link: https://lore.kernel.org/all/20230107020126.1627-1-sj@kernel.org/ Link: https://lkml.kernel.org/r/20230120162650.984577-6-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Reported-by: SeongJae Park Signed-off-by: Andrew Morton --- lib/maple_tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 5804c5997598..7c786bd5e575 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5609,6 +5609,9 @@ static inline void mte_destroy_walk(struct maple_enode *enode, static void mas_wr_store_setup(struct ma_wr_state *wr_mas) { + if (unlikely(mas_is_paused(wr_mas->mas))) + mas_reset(wr_mas->mas); + if (!mas_is_start(wr_mas->mas)) { if (mas_is_none(wr_mas->mas)) { mas_reset(wr_mas->mas); -- cgit v1.2.3 From 17dc622c7b0f94e49bed030726df4db12ecaa6b5 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:07 -0500 Subject: maple_tree: fix mas_prev() and mas_find() state handling When mas_prev() does not find anything, set the state to MAS_NONE. Handle the MAS_NONE in mas_find() like a MAS_START. Link: https://lkml.kernel.org/r/20230120162650.984577-7-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reported-by: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 7c786bd5e575..5e9703189259 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -4845,7 +4845,7 @@ static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min) if (mas->index < min) { mas->index = mas->last = min; - mas_pause(mas); + mas->node = MAS_NONE; return NULL; } retry: @@ -5919,6 +5919,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min) if (!mas->index) { /* Nothing comes before 0 */ mas->last = 0; + mas->node = MAS_NONE; return NULL; } @@ -6009,6 +6010,9 @@ void *mas_find(struct ma_state *mas, unsigned long max) mas->index = ++mas->last; } + if (unlikely(mas_is_none(mas))) + mas->node = MAS_START; + if (unlikely(mas_is_start(mas))) { /* First run or continue */ void *entry; -- cgit v1.2.3 From 15ef6a982f40a2b53b057dad24f00c3fb43e7e70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:49 +0100 Subject: lib/stackdepot: put functions in logical order Patch series "lib/stackdepot: fixes and clean-ups", v2. A set of fixes, comments, and clean-ups I came up with while reading the stack depot code. This patch (of 18): Put stack depot functions' declarations and definitions in a more logical order: 1. Functions that save stack traces into stack depot. 2. Functions that fetch and print stack traces. 3. stack_depot_get_extra_bits that operates on stack depot handles and does not interact with the stack depot storage. No functional changes. Link: https://lkml.kernel.org/r/cover.1676063693.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/daca1319b665d826b94c596b992a8d8117846147.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 15 ++- lib/stackdepot.c | 314 ++++++++++++++++++++++----------------------- 2 files changed, 165 insertions(+), 164 deletions(-) (limited to 'lib') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 9ca7798d7a31..1296a6eeaec0 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -14,17 +14,13 @@ #include typedef u32 depot_stack_handle_t; + /* * Number of bits in the handle that stack depot doesn't use. Users may store * information in them. 
*/ #define STACK_DEPOT_EXTRA_BITS 5 -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - unsigned int extra_bits, - gfp_t gfp_flags, bool can_alloc); - /* * Every user of stack depot has to call stack_depot_init() during its own init * when it's decided that it will be calling stack_depot_save() later. This is @@ -59,17 +55,22 @@ static inline void stack_depot_want_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + unsigned int extra_bits, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); -unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); +void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); -void stack_depot_print(depot_stack_handle_t stack); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 79e894cf8406..4bfaf3bce619 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -79,84 +79,6 @@ static int next_slab_inited; static size_t depot_offset; static DEFINE_RAW_SPINLOCK(depot_lock); -unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) -{ - union handle_parts parts = { .handle = handle }; - - return parts.extra; -} -EXPORT_SYMBOL(stack_depot_get_extra_bits); - -static bool init_stack_slab(void **prealloc) -{ - if (!*prealloc) - return false; - /* - * This smp_load_acquire() pairs with smp_store_release() to - * |next_slab_inited| below and in depot_alloc_stack(). - */ - if (smp_load_acquire(&next_slab_inited)) - return true; - if (stack_slabs[depot_index] == NULL) { - stack_slabs[depot_index] = *prealloc; - *prealloc = NULL; - } else { - /* If this is the last depot slab, do not touch the next one. */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { - stack_slabs[depot_index + 1] = *prealloc; - *prealloc = NULL; - } - /* - * This smp_store_release pairs with smp_load_acquire() from - * |next_slab_inited| above and in stack_depot_save(). - */ - smp_store_release(&next_slab_inited, 1); - } - return true; -} - -/* Allocation of a new stack in raw storage */ -static struct stack_record * -depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) -{ - struct stack_record *stack; - size_t required_size = struct_size(stack, entries, size); - - required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); - - if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { - if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { - WARN_ONCE(1, "Stack depot reached limit capacity"); - return NULL; - } - depot_index++; - depot_offset = 0; - /* - * smp_store_release() here pairs with smp_load_acquire() from - * |next_slab_inited| in stack_depot_save() and - * init_stack_slab(). 
- */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) - smp_store_release(&next_slab_inited, 0); - } - init_stack_slab(prealloc); - if (stack_slabs[depot_index] == NULL) - return NULL; - - stack = stack_slabs[depot_index] + depot_offset; - - stack->hash = hash; - stack->size = size; - stack->handle.slabindex = depot_index; - stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; - stack->handle.valid = 1; - stack->handle.extra = 0; - memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); - depot_offset += required_size; - - return stack; -} - /* one hash table bucket entry per 16kB of memory */ #define STACK_HASH_SCALE 14 /* limited between 4k and 1M buckets */ @@ -270,6 +192,76 @@ int stack_depot_init(void) } EXPORT_SYMBOL_GPL(stack_depot_init); +static bool init_stack_slab(void **prealloc) +{ + if (!*prealloc) + return false; + /* + * This smp_load_acquire() pairs with smp_store_release() to + * |next_slab_inited| below and in depot_alloc_stack(). + */ + if (smp_load_acquire(&next_slab_inited)) + return true; + if (stack_slabs[depot_index] == NULL) { + stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; + } else { + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } + /* + * This smp_store_release pairs with smp_load_acquire() from + * |next_slab_inited| above and in stack_depot_save(). + */ + smp_store_release(&next_slab_inited, 1); + } + return true; +} + +/* Allocation of a new stack in raw storage */ +static struct stack_record * +depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +{ + struct stack_record *stack; + size_t required_size = struct_size(stack, entries, size); + + required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + + if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return NULL; + } + depot_index++; + depot_offset = 0; + /* + * smp_store_release() here pairs with smp_load_acquire() from + * |next_slab_inited| in stack_depot_save() and + * init_stack_slab(). + */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + smp_store_release(&next_slab_inited, 0); + } + init_stack_slab(prealloc); + if (stack_slabs[depot_index] == NULL) + return NULL; + + stack = stack_slabs[depot_index] + depot_offset; + + stack->hash = hash; + stack->size = size; + stack->handle.slabindex = depot_index; + stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + stack->handle.valid = 1; + stack->handle.extra = 0; + memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); + depot_offset += required_size; + + return stack; +} + /* Calculate hash for a stack */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) { @@ -309,85 +301,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -/** - * stack_depot_snprint - print stack entries from a depot into a buffer - * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @buf: Pointer to the print buffer - * - * @size: Size of the print buffer - * - * @spaces: Number of leading spaces to print - * - * Return: Number of bytes printed. 
- */ -int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, - int spaces) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(handle, &entries); - return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, - spaces) : 0; -} -EXPORT_SYMBOL_GPL(stack_depot_snprint); - -/** - * stack_depot_print - print stack entries from a depot - * - * @stack: Stack depot handle which was returned from - * stack_depot_save(). - * - */ -void stack_depot_print(depot_stack_handle_t stack) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - if (nr_entries > 0) - stack_trace_print(entries, nr_entries, 0); -} -EXPORT_SYMBOL_GPL(stack_depot_print); - -/** - * stack_depot_fetch - Fetch stack entries from a depot - * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @entries: Pointer to store the entries address - * - * Return: The number of trace entries for this depot. - */ -unsigned int stack_depot_fetch(depot_stack_handle_t handle, - unsigned long **entries) -{ - union handle_parts parts = { .handle = handle }; - void *slab; - size_t offset = parts.offset << STACK_ALLOC_ALIGN; - struct stack_record *stack; - - *entries = NULL; - if (!handle) - return 0; - - if (parts.slabindex > depot_index) { - WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", - parts.slabindex, depot_index, handle); - return 0; - } - slab = stack_slabs[parts.slabindex]; - if (!slab) - return 0; - stack = slab + offset; - - *entries = stack->entries; - return stack->size; -} -EXPORT_SYMBOL_GPL(stack_depot_fetch); - /** * __stack_depot_save - Save a stack trace from an array * @@ -533,3 +446,90 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true); } EXPORT_SYMBOL_GPL(stack_depot_save); + +/** + * stack_depot_fetch - Fetch stack entries from a depot + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). + * @entries: Pointer to store the entries address + * + * Return: The number of trace entries for this depot. + */ +unsigned int stack_depot_fetch(depot_stack_handle_t handle, + unsigned long **entries) +{ + union handle_parts parts = { .handle = handle }; + void *slab; + size_t offset = parts.offset << STACK_ALLOC_ALIGN; + struct stack_record *stack; + + *entries = NULL; + if (!handle) + return 0; + + if (parts.slabindex > depot_index) { + WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", + parts.slabindex, depot_index, handle); + return 0; + } + slab = stack_slabs[parts.slabindex]; + if (!slab) + return 0; + stack = slab + offset; + + *entries = stack->entries; + return stack->size; +} +EXPORT_SYMBOL_GPL(stack_depot_fetch); + +/** + * stack_depot_print - print stack entries from a depot + * + * @stack: Stack depot handle which was returned from + * stack_depot_save(). + * + */ +void stack_depot_print(depot_stack_handle_t stack) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(stack, &entries); + if (nr_entries > 0) + stack_trace_print(entries, nr_entries, 0); +} +EXPORT_SYMBOL_GPL(stack_depot_print); + +/** + * stack_depot_snprint - print stack entries from a depot into a buffer + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). 
+ * @buf: Pointer to the print buffer + * + * @size: Size of the print buffer + * + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed. + */ +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(handle, &entries); + return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, + spaces) : 0; +} +EXPORT_SYMBOL_GPL(stack_depot_snprint); + +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) +{ + union handle_parts parts = { .handle = handle }; + + return parts.extra; +} +EXPORT_SYMBOL(stack_depot_get_extra_bits); -- cgit v1.2.3 From 4a6b5314d6bd9093dcc3c0c8e185af7df9a0fe34 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:50 +0100 Subject: lib/stackdepot: use pr_fmt to define message format Use pr_fmt to define the format for printing stack depot messages instead of duplicating the "Stack Depot" prefix in each message. Link: https://lkml.kernel.org/r/3d09db0171a0e92ff3eb0ee74de74558bc9b56c4.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4bfaf3bce619..83787e46a3ab 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -19,6 +19,8 @@ * Based on code by Dmitry Chernenkov. */ +#define pr_fmt(fmt) "stackdepot: " fmt + #include #include #include @@ -98,7 +100,7 @@ static int __init is_stack_depot_disabled(char *str) ret = kstrtobool(str, &stack_depot_disable); if (!ret && stack_depot_disable) { - pr_info("Stack Depot is disabled\n"); + pr_info("disabled\n"); stack_table = NULL; } return 0; @@ -142,7 +144,7 @@ int __init stack_depot_early_init(void) 1UL << STACK_HASH_ORDER_MAX); if (!stack_table) { - pr_err("Stack Depot hash table allocation failed, disabling\n"); + pr_err("hash table allocation failed, disabling\n"); stack_depot_disable = true; return -ENOMEM; } @@ -177,11 +179,11 @@ int stack_depot_init(void) if (entries > 1UL << STACK_HASH_ORDER_MAX) entries = 1UL << STACK_HASH_ORDER_MAX; - pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n", + pr_info("allocating hash table of %lu entries with kvcalloc\n", entries); stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { - pr_err("Stack Depot hash table allocation failed, disabling\n"); + pr_err("hash table allocation failed, disabling\n"); stack_depot_disable = true; ret = -ENOMEM; } -- cgit v1.2.3 From 1c0310add78e7e47e3357c24369b61453a5a72eb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:51 +0100 Subject: lib/stackdepot, mm: rename stack_depot_want_early_init Rename stack_depot_want_early_init to stack_depot_request_early_init. The old name is confusing, as it hints at returning some kind of intention of stack depot. The new name reflects that this function requests an action from stack depot instead. No functional changes. 
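For reference, the intended calling pattern after the rename mirrors the kmemleak and page_owner callers touched below. The subsystem, boot parameter and variable names in this sketch are made up for illustration; only stack_depot_request_early_init(), early_param() and kstrtobool() are existing kernel interfaces:

  #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/stackdepot.h>

  static bool my_feature_enabled;

  static int __init my_feature_param(char *buf)
  {
      int ret = kstrtobool(buf, &my_feature_enabled);

      /*
       * The decision to use stack depot is taken while boot parameters
       * are evaluated, so request the memblock-based early allocation
       * that mm_init() -> stack_depot_early_init() performs later.
       */
      if (!ret && my_feature_enabled)
          stack_depot_request_early_init();

      return ret;
  }
  early_param("my_feature", my_feature_param);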
[akpm@linux-foundation.org: update mm/kmemleak.c] Link: https://lkml.kernel.org/r/359f31bf67429a06e630b4395816a967214ef753.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 14 +++++++------- lib/stackdepot.c | 10 +++++----- mm/kmemleak.c | 2 +- mm/page_owner.c | 2 +- mm/slub.c | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 1296a6eeaec0..c4e3abc16b16 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -31,26 +31,26 @@ typedef u32 depot_stack_handle_t; * enabled as part of mm_init(), for subsystems where it's known at compile time * that stack depot will be used. * - * Another alternative is to call stack_depot_want_early_init(), when the + * Another alternative is to call stack_depot_request_early_init(), when the * decision to use stack depot is taken e.g. when evaluating kernel boot * parameters, which precedes the enablement point in mm_init(). * - * stack_depot_init() and stack_depot_want_early_init() can be called regardless - * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print - * functions should only be called from code that makes sure CONFIG_STACKDEPOT - * is enabled. + * stack_depot_init() and stack_depot_request_early_init() can be called + * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual + * save/fetch/print functions should only be called from code that makes sure + * CONFIG_STACKDEPOT is enabled. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); -void __init stack_depot_want_early_init(void); +void __init stack_depot_request_early_init(void); /* This is supposed to be called only from mm_init() */ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } -static inline void stack_depot_want_early_init(void) { } +static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 83787e46a3ab..136706efe339 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -71,7 +71,7 @@ struct stack_record { unsigned long entries[]; /* Variable-sized array of entries. */ }; -static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); +static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; @@ -107,12 +107,12 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -void __init stack_depot_want_early_init(void) +void __init stack_depot_request_early_init(void) { - /* Too late to request early init now */ + /* Too late to request early init now. 
*/ WARN_ON(__stack_depot_early_init_passed); - __stack_depot_want_early_init = true; + __stack_depot_early_init_requested = true; } int __init stack_depot_early_init(void) @@ -128,7 +128,7 @@ int __init stack_depot_early_init(void) if (kasan_enabled() && !stack_hash_order) stack_hash_order = STACK_HASH_ORDER_MAX; - if (!__stack_depot_want_early_init || stack_depot_disable) + if (!__stack_depot_early_init_requested || stack_depot_disable) return 0; if (stack_hash_order) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d9b242cfdb1c..a2d34226e3c8 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -2071,7 +2071,7 @@ static int __init kmemleak_boot_config(char *str) kmemleak_disable(); else if (strcmp(str, "on") == 0) { kmemleak_skip_disable = 1; - stack_depot_want_early_init(); + stack_depot_request_early_init(); } else return -EINVAL; diff --git a/mm/page_owner.c b/mm/page_owner.c index 80dc8f4050fa..220cdeddc295 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -48,7 +48,7 @@ static int __init early_page_owner_param(char *buf) int ret = kstrtobool(buf, &page_owner_enabled); if (page_owner_enabled) - stack_depot_want_early_init(); + stack_depot_request_early_init(); return ret; } diff --git a/mm/slub.c b/mm/slub.c index 67020074ecb4..f8dba33e4d15 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1592,7 +1592,7 @@ static int __init setup_slub_debug(char *str) } else { slab_list_specified = true; if (flags & SLAB_STORE_USER) - stack_depot_want_early_init(); + stack_depot_request_early_init(); } } @@ -1611,7 +1611,7 @@ static int __init setup_slub_debug(char *str) out: slub_debug = global_flags; if (slub_debug & SLAB_STORE_USER) - stack_depot_want_early_init(); + stack_depot_request_early_init(); if (slub_debug != 0 || slub_debug_string) static_branch_enable(&slub_debug_enabled); else -- cgit v1.2.3 From 735df3c3a3493cbedfa86739eec6e2ee37fe95f8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:52 +0100 Subject: lib/stackdepot: rename stack_depot_disable Rename stack_depot_disable to stack_depot_disabled to make its name look similar to the names of other stack depot flags. Also put stack_depot_disabled's definition together with the other flags. Also rename is_stack_depot_disabled to disable_stack_depot: this name looks more conventional for a function that processes a boot parameter. No functional changes. Link: https://lkml.kernel.org/r/d78a07d222e689926e5ead229e4a2e3d87dc9aa7.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 136706efe339..202e07c4f02d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -71,6 +71,7 @@ struct stack_record { unsigned long entries[]; /* Variable-sized array of entries. 
*/ }; +static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; @@ -91,21 +92,20 @@ static DEFINE_RAW_SPINLOCK(depot_lock); static unsigned int stack_hash_order; static unsigned int stack_hash_mask; -static bool stack_depot_disable; static struct stack_record **stack_table; -static int __init is_stack_depot_disabled(char *str) +static int __init disable_stack_depot(char *str) { int ret; - ret = kstrtobool(str, &stack_depot_disable); - if (!ret && stack_depot_disable) { + ret = kstrtobool(str, &stack_depot_disabled); + if (!ret && stack_depot_disabled) { pr_info("disabled\n"); stack_table = NULL; } return 0; } -early_param("stack_depot_disable", is_stack_depot_disabled); +early_param("stack_depot_disable", disable_stack_depot); void __init stack_depot_request_early_init(void) { @@ -128,7 +128,7 @@ int __init stack_depot_early_init(void) if (kasan_enabled() && !stack_hash_order) stack_hash_order = STACK_HASH_ORDER_MAX; - if (!__stack_depot_early_init_requested || stack_depot_disable) + if (!__stack_depot_early_init_requested || stack_depot_disabled) return 0; if (stack_hash_order) @@ -145,7 +145,7 @@ int __init stack_depot_early_init(void) if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); - stack_depot_disable = true; + stack_depot_disabled = true; return -ENOMEM; } @@ -158,7 +158,7 @@ int stack_depot_init(void) int ret = 0; mutex_lock(&stack_depot_init_mutex); - if (!stack_depot_disable && !stack_table) { + if (!stack_depot_disabled && !stack_table) { unsigned long entries; int scale = STACK_HASH_SCALE; @@ -184,7 +184,7 @@ int stack_depot_init(void) stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); - stack_depot_disable = true; + stack_depot_disabled = true; ret = -ENOMEM; } stack_hash_mask = entries - 1; @@ -353,7 +353,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, */ nr_entries = filter_irq_stacks(entries, nr_entries); - if (unlikely(nr_entries == 0) || stack_depot_disable) + if (unlikely(nr_entries == 0) || stack_depot_disabled) goto fast_exit; hash = hash_stack(entries, nr_entries); -- cgit v1.2.3 From df225c877d898992740d26250727a16db65c370d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:53 +0100 Subject: lib/stackdepot: annotate init and early init functions Add comments to stack_depot_early_init and stack_depot_init to explain certain parts of their implementation. Also add a pr_info message to stack_depot_early_init similar to the one in stack_depot_init. Also move the scale variable in stack_depot_init to the scope where it is being used. Link: https://lkml.kernel.org/r/d17fbfbd4d73f38686c5e3d4824a6d62047213a1.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 202e07c4f02d..9fab711e4826 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -115,24 +115,34 @@ void __init stack_depot_request_early_init(void) __stack_depot_early_init_requested = true; } +/* Allocates a hash table via memblock. Can only be used during early boot. 
*/ int __init stack_depot_early_init(void) { unsigned long entries = 0; - /* This is supposed to be called only once, from mm_init() */ + /* This function must be called only once, from mm_init(). */ if (WARN_ON(__stack_depot_early_init_passed)) return 0; - __stack_depot_early_init_passed = true; + /* + * If KASAN is enabled, use the maximum order: KASAN is frequently used + * in fuzzing scenarios, which leads to a large number of different + * stack traces being stored in stack depot. + */ if (kasan_enabled() && !stack_hash_order) stack_hash_order = STACK_HASH_ORDER_MAX; if (!__stack_depot_early_init_requested || stack_depot_disabled) return 0; + /* + * If stack_hash_order is not set, leave entries as 0 to rely on the + * automatic calculations performed by alloc_large_system_hash. + */ if (stack_hash_order) - entries = 1UL << stack_hash_order; + entries = 1UL << stack_hash_order; + pr_info("allocating hash table via alloc_large_system_hash\n"); stack_table = alloc_large_system_hash("stackdepot", sizeof(struct stack_record *), entries, @@ -142,7 +152,6 @@ int __init stack_depot_early_init(void) &stack_hash_mask, 1UL << STACK_HASH_ORDER_MIN, 1UL << STACK_HASH_ORDER_MAX); - if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); stack_depot_disabled = true; @@ -152,6 +161,7 @@ int __init stack_depot_early_init(void) return 0; } +/* Allocates a hash table via kvcalloc. Can be used after boot. */ int stack_depot_init(void) { static DEFINE_MUTEX(stack_depot_init_mutex); @@ -160,11 +170,16 @@ int stack_depot_init(void) mutex_lock(&stack_depot_init_mutex); if (!stack_depot_disabled && !stack_table) { unsigned long entries; - int scale = STACK_HASH_SCALE; + /* + * Similarly to stack_depot_early_init, use stack_hash_order + * if assigned, and rely on automatic scaling otherwise. + */ if (stack_hash_order) { entries = 1UL << stack_hash_order; } else { + int scale = STACK_HASH_SCALE; + entries = nr_free_buffer_pages(); entries = roundup_pow_of_two(entries); @@ -179,7 +194,7 @@ int stack_depot_init(void) if (entries > 1UL << STACK_HASH_ORDER_MAX) entries = 1UL << STACK_HASH_ORDER_MAX; - pr_info("allocating hash table of %lu entries with kvcalloc\n", + pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { -- cgit v1.2.3 From c60324fbf05d9b1dd3231f5373d4bf31cc23db07 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:54 +0100 Subject: lib/stackdepot: lower the indentation in stack_depot_init stack_depot_init does most things inside an if check. Move them out and use a goto statement instead. No functional changes. 
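The shape of the change, reduced to a standalone sketch (the names here are invented, this is not the stack depot code itself): early bail-outs jump to a single unlock label, so the main work no longer sits inside a nested if:

  #include <linux/mutex.h>

  static DEFINE_MUTEX(init_mutex);
  static bool disabled, initialized;

  static int init_once(void)
  {
      int ret = 0;

      mutex_lock(&init_mutex);

      /* Bail out early through the common unlock path... */
      if (disabled || initialized)
          goto out_unlock;

      /* ...so the actual initialization stays at one indentation level. */
      initialized = true;

  out_unlock:
      mutex_unlock(&init_mutex);
      return ret;
  }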
Link: https://lkml.kernel.org/r/8e382f1f0c352e4b2ad47326fec7782af961fe8e.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 70 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 9fab711e4826..3c713f70b0a3 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -165,46 +165,50 @@ int __init stack_depot_early_init(void) int stack_depot_init(void) { static DEFINE_MUTEX(stack_depot_init_mutex); + unsigned long entries; int ret = 0; mutex_lock(&stack_depot_init_mutex); - if (!stack_depot_disabled && !stack_table) { - unsigned long entries; - /* - * Similarly to stack_depot_early_init, use stack_hash_order - * if assigned, and rely on automatic scaling otherwise. - */ - if (stack_hash_order) { - entries = 1UL << stack_hash_order; - } else { - int scale = STACK_HASH_SCALE; - - entries = nr_free_buffer_pages(); - entries = roundup_pow_of_two(entries); - - if (scale > PAGE_SHIFT) - entries >>= (scale - PAGE_SHIFT); - else - entries <<= (PAGE_SHIFT - scale); - } + if (stack_depot_disabled || stack_table) + goto out_unlock; - if (entries < 1UL << STACK_HASH_ORDER_MIN) - entries = 1UL << STACK_HASH_ORDER_MIN; - if (entries > 1UL << STACK_HASH_ORDER_MAX) - entries = 1UL << STACK_HASH_ORDER_MAX; - - pr_info("allocating hash table of %lu entries via kvcalloc\n", - entries); - stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); - if (!stack_table) { - pr_err("hash table allocation failed, disabling\n"); - stack_depot_disabled = true; - ret = -ENOMEM; - } - stack_hash_mask = entries - 1; + /* + * Similarly to stack_depot_early_init, use stack_hash_order + * if assigned, and rely on automatic scaling otherwise. + */ + if (stack_hash_order) { + entries = 1UL << stack_hash_order; + } else { + int scale = STACK_HASH_SCALE; + + entries = nr_free_buffer_pages(); + entries = roundup_pow_of_two(entries); + + if (scale > PAGE_SHIFT) + entries >>= (scale - PAGE_SHIFT); + else + entries <<= (PAGE_SHIFT - scale); } + + if (entries < 1UL << STACK_HASH_ORDER_MIN) + entries = 1UL << STACK_HASH_ORDER_MIN; + if (entries > 1UL << STACK_HASH_ORDER_MAX) + entries = 1UL << STACK_HASH_ORDER_MAX; + + pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); + stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); + if (!stack_table) { + pr_err("hash table allocation failed, disabling\n"); + stack_depot_disabled = true; + ret = -ENOMEM; + goto out_unlock; + } + stack_hash_mask = entries - 1; + +out_unlock: mutex_unlock(&stack_depot_init_mutex); + return ret; } EXPORT_SYMBOL_GPL(stack_depot_init); -- cgit v1.2.3 From 0d249ac0e07680960929a2d4f7b32be505c8c7a1 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:55 +0100 Subject: lib/stackdepot: reorder and annotate global variables Group stack depot global variables by their purpose: 1. Hash table-related variables, 2. Slab-related variables, and add comments. Also clean up comments for hash table-related constants. 
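As a back-of-the-envelope illustration of what the annotated constants mean, consider a machine where nr_free_buffer_pages() would report roughly 4 GB with 4 KB pages; both the memory size and PAGE_SHIFT == 12 are assumptions of this standalone example, the constants are the ones documented above:

  #include <stdio.h>

  int main(void)
  {
      int page_shift = 12;                    /* assumed 4 KB pages */
      int scale = 14;                         /* STACK_HASH_SCALE: 1 bucket / 16 KB */
      unsigned long long pages = (4ULL << 30) >> page_shift;
      unsigned long long entries = 1;

      while (entries < pages)                 /* roundup_pow_of_two() stand-in */
          entries <<= 1;

      entries >>= (scale - page_shift);       /* scale > PAGE_SHIFT here */

      if (entries < 1ULL << 12)               /* STACK_HASH_ORDER_MIN */
          entries = 1ULL << 12;
      if (entries > 1ULL << 20)               /* STACK_HASH_ORDER_MAX */
          entries = 1ULL << 20;

      printf("%llu buckets\n", entries);      /* 262144, i.e. one per 16 KB */
      return 0;
  }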
Link: https://lkml.kernel.org/r/5606a6c70659065a25bee59cd10e57fc60bb4110.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 3c713f70b0a3..de1afe3fb24d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -75,24 +75,31 @@ static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; -static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; - -static int depot_index; -static int next_slab_inited; -static size_t depot_offset; -static DEFINE_RAW_SPINLOCK(depot_lock); - -/* one hash table bucket entry per 16kB of memory */ +/* Use one hash table bucket per 16 KB of memory. */ #define STACK_HASH_SCALE 14 -/* limited between 4k and 1M buckets */ +/* Limit the number of buckets between 4K and 1M. */ #define STACK_HASH_ORDER_MIN 12 #define STACK_HASH_ORDER_MAX 20 +/* Initial seed for jhash2. */ #define STACK_HASH_SEED 0x9747b28c +/* Hash table of pointers to stored stack traces. */ +static struct stack_record **stack_table; +/* Fixed order of the number of table buckets. Used when KASAN is enabled. */ static unsigned int stack_hash_order; +/* Hash mask for indexing the table. */ static unsigned int stack_hash_mask; -static struct stack_record **stack_table; +/* Array of memory regions that store stack traces. */ +static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; +/* Currently used slab in stack_slabs. */ +static int depot_index; +/* Offset to the unused space in the currently used slab. */ +static size_t depot_offset; +/* Lock that protects the variables above. */ +static DEFINE_RAW_SPINLOCK(depot_lock); +/* Whether the next slab is initialized. */ +static int next_slab_inited; static int __init disable_stack_depot(char *str) { -- cgit v1.2.3 From 4c2e9a679468a5bef2100504914481c6ddf0d9bd Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:56 +0100 Subject: lib/stackdepot: rename hash table constants and variables Give more meaningful names to hash table-related constants and variables: 1. Rename STACK_HASH_SCALE to STACK_HASH_TABLE_SCALE to point out that it is related to scaling the hash table. 2. Rename STACK_HASH_ORDER_MIN/MAX to STACK_BUCKET_NUMBER_ORDER_MIN/MAX to point out that it is related to the number of hash table buckets. 3. Rename stack_hash_order to stack_bucket_number_order for the same reason as #2. No functional changes. Link: https://lkml.kernel.org/r/f166dd6f3cb2378aea78600714393dd568c33ee9.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index de1afe3fb24d..d1ab53197353 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -76,17 +76,17 @@ static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_ST static bool __stack_depot_early_init_passed __initdata; /* Use one hash table bucket per 16 KB of memory. */ -#define STACK_HASH_SCALE 14 +#define STACK_HASH_TABLE_SCALE 14 /* Limit the number of buckets between 4K and 1M. 
*/ -#define STACK_HASH_ORDER_MIN 12 -#define STACK_HASH_ORDER_MAX 20 +#define STACK_BUCKET_NUMBER_ORDER_MIN 12 +#define STACK_BUCKET_NUMBER_ORDER_MAX 20 /* Initial seed for jhash2. */ #define STACK_HASH_SEED 0x9747b28c /* Hash table of pointers to stored stack traces. */ static struct stack_record **stack_table; /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ -static unsigned int stack_hash_order; +static unsigned int stack_bucket_number_order; /* Hash mask for indexing the table. */ static unsigned int stack_hash_mask; @@ -137,28 +137,28 @@ int __init stack_depot_early_init(void) * in fuzzing scenarios, which leads to a large number of different * stack traces being stored in stack depot. */ - if (kasan_enabled() && !stack_hash_order) - stack_hash_order = STACK_HASH_ORDER_MAX; + if (kasan_enabled() && !stack_bucket_number_order) + stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; if (!__stack_depot_early_init_requested || stack_depot_disabled) return 0; /* - * If stack_hash_order is not set, leave entries as 0 to rely on the - * automatic calculations performed by alloc_large_system_hash. + * If stack_bucket_number_order is not set, leave entries as 0 to rely + * on the automatic calculations performed by alloc_large_system_hash. */ - if (stack_hash_order) - entries = 1UL << stack_hash_order; + if (stack_bucket_number_order) + entries = 1UL << stack_bucket_number_order; pr_info("allocating hash table via alloc_large_system_hash\n"); stack_table = alloc_large_system_hash("stackdepot", sizeof(struct stack_record *), entries, - STACK_HASH_SCALE, + STACK_HASH_TABLE_SCALE, HASH_EARLY | HASH_ZERO, NULL, &stack_hash_mask, - 1UL << STACK_HASH_ORDER_MIN, - 1UL << STACK_HASH_ORDER_MAX); + 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, + 1UL << STACK_BUCKET_NUMBER_ORDER_MAX); if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); stack_depot_disabled = true; @@ -181,13 +181,13 @@ int stack_depot_init(void) goto out_unlock; /* - * Similarly to stack_depot_early_init, use stack_hash_order + * Similarly to stack_depot_early_init, use stack_bucket_number_order * if assigned, and rely on automatic scaling otherwise. */ - if (stack_hash_order) { - entries = 1UL << stack_hash_order; + if (stack_bucket_number_order) { + entries = 1UL << stack_bucket_number_order; } else { - int scale = STACK_HASH_SCALE; + int scale = STACK_HASH_TABLE_SCALE; entries = nr_free_buffer_pages(); entries = roundup_pow_of_two(entries); @@ -198,10 +198,10 @@ int stack_depot_init(void) entries <<= (PAGE_SHIFT - scale); } - if (entries < 1UL << STACK_HASH_ORDER_MIN) - entries = 1UL << STACK_HASH_ORDER_MIN; - if (entries > 1UL << STACK_HASH_ORDER_MAX) - entries = 1UL << STACK_HASH_ORDER_MAX; + if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN) + entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN; + if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX) + entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); -- cgit v1.2.3 From 961c949b012f009c51ce209ded801e34d0a75306 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:57 +0100 Subject: lib/stackdepot: rename slab to pool Use "pool" instead of "slab" for naming memory regions stack depot uses to store stack traces. Using "slab" is confusing, as stack depot pools have nothing to do with the slab allocator. 
Also give better names to pool-related global variables: change "depot_" prefix to "pool_" to point out that these variables are related to stack depot pools. Also rename the slabindex (poolindex) field in handle_parts to pool_index to align its name with the pool_index global variable. No functional changes. Link: https://lkml.kernel.org/r/923c507edb350c3b6ef85860f36be489dfc0ad21.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Vlastimil Babka Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 106 +++++++++++++++++++++++++++---------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index d1ab53197353..522e36cf449f 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -39,7 +39,7 @@ #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) #define STACK_ALLOC_NULL_PROTECTION_BITS 1 -#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ +#define STACK_ALLOC_ORDER 2 /* Pool size order for stack depot, 4 pages */ #define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) #define STACK_ALLOC_ALIGN 4 #define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ @@ -47,16 +47,16 @@ #define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ STACK_ALLOC_NULL_PROTECTION_BITS - \ STACK_ALLOC_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) -#define STACK_ALLOC_SLABS_CAP 8192 -#define STACK_ALLOC_MAX_SLABS \ - (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ - (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) +#define STACK_ALLOC_POOLS_CAP 8192 +#define STACK_ALLOC_MAX_POOLS \ + (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_POOLS_CAP) ? \ + (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_POOLS_CAP) /* The compact structure to store the reference to stacks. */ union handle_parts { depot_stack_handle_t handle; struct { - u32 slabindex : STACK_ALLOC_INDEX_BITS; + u32 pool_index : STACK_ALLOC_INDEX_BITS; u32 offset : STACK_ALLOC_OFFSET_BITS; u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; u32 extra : STACK_DEPOT_EXTRA_BITS; @@ -91,15 +91,15 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack traces. */ -static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; -/* Currently used slab in stack_slabs. */ -static int depot_index; -/* Offset to the unused space in the currently used slab. */ -static size_t depot_offset; +static void *stack_pools[STACK_ALLOC_MAX_POOLS]; +/* Currently used pool in stack_pools. */ +static int pool_index; +/* Offset to the unused space in the currently used pool. */ +static size_t pool_offset; /* Lock that protects the variables above. */ -static DEFINE_RAW_SPINLOCK(depot_lock); -/* Whether the next slab is initialized. */ -static int next_slab_inited; +static DEFINE_RAW_SPINLOCK(pool_lock); +/* Whether the next pool is initialized. */ +static int next_pool_inited; static int __init disable_stack_depot(char *str) { @@ -220,30 +220,30 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -static bool init_stack_slab(void **prealloc) +static bool init_stack_pool(void **prealloc) { if (!*prealloc) return false; /* * This smp_load_acquire() pairs with smp_store_release() to - * |next_slab_inited| below and in depot_alloc_stack(). + * |next_pool_inited| below and in depot_alloc_stack(). 
*/ - if (smp_load_acquire(&next_slab_inited)) + if (smp_load_acquire(&next_pool_inited)) return true; - if (stack_slabs[depot_index] == NULL) { - stack_slabs[depot_index] = *prealloc; + if (stack_pools[pool_index] == NULL) { + stack_pools[pool_index] = *prealloc; *prealloc = NULL; } else { - /* If this is the last depot slab, do not touch the next one. */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot pool, do not touch the next one. */ + if (pool_index + 1 < STACK_ALLOC_MAX_POOLS) { + stack_pools[pool_index + 1] = *prealloc; *prealloc = NULL; } /* * This smp_store_release pairs with smp_load_acquire() from - * |next_slab_inited| above and in stack_depot_save(). + * |next_pool_inited| above and in stack_depot_save(). */ - smp_store_release(&next_slab_inited, 1); + smp_store_release(&next_pool_inited, 1); } return true; } @@ -257,35 +257,35 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); - if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { - if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + if (unlikely(pool_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(pool_index + 1 >= STACK_ALLOC_MAX_POOLS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); return NULL; } - depot_index++; - depot_offset = 0; + pool_index++; + pool_offset = 0; /* * smp_store_release() here pairs with smp_load_acquire() from - * |next_slab_inited| in stack_depot_save() and - * init_stack_slab(). + * |next_pool_inited| in stack_depot_save() and + * init_stack_pool(). */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) - smp_store_release(&next_slab_inited, 0); + if (pool_index + 1 < STACK_ALLOC_MAX_POOLS) + smp_store_release(&next_pool_inited, 0); } - init_stack_slab(prealloc); - if (stack_slabs[depot_index] == NULL) + init_stack_pool(prealloc); + if (stack_pools[pool_index] == NULL) return NULL; - stack = stack_slabs[depot_index] + depot_offset; + stack = stack_pools[pool_index] + pool_offset; stack->hash = hash; stack->size = size; - stack->handle.slabindex = depot_index; - stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + stack->handle.pool_index = pool_index; + stack->handle.offset = pool_offset >> STACK_ALLOC_ALIGN; stack->handle.valid = 1; stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); - depot_offset += required_size; + pool_offset += required_size; return stack; } @@ -336,10 +336,10 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, * @nr_entries: Size of the storage array * @extra_bits: Flags to store in unused bits of depot_stack_handle_t * @alloc_flags: Allocation gfp flags - * @can_alloc: Allocate stack slabs (increased chance of failure if false) + * @can_alloc: Allocate stack pools (increased chance of failure if false) * * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, is allowed to replenish the stack slab pool in case no space is left + * %true, is allowed to replenish the stack pool in case no space is left * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids * any allocations and will fail if no space is left to store the stack trace. * @@ -396,14 +396,14 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, goto exit; /* - * Check if the current or the next stack slab need to be initialized. 
+ * Check if the current or the next stack pool need to be initialized. * If so, allocate the memory - we won't be able to do that under the * lock. * * The smp_load_acquire() here pairs with smp_store_release() to - * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). + * |next_pool_inited| in depot_alloc_stack() and init_stack_pool(). */ - if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) { + if (unlikely(can_alloc && !smp_load_acquire(&next_pool_inited))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -417,7 +417,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, prealloc = page_address(page); } - raw_spin_lock_irqsave(&depot_lock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { @@ -437,10 +437,10 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * We didn't need to store this stack trace, but let's keep * the preallocated memory for the future. */ - WARN_ON(!init_stack_slab(&prealloc)); + WARN_ON(!init_stack_pool(&prealloc)); } - raw_spin_unlock_irqrestore(&depot_lock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); exit: if (prealloc) { /* Nobody used this memory, ok to free it. */ @@ -488,7 +488,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { union handle_parts parts = { .handle = handle }; - void *slab; + void *pool; size_t offset = parts.offset << STACK_ALLOC_ALIGN; struct stack_record *stack; @@ -496,15 +496,15 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle) return 0; - if (parts.slabindex > depot_index) { - WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", - parts.slabindex, depot_index, handle); + if (parts.pool_index > pool_index) { + WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", + parts.pool_index, pool_index, handle); return 0; } - slab = stack_slabs[parts.slabindex]; - if (!slab) + pool = stack_pools[parts.pool_index]; + if (!pool) return 0; - stack = slab + offset; + stack = pool + offset; *entries = stack->entries; return stack->size; -- cgit v1.2.3 From 424cafee4a9c66435d8b86e7edbc794c116b52a5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:58 +0100 Subject: lib/stackdepot: rename handle and pool constants Change the "STACK_ALLOC_" prefix to "DEPOT_" for the constants that define the number of bits in stack depot handles and the maximum number of pools. The old prefix is unclear and makes wonder about how these constants are related to stack allocations. The new prefix is also shorter. Also simplify the comment for DEPOT_POOL_ORDER. No functional changes. 
Link: https://lkml.kernel.org/r/84fcceb0acc261a356a0ad4bdfab9ff04bea2445.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 56 +++++++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 522e36cf449f..97bba462ee13 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,30 +36,28 @@ #include #include -#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) - -#define STACK_ALLOC_NULL_PROTECTION_BITS 1 -#define STACK_ALLOC_ORDER 2 /* Pool size order for stack depot, 4 pages */ -#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) -#define STACK_ALLOC_ALIGN 4 -#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ - STACK_ALLOC_ALIGN) -#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ - STACK_ALLOC_NULL_PROTECTION_BITS - \ - STACK_ALLOC_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) -#define STACK_ALLOC_POOLS_CAP 8192 -#define STACK_ALLOC_MAX_POOLS \ - (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_POOLS_CAP) ? \ - (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_POOLS_CAP) +#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) + +#define DEPOT_VALID_BITS 1 +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ +#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) +#define DEPOT_STACK_ALIGN 4 +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_VALID_BITS - \ + DEPOT_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) +#define DEPOT_POOLS_CAP 8192 +#define DEPOT_MAX_POOLS \ + (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ + (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) /* The compact structure to store the reference to stacks. */ union handle_parts { depot_stack_handle_t handle; struct { - u32 pool_index : STACK_ALLOC_INDEX_BITS; - u32 offset : STACK_ALLOC_OFFSET_BITS; - u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; + u32 pool_index : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 valid : DEPOT_VALID_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; @@ -91,7 +89,7 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack traces. */ -static void *stack_pools[STACK_ALLOC_MAX_POOLS]; +static void *stack_pools[DEPOT_MAX_POOLS]; /* Currently used pool in stack_pools. */ static int pool_index; /* Offset to the unused space in the currently used pool. */ @@ -235,7 +233,7 @@ static bool init_stack_pool(void **prealloc) *prealloc = NULL; } else { /* If this is the last depot pool, do not touch the next one. 
*/ - if (pool_index + 1 < STACK_ALLOC_MAX_POOLS) { + if (pool_index + 1 < DEPOT_MAX_POOLS) { stack_pools[pool_index + 1] = *prealloc; *prealloc = NULL; } @@ -255,10 +253,10 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) struct stack_record *stack; size_t required_size = struct_size(stack, entries, size); - required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + required_size = ALIGN(required_size, 1 << DEPOT_STACK_ALIGN); - if (unlikely(pool_offset + required_size > STACK_ALLOC_SIZE)) { - if (unlikely(pool_index + 1 >= STACK_ALLOC_MAX_POOLS)) { + if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) { + if (unlikely(pool_index + 1 >= DEPOT_MAX_POOLS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); return NULL; } @@ -269,7 +267,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) * |next_pool_inited| in stack_depot_save() and * init_stack_pool(). */ - if (pool_index + 1 < STACK_ALLOC_MAX_POOLS) + if (pool_index + 1 < DEPOT_MAX_POOLS) smp_store_release(&next_pool_inited, 0); } init_stack_pool(prealloc); @@ -281,7 +279,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->hash = hash; stack->size = size; stack->handle.pool_index = pool_index; - stack->handle.offset = pool_offset >> STACK_ALLOC_ALIGN; + stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; stack->handle.valid = 1; stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); @@ -412,7 +410,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, alloc_flags &= ~GFP_ZONEMASK; alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); alloc_flags |= __GFP_NOWARN; - page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); + page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); if (page) prealloc = page_address(page); } @@ -444,7 +442,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, exit: if (prealloc) { /* Nobody used this memory, ok to free it. */ - free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); + free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); } if (found) retval.handle = found->handle.handle; @@ -489,7 +487,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, { union handle_parts parts = { .handle = handle }; void *pool; - size_t offset = parts.offset << STACK_ALLOC_ALIGN; + size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; *entries = NULL; -- cgit v1.2.3 From cb788e84a4cfe47941cded45b5ca81a917fbb1a6 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:59 +0100 Subject: lib/stackdepot: rename init_stack_pool Rename init_stack_pool to depot_init_pool to align the name with depot_alloc_stack. No functional changes. 
Link: https://lkml.kernel.org/r/23106a3e291d8df0aba33c0e2fe86dc596286479.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 97bba462ee13..7f5f08bb6c3a 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -218,7 +218,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -static bool init_stack_pool(void **prealloc) +static bool depot_init_pool(void **prealloc) { if (!*prealloc) return false; @@ -265,12 +265,12 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* * smp_store_release() here pairs with smp_load_acquire() from * |next_pool_inited| in stack_depot_save() and - * init_stack_pool(). + * depot_init_pool(). */ if (pool_index + 1 < DEPOT_MAX_POOLS) smp_store_release(&next_pool_inited, 0); } - init_stack_pool(prealloc); + depot_init_pool(prealloc); if (stack_pools[pool_index] == NULL) return NULL; @@ -399,7 +399,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * lock. * * The smp_load_acquire() here pairs with smp_store_release() to - * |next_pool_inited| in depot_alloc_stack() and init_stack_pool(). + * |next_pool_inited| in depot_alloc_stack() and depot_init_pool(). */ if (unlikely(can_alloc && !smp_load_acquire(&next_pool_inited))) { /* @@ -435,7 +435,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * We didn't need to store this stack trace, but let's keep * the preallocated memory for the future. */ - WARN_ON(!init_stack_pool(&prealloc)); + WARN_ON(!depot_init_pool(&prealloc)); } raw_spin_unlock_irqrestore(&pool_lock, flags); -- cgit v1.2.3 From 514d5c557b8b590a80f0569af5ae5f4d455ecef2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:00 +0100 Subject: lib/stacktrace: drop impossible WARN_ON for depot_init_pool depot_init_pool has two call sites: 1. In depot_alloc_stack with a potentially NULL prealloc. 2. In __stack_depot_save with a non-NULL prealloc. At the same time depot_init_pool can only return false when prealloc is NULL. As the second call site makes sure that prealloc is not NULL, the WARN_ON there can never trigger. Thus, drop the WARN_ON and also move the prealloc check from depot_init_pool to its first call site. Also change the return type of depot_init_pool to void as it now always returns true. Link: https://lkml.kernel.org/r/ce149f9bdcbc80a92549b54da67eafb27f846b7b.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 7f5f08bb6c3a..d4d988276b91 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -218,16 +218,14 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -static bool depot_init_pool(void **prealloc) +static void depot_init_pool(void **prealloc) { - if (!*prealloc) - return false; /* * This smp_load_acquire() pairs with smp_store_release() to * |next_pool_inited| below and in depot_alloc_stack(). 
*/ if (smp_load_acquire(&next_pool_inited)) - return true; + return; if (stack_pools[pool_index] == NULL) { stack_pools[pool_index] = *prealloc; *prealloc = NULL; @@ -243,7 +241,6 @@ static bool depot_init_pool(void **prealloc) */ smp_store_release(&next_pool_inited, 1); } - return true; } /* Allocation of a new stack in raw storage */ @@ -270,7 +267,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) if (pool_index + 1 < DEPOT_MAX_POOLS) smp_store_release(&next_pool_inited, 0); } - depot_init_pool(prealloc); + if (*prealloc) + depot_init_pool(prealloc); if (stack_pools[pool_index] == NULL) return NULL; @@ -435,7 +433,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * We didn't need to store this stack trace, but let's keep * the preallocated memory for the future. */ - WARN_ON(!depot_init_pool(&prealloc)); + depot_init_pool(&prealloc); } raw_spin_unlock_irqrestore(&pool_lock, flags); -- cgit v1.2.3 From cd0fc64e76844758b78d0fd376ae3ca4fd802049 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:01 +0100 Subject: lib/stackdepot: annotate depot_init_pool and depot_alloc_stack Clean up the exisiting comments and add new ones to depot_init_pool and depot_alloc_stack. As a part of the clean-up, remove mentions of which variable is accessed by smp_store_release and smp_load_acquire: it is clear as is from the code. Link: https://lkml.kernel.org/r/f80b02951364e6b40deda965b4003de0cd1a532d.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index d4d988276b91..c4bc198c3d93 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -218,32 +218,39 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); +/* Uses preallocated memory to initialize a new stack depot pool. */ static void depot_init_pool(void **prealloc) { /* - * This smp_load_acquire() pairs with smp_store_release() to - * |next_pool_inited| below and in depot_alloc_stack(). + * smp_load_acquire() here pairs with smp_store_release() below and + * in depot_alloc_stack(). */ if (smp_load_acquire(&next_pool_inited)) return; + + /* Check if the current pool is not yet allocated. */ if (stack_pools[pool_index] == NULL) { + /* Use the preallocated memory for the current pool. */ stack_pools[pool_index] = *prealloc; *prealloc = NULL; } else { - /* If this is the last depot pool, do not touch the next one. */ + /* + * Otherwise, use the preallocated memory for the next pool + * as long as we do not exceed the maximum number of pools. + */ if (pool_index + 1 < DEPOT_MAX_POOLS) { stack_pools[pool_index + 1] = *prealloc; *prealloc = NULL; } /* - * This smp_store_release pairs with smp_load_acquire() from - * |next_pool_inited| above and in stack_depot_save(). + * This smp_store_release pairs with smp_load_acquire() above + * and in stack_depot_save(). */ smp_store_release(&next_pool_inited, 1); } } -/* Allocation of a new stack in raw storage */ +/* Allocates a new stack in a stack depot pool. */ static struct stack_record * depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { @@ -252,28 +259,35 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) required_size = ALIGN(required_size, 1 << DEPOT_STACK_ALIGN); + /* Check if there is not enough space in the current pool. 
*/ if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) { + /* Bail out if we reached the pool limit. */ if (unlikely(pool_index + 1 >= DEPOT_MAX_POOLS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); return NULL; } + + /* Move on to the next pool. */ pool_index++; pool_offset = 0; /* - * smp_store_release() here pairs with smp_load_acquire() from - * |next_pool_inited| in stack_depot_save() and - * depot_init_pool(). + * smp_store_release() here pairs with smp_load_acquire() in + * stack_depot_save() and depot_init_pool(). */ if (pool_index + 1 < DEPOT_MAX_POOLS) smp_store_release(&next_pool_inited, 0); } + + /* Assign the preallocated memory to a pool if required. */ if (*prealloc) depot_init_pool(prealloc); + + /* Check if we have a pool to save the stack trace. */ if (stack_pools[pool_index] == NULL) return NULL; + /* Save the stack trace. */ stack = stack_pools[pool_index] + pool_offset; - stack->hash = hash; stack->size = size; stack->handle.pool_index = pool_index; -- cgit v1.2.3 From d11a5621f3252120dfc7cef7600a90bd8e605caf Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:02 +0100 Subject: lib/stackdepot: rename next_pool_inited to next_pool_required Stack depot uses next_pool_inited to mark that either the next pool is initialized or the limit on the number of pools is reached. However, the flag name only reflects the former part of its purpose, which is confusing. Rename next_pool_inited to next_pool_required and invert its value. Also annotate usages of next_pool_required with comments. Link: https://lkml.kernel.org/r/484fd2695dff7a9bdc437a32f8a6ee228535aa02.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index c4bc198c3d93..4df162a84bfe 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -96,8 +96,14 @@ static int pool_index; static size_t pool_offset; /* Lock that protects the variables above. */ static DEFINE_RAW_SPINLOCK(pool_lock); -/* Whether the next pool is initialized. */ -static int next_pool_inited; +/* + * Stack depot tries to keep an extra pool allocated even before it runs out + * of space in the currently used pool. + * This flag marks that this next extra pool needs to be allocated and + * initialized. It has the value 0 when either the next pool is not yet + * initialized or the limit on the number of pools is reached. + */ +static int next_pool_required = 1; static int __init disable_stack_depot(char *str) { @@ -222,10 +228,12 @@ EXPORT_SYMBOL_GPL(stack_depot_init); static void depot_init_pool(void **prealloc) { /* + * If the next pool is already initialized or the maximum number of + * pools is reached, do not use the preallocated memory. * smp_load_acquire() here pairs with smp_store_release() below and * in depot_alloc_stack(). */ - if (smp_load_acquire(&next_pool_inited)) + if (!smp_load_acquire(&next_pool_required)) return; /* Check if the current pool is not yet allocated. */ @@ -243,10 +251,13 @@ static void depot_init_pool(void **prealloc) *prealloc = NULL; } /* + * At this point, either the next pool is initialized or the + * maximum number of pools is reached. In either case, take + * note that initializing another pool is not required. * This smp_store_release pairs with smp_load_acquire() above * and in stack_depot_save(). 
*/ - smp_store_release(&next_pool_inited, 1); + smp_store_release(&next_pool_required, 0); } } @@ -271,11 +282,13 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) pool_index++; pool_offset = 0; /* + * If the maximum number of pools is not reached, take note + * that the next pool needs to initialized. * smp_store_release() here pairs with smp_load_acquire() in * stack_depot_save() and depot_init_pool(). */ if (pool_index + 1 < DEPOT_MAX_POOLS) - smp_store_release(&next_pool_inited, 0); + smp_store_release(&next_pool_required, 1); } /* Assign the preallocated memory to a pool if required. */ @@ -406,14 +419,13 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, goto exit; /* - * Check if the current or the next stack pool need to be initialized. - * If so, allocate the memory - we won't be able to do that under the - * lock. + * Check if another stack pool needs to be initialized. If so, allocate + * the memory now - we won't be able to do that under the lock. * * The smp_load_acquire() here pairs with smp_store_release() to * |next_pool_inited| in depot_alloc_stack() and depot_init_pool(). */ - if (unlikely(can_alloc && !smp_load_acquire(&next_pool_inited))) { + if (unlikely(can_alloc && smp_load_acquire(&next_pool_required))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic -- cgit v1.2.3 From 36aa1e6779c3c6f8e0d4552544214f5cffe3c287 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:03 +0100 Subject: lib/stacktrace, kasan, kmsan: rework extra_bits interface The current implementation of the extra_bits interface is confusing: passing extra_bits to __stack_depot_save makes it seem that the extra bits are somehow stored in stack depot. In reality, they are only embedded into a stack depot handle and are not used within stack depot. Drop the extra_bits argument from __stack_depot_save and instead provide a new stack_depot_set_extra_bits function (similar to the existing stack_depot_get_extra_bits) that saves extra bits into a stack depot handle. Update the callers of __stack_depot_save to use the new interface. This change also fixes a minor issue in the old code: __stack_depot_save does not return NULL if saving stack trace fails and extra_bits is used.
Link: https://lkml.kernel.org/r/317123b5c05e2f82854fc55d8b285e0869d3cb77.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 4 +++- lib/stackdepot.c | 42 +++++++++++++++++++++++++++++++++--------- mm/kasan/common.c | 2 +- mm/kmsan/core.c | 10 +++++++--- 4 files changed, 44 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c4e3abc16b16..267f4b2634ee 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -57,7 +57,6 @@ static inline int stack_depot_early_init(void) { return 0; } depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, - unsigned int extra_bits, gfp_t gfp_flags, bool can_alloc); depot_stack_handle_t stack_depot_save(unsigned long *entries, @@ -71,6 +70,9 @@ void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +depot_stack_handle_t __must_check stack_depot_set_extra_bits( + depot_stack_handle_t handle, unsigned int extra_bits); + unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4df162a84bfe..8c6e4e9cb535 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -357,7 +357,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, * * @entries: Pointer to storage array * @nr_entries: Size of the storage array - * @extra_bits: Flags to store in unused bits of depot_stack_handle_t * @alloc_flags: Allocation gfp flags * @can_alloc: Allocate stack pools (increased chance of failure if false) * @@ -369,10 +368,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, * If the stack trace in @entries is from an interrupt, only the portion up to * interrupt entry is saved. * - * Additional opaque flags can be passed in @extra_bits, stored in the unused - * bits of the stack handle, and retrieved using stack_depot_get_extra_bits() - * without calling stack_depot_fetch(). - * * Context: Any context, but setting @can_alloc to %false is required if * alloc_pages() cannot be used from the current context. 
Currently * this is the case from contexts where neither %GFP_ATOMIC nor @@ -382,7 +377,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, - unsigned int extra_bits, gfp_t alloc_flags, bool can_alloc) { struct stack_record *found = NULL, **bucket; @@ -471,8 +465,6 @@ exit: if (found) retval.handle = found->handle.handle; fast_exit: - retval.extra = extra_bits; - return retval.handle; } EXPORT_SYMBOL_GPL(__stack_depot_save); @@ -493,7 +485,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags) { - return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true); + return __stack_depot_save(entries, nr_entries, alloc_flags, true); } EXPORT_SYMBOL_GPL(stack_depot_save); @@ -576,6 +568,38 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, } EXPORT_SYMBOL_GPL(stack_depot_snprint); +/** + * stack_depot_set_extra_bits - Set extra bits in a stack depot handle + * + * @handle: Stack depot handle returned from stack_depot_save() + * @extra_bits: Value to set the extra bits + * + * Return: Stack depot handle with extra bits set + * + * Stack depot handles have a few unused bits, which can be used for storing + * user-specific information. These bits are transparent to the stack depot. + */ +depot_stack_handle_t __must_check stack_depot_set_extra_bits( + depot_stack_handle_t handle, unsigned int extra_bits) +{ + union handle_parts parts = { .handle = handle }; + + /* Don't set extra bits on empty handles. */ + if (!handle) + return 0; + + parts.extra = extra_bits; + return parts.handle; +} +EXPORT_SYMBOL(stack_depot_set_extra_bits); + +/** + * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle + * + * @handle: Stack depot handle with extra bits saved + * + * Return: Extra bits retrieved from the stack depot handle + */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) { union handle_parts parts = { .handle = handle }; diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 1e7336ae3786..b376a5d055e5 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -43,7 +43,7 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc) unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); - return __stack_depot_save(entries, nr_entries, 0, flags, can_alloc); + return __stack_depot_save(entries, nr_entries, flags, can_alloc); } void kasan_set_track(struct kasan_track *track, gfp_t flags) diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c index 112dce135c7f..f710257d6867 100644 --- a/mm/kmsan/core.c +++ b/mm/kmsan/core.c @@ -69,13 +69,15 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags, { unsigned long entries[KMSAN_STACK_DEPTH]; unsigned int nr_entries; + depot_stack_handle_t handle; nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); /* Don't sleep (see might_sleep_if() in __alloc_pages_nodemask()). */ flags &= ~__GFP_DIRECT_RECLAIM; - return __stack_depot_save(entries, nr_entries, extra, flags, true); + handle = __stack_depot_save(entries, nr_entries, flags, true); + return stack_depot_set_extra_bits(handle, extra); } /* Copy the metadata following the memmove() behavior. 
*/ @@ -215,6 +217,7 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id) u32 extra_bits; int depth; bool uaf; + depot_stack_handle_t handle; if (!id) return id; @@ -250,8 +253,9 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id) * positives when __stack_depot_save() passes it to instrumented code. */ kmsan_internal_unpoison_memory(entries, sizeof(entries), false); - return __stack_depot_save(entries, ARRAY_SIZE(entries), extra_bits, - GFP_ATOMIC, true); + handle = __stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC, + true); + return stack_depot_set_extra_bits(handle, extra_bits); } void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b, -- cgit v1.2.3 From beb3c23c69a91a10f247e93ffef1fcd0209d93e4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:04 +0100 Subject: lib/stackdepot: annotate racy pool_index accesses Accesses to pool_index are protected by pool_lock everywhere except in a sanity check in stack_depot_fetch. The read access there can race with the write access in depot_alloc_stack. Use WRITE/READ_ONCE() to annotate the racy accesses. As the sanity check is only used to print a warning in case of a violation of the stack depot interface usage, it does not make a lot of sense to use proper synchronization. [andreyknvl@google.com: s/pool_index/pool_index_cached/ in stack_depot_fetch()] Link: https://lkml.kernel.org/r/95cf53f0da2c112aa2cc54456cbcd6975c3ff343.1676129911.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/359ac9c13cd0869c56740fb2029f505e41593830.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 8c6e4e9cb535..b625090890e1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -278,8 +278,12 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return NULL; } - /* Move on to the next pool. */ - pool_index++; + /* + * Move on to the next pool. + * WRITE_ONCE pairs with potential concurrent read in + * stack_depot_fetch(). + */ + WRITE_ONCE(pool_index, pool_index + 1); pool_offset = 0; /* * If the maximum number of pools is not reached, take note @@ -502,6 +506,11 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { union handle_parts parts = { .handle = handle }; + /* + * READ_ONCE pairs with potential concurrent write in + * depot_alloc_stack. + */ + int pool_index_cached = READ_ONCE(pool_index); void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; @@ -510,9 +519,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle) return 0; - if (parts.pool_index > pool_index) { + if (parts.pool_index > pool_index_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pool_index, handle); + parts.pool_index, pool_index_cached, handle); return 0; } pool = stack_pools[parts.pool_index]; -- cgit v1.2.3 From b232b9995a6dbaafe19d07d81acc039bc84bd569 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:05 +0100 Subject: lib/stackdepot: various comments clean-ups Clean up comments in include/linux/stackdepot.h and lib/stackdepot.c: 1. Rework the initialization comment in stackdepot.h. 2. Rework the header comment in stackdepot.c. 3. 
Various clean-ups for other comments. Also adjust whitespaces for find_stack and depot_alloc_stack call sites. No functional changes. Link: https://lkml.kernel.org/r/5836231b7954355e2311fc9b5870f697ea8e1f7d.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 36 +++++++------- lib/stackdepot.c | 120 ++++++++++++++++++++++----------------------- 2 files changed, 78 insertions(+), 78 deletions(-) (limited to 'lib') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 267f4b2634ee..afdf8ee7b597 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * A generic stack depot implementation + * Stack depot - a stack trace storage that avoids duplication. * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * - * Based on code by Dmitry Chernenkov. + * Based on the code by Dmitry Chernenkov. */ #ifndef _LINUX_STACKDEPOT_H @@ -17,35 +17,37 @@ typedef u32 depot_stack_handle_t; /* * Number of bits in the handle that stack depot doesn't use. Users may store - * information in them. + * information in them via stack_depot_set/get_extra_bits. */ #define STACK_DEPOT_EXTRA_BITS 5 /* - * Every user of stack depot has to call stack_depot_init() during its own init - * when it's decided that it will be calling stack_depot_save() later. This is - * recommended for e.g. modules initialized later in the boot process, when - * slab_is_available() is true. + * Using stack depot requires its initialization, which can be done in 3 ways: * - * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot - * enabled as part of mm_init(), for subsystems where it's known at compile time - * that stack depot will be used. + * 1. Selecting CONFIG_STACKDEPOT_ALWAYS_INIT. This option is suitable in + * scenarios where it's known at compile time that stack depot will be used. + * Enabling this config makes the kernel initialize stack depot in mm_init(). * - * Another alternative is to call stack_depot_request_early_init(), when the - * decision to use stack depot is taken e.g. when evaluating kernel boot - * parameters, which precedes the enablement point in mm_init(). + * 2. Calling stack_depot_request_early_init() during early boot, before + * stack_depot_early_init() in mm_init() completes. For example, this can + * be done when evaluating kernel boot parameters. + * + * 3. Calling stack_depot_init(). Possible after boot is complete. This option + * is recommended for modules initialized later in the boot process, after + * mm_init() completes. * * stack_depot_init() and stack_depot_request_early_init() can be called - * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual - * save/fetch/print functions should only be called from code that makes sure - * CONFIG_STACKDEPOT is enabled. + * regardless of whether CONFIG_STACKDEPOT is enabled and are no-op when this + * config is disabled. The save/fetch/print stack depot functions can only be + * called from the code that makes sure CONFIG_STACKDEPOT is enabled _and_ + * initializes stack depot via one of the ways listed above. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); void __init stack_depot_request_early_init(void); -/* This is supposed to be called only from mm_init() */ +/* Must be only called from mm_init(). 
*/ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b625090890e1..aaa5d2f1abc2 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -1,22 +1,26 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Generic stack depot for storing stack traces. + * Stack depot - a stack trace storage that avoids duplication. * - * Some debugging tools need to save stack traces of certain events which can - * be later presented to the user. For example, KASAN needs to safe alloc and - * free stacks for each object, but storing two stack traces per object - * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for - * that). + * Stack depot is intended to be used by subsystems that need to store and + * later retrieve many potentially duplicated stack traces without wasting + * memory. * - * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc - * and free stacks repeat a lot, we save about 100x space. - * Stacks are never removed from depot, so we store them contiguously one after - * another in a contiguous memory allocation. + * For example, KASAN needs to save allocation and free stack traces for each + * object. Storing two stack traces per object requires a lot of memory (e.g. + * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free + * stack traces often repeat, using stack depot allows to save about 100x space. + * + * Internally, stack depot maintains a hash table of unique stacktraces. The + * stack traces themselves are stored contiguously one after another in a set + * of separate page allocations. + * + * Stack traces are never removed from stack depot. * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * - * Based on code by Dmitry Chernenkov. + * Based on the code by Dmitry Chernenkov. */ #define pr_fmt(fmt) "stackdepot: " fmt @@ -50,7 +54,7 @@ (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) -/* The compact structure to store the reference to stacks. */ +/* Compact structure that stores a reference to a stack. */ union handle_parts { depot_stack_handle_t handle; struct { @@ -62,11 +66,11 @@ union handle_parts { }; struct stack_record { - struct stack_record *next; /* Link in the hashtable */ - u32 hash; /* Hash in the hastable */ - u32 size; /* Number of frames in the stack */ + struct stack_record *next; /* Link in the hash table */ + u32 hash; /* Hash in the hash table */ + u32 size; /* Number of stored frames */ union handle_parts handle; - unsigned long entries[]; /* Variable-sized array of entries. */ + unsigned long entries[]; /* Variable-sized array of frames */ }; static bool stack_depot_disabled; @@ -317,7 +321,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return stack; } -/* Calculate hash for a stack */ +/* Calculates the hash for a stack. */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) { return jhash2((u32 *)entries, @@ -325,9 +329,9 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size) STACK_HASH_SEED); } -/* Use our own, non-instrumented version of memcmp(). - * - * We actually don't care about the order, just the equality. +/* + * Non-instrumented version of memcmp(). + * Does not check the lexicographical order, only the equality. 
*/ static inline int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, @@ -340,7 +344,7 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, return 0; } -/* Find a stack that is equal to the one stored in entries in the hash */ +/* Finds a stack in a bucket of the hash table. */ static inline struct stack_record *find_stack(struct stack_record *bucket, unsigned long *entries, int size, u32 hash) @@ -357,27 +361,27 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, } /** - * __stack_depot_save - Save a stack trace from an array + * __stack_depot_save - Save a stack trace to stack depot * - * @entries: Pointer to storage array - * @nr_entries: Size of the storage array - * @alloc_flags: Allocation gfp flags + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags * @can_alloc: Allocate stack pools (increased chance of failure if false) * * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, is allowed to replenish the stack pool in case no space is left + * %true, stack depot can replenish the stack pools in case no space is left * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and will fail if no space is left to store the stack trace. + * any allocations and fails if no space is left to store the stack trace. * - * If the stack trace in @entries is from an interrupt, only the portion up to - * interrupt entry is saved. + * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. * * Context: Any context, but setting @can_alloc to %false is required if * alloc_pages() cannot be used from the current context. Currently - * this is the case from contexts where neither %GFP_ATOMIC nor + * this is the case for contexts where neither %GFP_ATOMIC nor * %GFP_NOWAIT can be used (NMI, raw_spin_lock). * - * Return: The handle of the stack struct stored in depot, 0 on failure. + * Return: Handle of the stack struct stored in depot, 0 on failure */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, @@ -392,11 +396,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, /* * If this stack trace is from an interrupt, including anything before - * interrupt entry usually leads to unbounded stackdepot growth. + * interrupt entry usually leads to unbounded stack depot growth. * - * Because use of filter_irq_stacks() is a requirement to ensure - * stackdepot can efficiently deduplicate interrupt stacks, always - * filter_irq_stacks() to simplify all callers' use of stackdepot. + * Since use of filter_irq_stacks() is a requirement to ensure stack + * depot can efficiently deduplicate interrupt stacks, always + * filter_irq_stacks() to simplify all callers' use of stack depot. */ nr_entries = filter_irq_stacks(entries, nr_entries); @@ -411,8 +415,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * The smp_load_acquire() here pairs with smp_store_release() to * |bucket| below. 
*/ - found = find_stack(smp_load_acquire(bucket), entries, - nr_entries, hash); + found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash); if (found) goto exit; @@ -441,7 +444,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { - struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); + struct stack_record *new = + depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { new->next = *bucket; @@ -454,8 +458,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, } } else if (prealloc) { /* - * We didn't need to store this stack trace, but let's keep - * the preallocated memory for the future. + * Stack depot already contains this stack trace, but let's + * keep the preallocated memory for the future. */ depot_init_pool(&prealloc); } @@ -463,7 +467,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, raw_spin_unlock_irqrestore(&pool_lock, flags); exit: if (prealloc) { - /* Nobody used this memory, ok to free it. */ + /* Stack depot didn't use this memory, free it. */ free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); } if (found) @@ -474,16 +478,16 @@ fast_exit: EXPORT_SYMBOL_GPL(__stack_depot_save); /** - * stack_depot_save - Save a stack trace from an array + * stack_depot_save - Save a stack trace to stack depot * - * @entries: Pointer to storage array - * @nr_entries: Size of the storage array - * @alloc_flags: Allocation gfp flags + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags * * Context: Contexts where allocations via alloc_pages() are allowed. * See __stack_depot_save() for more details. * - * Return: The handle of the stack struct stored in depot, 0 on failure. + * Return: Handle of the stack trace stored in depot, 0 on failure */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, @@ -494,13 +498,12 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, EXPORT_SYMBOL_GPL(stack_depot_save); /** - * stack_depot_fetch - Fetch stack entries from a depot + * stack_depot_fetch - Fetch a stack trace from stack depot * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @entries: Pointer to store the entries address + * @handle: Stack depot handle returned from stack_depot_save() + * @entries: Pointer to store the address of the stack trace * - * Return: The number of trace entries for this depot. + * Return: Number of frames for the fetched stack */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) @@ -535,11 +538,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, EXPORT_SYMBOL_GPL(stack_depot_fetch); /** - * stack_depot_print - print stack entries from a depot - * - * @stack: Stack depot handle which was returned from - * stack_depot_save(). + * stack_depot_print - Print a stack trace from stack depot * + * @stack: Stack depot handle returned from stack_depot_save() */ void stack_depot_print(depot_stack_handle_t stack) { @@ -553,17 +554,14 @@ void stack_depot_print(depot_stack_handle_t stack) EXPORT_SYMBOL_GPL(stack_depot_print); /** - * stack_depot_snprint - print stack entries from a depot into a buffer + * stack_depot_snprint - Print a stack trace from stack depot into a buffer * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). 
+ * @handle: Stack depot handle returned from stack_depot_save() * @buf: Pointer to the print buffer - * * @size: Size of the print buffer - * * @spaces: Number of leading spaces to print * - * Return: Number of bytes printed. + * Return: Number of bytes printed */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces) -- cgit v1.2.3 From 0621d160f1003a8aedd3628133568ecffdd724f7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:06 +0100 Subject: lib/stackdepot: move documentation comments to stackdepot.h Move all interface- and usage-related documentation comments to include/linux/stackdepot.h. It makes sense to have them in the header where they are available to the interface users. [akpm@linux-foundation.org: grammar fix, per Alexander] Link: https://lkml.kernel.org/r/fbfee41495b306dd8881f9b1c1b80999c885e82f.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 87 ++++++++++++++++++++++++++++++++++++++++++++++ lib/stackdepot.c | 87 ---------------------------------------------- 2 files changed, 87 insertions(+), 87 deletions(-) (limited to 'lib') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index afdf8ee7b597..e58306783d8e 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -2,6 +2,17 @@ /* * Stack depot - a stack trace storage that avoids duplication. * + * Stack depot is intended to be used by subsystems that need to store and + * later retrieve many potentially duplicated stack traces without wasting + * memory. + * + * For example, KASAN needs to save allocation and free stack traces for each + * object. Storing two stack traces per object requires a lot of memory (e.g. + * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free + * stack traces often repeat, using stack depot allows to save about 100x space. + * + * Stack traces are never removed from the stack depot. + * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * @@ -57,24 +68,100 @@ static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +/** + * __stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * @can_alloc: Allocate stack pools (increased chance of failure if false) + * + * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is + * %true, stack depot can replenish the stack pools in case no space is left + * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids + * any allocations and fails if no space is left to store the stack trace. + * + * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. + * + * Context: Any context, but setting @can_alloc to %false is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case for contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). 
+ * + * Return: Handle of the stack struct stored in depot, 0 on failure + */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/** + * stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * + * Context: Contexts where allocations via alloc_pages() are allowed. + * See __stack_depot_save() for more details. + * + * Return: Handle of the stack trace stored in depot, 0 on failure + */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); +/** + * stack_depot_fetch - Fetch a stack trace from stack depot + * + * @handle: Stack depot handle returned from stack_depot_save() + * @entries: Pointer to store the address of the stack trace + * + * Return: Number of frames for the fetched stack + */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +/** + * stack_depot_print - Print a stack trace from stack depot + * + * @stack: Stack depot handle returned from stack_depot_save() + */ void stack_depot_print(depot_stack_handle_t stack); +/** + * stack_depot_snprint - Print a stack trace from stack depot into a buffer + * + * @handle: Stack depot handle returned from stack_depot_save() + * @buf: Pointer to the print buffer + * @size: Size of the print buffer + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed + */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +/** + * stack_depot_set_extra_bits - Set extra bits in a stack depot handle + * + * @handle: Stack depot handle returned from stack_depot_save() + * @extra_bits: Value to set the extra bits + * + * Return: Stack depot handle with extra bits set + * + * Stack depot handles have a few unused bits, which can be used for storing + * user-specific information. These bits are transparent to the stack depot. + */ depot_stack_handle_t __must_check stack_depot_set_extra_bits( depot_stack_handle_t handle, unsigned int extra_bits); +/** + * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle + * + * @handle: Stack depot handle with extra bits saved + * + * Return: Extra bits retrieved from the stack depot handle + */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index aaa5d2f1abc2..036da8e295d1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -2,21 +2,10 @@ /* * Stack depot - a stack trace storage that avoids duplication. * - * Stack depot is intended to be used by subsystems that need to store and - * later retrieve many potentially duplicated stack traces without wasting - * memory. - * - * For example, KASAN needs to save allocation and free stack traces for each - * object. Storing two stack traces per object requires a lot of memory (e.g. - * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free - * stack traces often repeat, using stack depot allows to save about 100x space. - * * Internally, stack depot maintains a hash table of unique stacktraces. The * stack traces themselves are stored contiguously one after another in a set * of separate page allocations. * - * Stack traces are never removed from stack depot. - * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. 
* @@ -360,29 +349,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -/** - * __stack_depot_save - Save a stack trace to stack depot - * - * @entries: Pointer to the stack trace - * @nr_entries: Number of frames in the stack - * @alloc_flags: Allocation GFP flags - * @can_alloc: Allocate stack pools (increased chance of failure if false) - * - * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, stack depot can replenish the stack pools in case no space is left - * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and fails if no space is left to store the stack trace. - * - * If the provided stack trace comes from the interrupt context, only the part - * up to the interrupt entry is saved. - * - * Context: Any context, but setting @can_alloc to %false is required if - * alloc_pages() cannot be used from the current context. Currently - * this is the case for contexts where neither %GFP_ATOMIC nor - * %GFP_NOWAIT can be used (NMI, raw_spin_lock). - * - * Return: Handle of the stack struct stored in depot, 0 on failure - */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags, bool can_alloc) @@ -477,18 +443,6 @@ fast_exit: } EXPORT_SYMBOL_GPL(__stack_depot_save); -/** - * stack_depot_save - Save a stack trace to stack depot - * - * @entries: Pointer to the stack trace - * @nr_entries: Number of frames in the stack - * @alloc_flags: Allocation GFP flags - * - * Context: Contexts where allocations via alloc_pages() are allowed. - * See __stack_depot_save() for more details. - * - * Return: Handle of the stack trace stored in depot, 0 on failure - */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags) @@ -497,14 +451,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, } EXPORT_SYMBOL_GPL(stack_depot_save); -/** - * stack_depot_fetch - Fetch a stack trace from stack depot - * - * @handle: Stack depot handle returned from stack_depot_save() - * @entries: Pointer to store the address of the stack trace - * - * Return: Number of frames for the fetched stack - */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { @@ -537,11 +483,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, } EXPORT_SYMBOL_GPL(stack_depot_fetch); -/** - * stack_depot_print - Print a stack trace from stack depot - * - * @stack: Stack depot handle returned from stack_depot_save() - */ void stack_depot_print(depot_stack_handle_t stack) { unsigned long *entries; @@ -553,16 +494,6 @@ void stack_depot_print(depot_stack_handle_t stack) } EXPORT_SYMBOL_GPL(stack_depot_print); -/** - * stack_depot_snprint - Print a stack trace from stack depot into a buffer - * - * @handle: Stack depot handle returned from stack_depot_save() - * @buf: Pointer to the print buffer - * @size: Size of the print buffer - * @spaces: Number of leading spaces to print - * - * Return: Number of bytes printed - */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces) { @@ -575,17 +506,6 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, } EXPORT_SYMBOL_GPL(stack_depot_snprint); -/** - * stack_depot_set_extra_bits - Set extra bits in a stack depot handle - * - * @handle: Stack depot handle returned from stack_depot_save() - * @extra_bits: Value to set the extra bits - * - * Return: Stack depot handle 
with extra bits set - * - * Stack depot handles have a few unused bits, which can be used for storing - * user-specific information. These bits are transparent to the stack depot. - */ depot_stack_handle_t __must_check stack_depot_set_extra_bits( depot_stack_handle_t handle, unsigned int extra_bits) { @@ -600,13 +520,6 @@ depot_stack_handle_t __must_check stack_depot_set_extra_bits( } EXPORT_SYMBOL(stack_depot_set_extra_bits); -/** - * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle - * - * @handle: Stack depot handle with extra bits saved - * - * Return: Extra bits retrieved from the stack depot handle - */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) { union handle_parts parts = { .handle = handle }; -- cgit v1.2.3 From 44081c77e8a4aac9c5a010ed0d9ccdcf684041e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2023 11:30:24 +0100 Subject: maple_tree: reduce stack usage with gcc-9 and earlier gcc-10 changed the way inlining works to be less aggressive, but older versions run into an oversized stack frame warning whenever CONFIG_KASAN_STACK is enabled, as that forces variables from inlined callees to be non-overlapping: lib/maple_tree.c: In function 'mas_wr_bnode': lib/maple_tree.c:4320:1: error: the frame size of 1424 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Change the annotations on mas_store_b_node() and mas_commit_b_node() to explicitly forbid inlining in this configuration, which is the same behavior that newer versions already have. Link: https://lkml.kernel.org/r/20230214103030.1051950-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: David Hildenbrand Reviewed-by: Liam R. Howlett Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vincenzo Frascino Cc: Vernon Yang Signed-off-by: Andrew Morton --- lib/maple_tree.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 5e9703189259..646297cae5d1 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -146,6 +146,13 @@ struct maple_subtree_state { struct maple_big_node *bn; }; +#ifdef CONFIG_KASAN_STACK +/* Prevent mas_wr_bnode() from exceeding the stack frame limit */ +#define noinline_for_kasan noinline_for_stack +#else +#define noinline_for_kasan inline +#endif + /* Functions */ static inline struct maple_node *mt_alloc_one(gfp_t gfp) { @@ -2107,7 +2114,7 @@ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, * * Return: The actual end of the data stored in @b_node */ -static inline void mas_store_b_node(struct ma_wr_state *wr_mas, +static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, struct maple_big_node *b_node, unsigned char offset_end) { unsigned char slot; @@ -3579,7 +3586,7 @@ static inline bool mas_reuse_node(struct ma_wr_state *wr_mas, * @b_node: The maple big node * @end: The end of the data. */ -static inline int mas_commit_b_node(struct ma_wr_state *wr_mas, +static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, struct maple_big_node *b_node, unsigned char end) { struct maple_node *node; -- cgit v1.2.3
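
To make the extra_bits rework above concrete, here is a minimal sketch of how a caller might use the split interface after commit 36aa1e6779c3. It is an illustration only, not part of any patch in this series: the stack depot calls follow the signatures shown in the hunks above, while the wrapper name save_stack_with_flag(), the entry-array size, and the MY_EXTRA_FLAG value are hypothetical.

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/* Hypothetical flag; must fit in the STACK_DEPOT_EXTRA_BITS (5) unused bits. */
#define MY_EXTRA_FLAG 0x1

static depot_stack_handle_t save_stack_with_flag(gfp_t flags, bool can_alloc)
{
	unsigned long entries[64];
	unsigned int nr_entries;
	depot_stack_handle_t handle;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

	/* Extra bits are no longer an argument of __stack_depot_save()... */
	handle = __stack_depot_save(entries, nr_entries, flags, can_alloc);

	/* ...they are folded into the handle afterwards; a zero handle stays zero. */
	return stack_depot_set_extra_bits(handle, MY_EXTRA_FLAG);
}

On the retrieval side, stack_depot_get_extra_bits() reads the bits back out of a handle and stack_depot_fetch() returns the saved frames, per the kernel-doc comments added to the header above.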