diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-06-12 15:45:45 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:10 -0400 |
commit | 0423fb7185e3c0178b3a09f24afc3777c2ef9522 (patch) | |
tree | 11f340111387ea1484801ac46268e971e5bc7947 /fs | |
parent | 877da05ffb13c1a998070707e0d15df0167f9364 (diff) | |
download | lwn-0423fb7185e3c0178b3a09f24afc3777c2ef9522.tar.gz lwn-0423fb7185e3c0178b3a09f24afc3777c2ef9522.zip |
bcachefs: Keep a sorted list of btree iterators
This will be used to make other operations on btree iterators within a
transaction more efficient, and enable some other improvements to how we
manage btree iterators.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/btree_iter.c | 235 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.h | 43 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/util.h | 14 |
4 files changed, 249 insertions, 47 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index d1a03fdba9ce..c14be8093116 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -18,10 +18,20 @@ #include <linux/prefetch.h> static void btree_iter_set_search_pos(struct btree_iter *, struct bpos); +static inline void btree_trans_sort_iters(struct btree_trans *); static struct btree_iter *btree_iter_child_alloc(struct btree_iter *, unsigned long); -static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *); +static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *, + struct btree_iter *); static void btree_iter_copy(struct btree_iter *, struct btree_iter *); +static inline int btree_iter_cmp(const struct btree_iter *l, + const struct btree_iter *r) +{ + return cmp_int(l->btree_id, r->btree_id) ?: + -cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?: + bkey_cmp(l->real_pos, r->real_pos); +} + static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) { EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES); @@ -925,6 +935,7 @@ static inline struct bkey_s_c btree_iter_level_peek(struct btree_iter *iter, bch2_btree_node_iter_peek(&l->iter, l->b)); iter->real_pos = k.k ? k.k->p : l->b->key.k.p; + iter->trans->iters_sorted = false; return k; } @@ -935,6 +946,7 @@ static inline struct bkey_s_c btree_iter_level_prev(struct btree_iter *iter, bch2_btree_node_iter_prev(&l->iter, l->b)); iter->real_pos = k.k ? 
k.k->p : l->b->data->min_key; + iter->trans->iters_sorted = false; return k; } @@ -1264,9 +1276,8 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_iter *iter; - u8 sorted[BTREE_ITER_MAX]; - int i, nr_sorted = 0; + struct btree_iter *iter, *prev = NULL; + int i; if (trans->in_traverse_all) return -EINTR; @@ -1275,28 +1286,21 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, retry_all: trans->restarted = false; - nr_sorted = 0; - - trans_for_each_iter(trans, iter) { - sorted[nr_sorted++] = iter->idx; + trans_for_each_iter(trans, iter) iter->should_be_locked = false; - } - -#define btree_iter_cmp_by_idx(_l, _r) \ - btree_iter_lock_cmp(&trans->iters[_l], &trans->iters[_r]) - bubble_sort(sorted, nr_sorted, btree_iter_cmp_by_idx); -#undef btree_iter_cmp_by_idx + btree_trans_sort_iters(trans); - for (i = nr_sorted - 2; i >= 0; --i) { - struct btree_iter *iter1 = trans->iters + sorted[i]; - struct btree_iter *iter2 = trans->iters + sorted[i + 1]; + trans_for_each_iter_inorder_reverse(trans, iter, i) { + if (prev) { + if (iter->btree_id == prev->btree_id && + iter->locks_want < prev->locks_want) + __bch2_btree_iter_upgrade(iter, prev->locks_want); + else if (!iter->locks_want && prev->locks_want) + __bch2_btree_iter_upgrade(iter, 1); + } - if (iter1->btree_id == iter2->btree_id && - iter1->locks_want < iter2->locks_want) - __bch2_btree_iter_upgrade(iter1, iter2->locks_want); - else if (!iter1->locks_want && iter2->locks_want) - __bch2_btree_iter_upgrade(iter1, 1); + prev = iter; } bch2_trans_unlock(trans); @@ -1321,20 +1325,29 @@ retry_all: BUG_ON(ret && ret != -EINTR); /* Now, redo traversals in correct order: */ - for (i = 0; i < nr_sorted; i++) { - unsigned idx = sorted[i]; + i = 0; + while (i < trans->nr_sorted) { + iter = trans->iters + trans->sorted[i]; - /* - * sucessfully traversing one iterator can cause another to be - * unlinked, in 
btree_key_cache_fill() - */ - if (!(trans->iters_linked & (1ULL << idx))) - continue; + EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); - ret = btree_iter_traverse_one(&trans->iters[idx], _THIS_IP_); + ret = btree_iter_traverse_one(iter, _THIS_IP_); if (ret) goto retry_all; + + EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + + if (iter->nodes_locked) + i++; } + + /* + * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock() + * and relock(), relock() won't relock since iter->should_be_locked + * isn't set yet, which is all fine + */ + trans_for_each_iter(trans, iter) + BUG_ON(iter->uptodate >= BTREE_ITER_NEED_TRAVERSE); out: bch2_btree_cache_cannibalize_unlock(c); @@ -1536,6 +1549,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0); iter->pos = iter->real_pos = b->key.k.p; + iter->trans->iters_sorted = false; bch2_btree_iter_verify(iter); iter->should_be_locked = true; @@ -1593,6 +1607,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) } iter->pos = iter->real_pos = b->key.k.p; + iter->trans->iters_sorted = false; bch2_btree_iter_verify(iter); iter->should_be_locked = true; @@ -1617,6 +1632,7 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p iter->real_pos = new_pos; iter->should_be_locked = false; + iter->trans->iters_sorted = false; if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { btree_node_unlock(iter, 0); @@ -2032,6 +2048,135 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans, /* new transactional stuff: */ +#ifdef CONFIG_BCACHEFS_DEBUG +static void btree_trans_verify_sorted_refs(struct btree_trans *trans) +{ + struct btree_iter *iter; + unsigned i; + + BUG_ON(trans->nr_sorted != hweight64(trans->iters_linked)); + + trans_for_each_iter(trans, iter) { + BUG_ON(iter->sorted_idx >= trans->nr_sorted); + BUG_ON(trans->sorted[iter->sorted_idx] != iter->idx); + } + + for (i = 0; i < 
trans->nr_sorted; i++) { + unsigned idx = trans->sorted[i]; + + EBUG_ON(!(trans->iters_linked & (1ULL << idx))); + BUG_ON(trans->iters[idx].sorted_idx != i); + } +} +#else +static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {} +#endif + +static void btree_trans_verify_sorted(struct btree_trans *trans) +{ +#ifdef CONFIG_BCACHEFS_DEBUG + struct btree_iter *iter, *prev = NULL; + unsigned i; + + trans_for_each_iter_inorder(trans, iter, i) { + BUG_ON(prev && btree_iter_cmp(prev, iter) > 0); + prev = iter; + } +#endif +} + +static noinline void __btree_trans_sort_iters(struct btree_trans *trans) +{ + int i, l = 0, r = trans->nr_sorted, inc = 1; + bool swapped; + + /* + * Cocktail shaker sort: this is efficient because iterators will be + * mostly sorted. + */ + do { + swapped = false; + + for (i = inc > 0 ? l : r - 2; + i + 1 < r && i >= l; + i += inc) { + if (btree_iter_cmp(trans->iters + trans->sorted[i], + trans->iters + trans->sorted[i + 1]) > 0) { + swap(trans->sorted[i], trans->sorted[i + 1]); + trans->iters[trans->sorted[i]].sorted_idx = i; + trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1; + swapped = true; + } + } + + if (inc > 0) + --r; + else + l++; + inc = -inc; + } while (swapped); + + trans->iters_sorted = true; + + btree_trans_verify_sorted(trans); +} + +static inline void btree_trans_sort_iters(struct btree_trans *trans) +{ + btree_trans_verify_sorted_refs(trans); + + if (trans->iters_sorted) { + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) + btree_trans_verify_sorted(trans); + return; + } + __btree_trans_sort_iters(trans); +} + +static inline void btree_iter_list_remove(struct btree_trans *trans, + struct btree_iter *iter) +{ + unsigned i; + + EBUG_ON(iter->sorted_idx >= trans->nr_sorted); +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + trans->nr_sorted--; + memmove_u64s_down_small(trans->sorted + iter->sorted_idx, + trans->sorted + iter->sorted_idx + 1, + DIV_ROUND_UP(trans->nr_sorted - iter->sorted_idx, 8)); +#else + 
array_remove_item(trans->sorted, trans->nr_sorted, iter->sorted_idx); +#endif + for (i = iter->sorted_idx; i < trans->nr_sorted; i++) + trans->iters[trans->sorted[i]].sorted_idx = i; + + iter->sorted_idx = U8_MAX; +} + +static inline void btree_iter_list_add(struct btree_trans *trans, + struct btree_iter *pos, + struct btree_iter *iter) +{ + unsigned i; + + iter->sorted_idx = pos ? pos->sorted_idx + 1 : 0; + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + memmove_u64s_up_small(trans->sorted + iter->sorted_idx + 1, + trans->sorted + iter->sorted_idx, + DIV_ROUND_UP(trans->nr_sorted - iter->sorted_idx, 8)); + trans->nr_sorted++; + trans->sorted[iter->sorted_idx] = iter->idx; +#else + array_insert_item(trans->sorted, trans->nr_sorted, iter->sorted_idx, iter->idx); +#endif + + for (i = iter->sorted_idx; i < trans->nr_sorted; i++) + trans->iters[trans->sorted[i]].sorted_idx = i; + + btree_trans_verify_sorted_refs(trans); +} + static void btree_iter_child_free(struct btree_iter *iter) { struct btree_iter *child = btree_iter_child(iter); @@ -2049,7 +2194,7 @@ static struct btree_iter *btree_iter_child_alloc(struct btree_iter *iter, struct btree_iter *child = btree_iter_child(iter); if (!child) { - child = btree_trans_iter_alloc(trans); + child = btree_trans_iter_alloc(trans, iter); child->ip_allocated = ip; iter->child_idx = child->idx; @@ -2065,10 +2210,14 @@ static inline void __bch2_trans_iter_free(struct btree_trans *trans, { btree_iter_child_free(&trans->iters[idx]); + btree_iter_list_remove(trans, &trans->iters[idx]); + __bch2_btree_iter_unlock(&trans->iters[idx]); trans->iters_linked &= ~(1ULL << idx); trans->iters_live &= ~(1ULL << idx); trans->iters_touched &= ~(1ULL << idx); + + btree_trans_verify_sorted_refs(trans); } int bch2_trans_iter_put(struct btree_trans *trans, @@ -2109,12 +2258,15 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans) struct btree_iter *iter; struct btree_insert_entry *i; + unsigned idx; char buf[100]; - 
trans_for_each_iter(trans, iter) + btree_trans_sort_iters(trans); + + trans_for_each_iter_inorder(trans, iter, idx) printk(KERN_ERR "iter: btree %s pos %s%s%s%s %pS\n", bch2_btree_ids[iter->btree_id], - (bch2_bpos_to_text(&PBUF(buf), iter->pos), buf), + (bch2_bpos_to_text(&PBUF(buf), iter->real_pos), buf), btree_iter_live(trans, iter) ? " live" : "", (trans->iters_touched & (1ULL << iter->idx)) ? " touched" : "", iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "", @@ -2130,11 +2282,14 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans) panic("trans iter oveflow\n"); } -static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans) +static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans, + struct btree_iter *pos) { struct btree_iter *iter; unsigned idx; + btree_trans_verify_sorted_refs(trans); + if (unlikely(trans->iters_linked == ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) btree_trans_iter_alloc_fail(trans); @@ -2145,10 +2300,13 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans) iter->trans = trans; iter->idx = idx; iter->child_idx = U8_MAX; + iter->sorted_idx = U8_MAX; iter->flags = 0; iter->nodes_locked = 0; iter->nodes_intent_locked = 0; trans->iters_linked |= 1ULL << idx; + + btree_iter_list_add(trans, pos, iter); return iter; } @@ -2170,6 +2328,7 @@ static void btree_iter_copy(struct btree_iter *dst, struct btree_iter *src) dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; dst->flags &= ~BTREE_ITER_SET_POS_AFTER_COMMIT; + dst->trans->iters_sorted = false; } struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, @@ -2223,10 +2382,10 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, } if (!best) { - iter = btree_trans_iter_alloc(trans); + iter = btree_trans_iter_alloc(trans, best); bch2_btree_iter_init(trans, iter, btree_id); } else if (btree_iter_keep(trans, best)) { - iter = btree_trans_iter_alloc(trans); + iter = btree_trans_iter_alloc(trans, 
best); btree_iter_copy(iter, best); } else { iter = best; @@ -2307,7 +2466,7 @@ struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, { struct btree_iter *iter; - iter = btree_trans_iter_alloc(trans); + iter = btree_trans_iter_alloc(trans, src); btree_iter_copy(iter, src); trans->iters_live |= 1ULL << iter->idx; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index aeabc07d2c9c..6fb0cb8252eb 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -71,6 +71,36 @@ __trans_next_iter(struct btree_trans *trans, unsigned idx) (_iter); \ _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) +static inline struct btree_iter *next_btree_iter(struct btree_trans *trans, struct btree_iter *iter) +{ + unsigned idx = iter ? iter->sorted_idx + 1 : 0; + + EBUG_ON(idx > trans->nr_sorted); + + return idx < trans->nr_sorted + ? trans->iters + trans->sorted[idx] + : NULL; +} + +static inline struct btree_iter *prev_btree_iter(struct btree_trans *trans, struct btree_iter *iter) +{ + unsigned idx = iter ? iter->sorted_idx : trans->nr_sorted; + + return idx + ? trans->iters + trans->sorted[idx - 1] + : NULL; +} + +#define trans_for_each_iter_inorder(_trans, _iter, _i) \ + for (_i = 0; \ + ((_iter) = (_trans)->iters + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\ + _i++) + +#define trans_for_each_iter_inorder_reverse(_trans, _iter, _i) \ + for (_i = trans->nr_sorted - 1; \ + ((_iter) = (_trans)->iters + trans->sorted[_i]), (_i) >= 0;\ + --_i) + static inline bool __iter_has_node(const struct btree_iter *iter, const struct btree *b) { @@ -191,19 +221,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it iter->pos = bkey_start_pos(&iter->k); } -static inline struct btree_iter *btree_iter_child(struct btree_iter *iter) +static inline struct btree_iter *idx_to_btree_iter(struct btree_trans *trans, unsigned idx) { - return iter->child_idx == U8_MAX ? 
NULL - : iter->trans->iters + iter->child_idx; + return idx != U8_MAX ? trans->iters + idx : NULL; } -/* Sort order for locking btree iterators: */ -static inline int btree_iter_lock_cmp(const struct btree_iter *l, - const struct btree_iter *r) +static inline struct btree_iter *btree_iter_child(struct btree_iter *iter) { - return cmp_int(l->btree_id, r->btree_id) ?: - -cmp_int(btree_iter_is_cached(l), btree_iter_is_cached(r)) ?: - bkey_cmp(l->real_pos, r->real_pos); + return idx_to_btree_iter(iter->trans, iter->child_idx); } /* diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 4fa37fbc41fa..7a9aece2eb87 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -246,6 +246,7 @@ struct btree_iter { u8 idx; u8 child_idx; + u8 sorted_idx; /* btree_iter_copy starts here: */ u16 flags; @@ -379,11 +380,13 @@ struct btree_trans { unsigned long ip; int srcu_idx; + u8 nr_sorted; u8 nr_updates; bool used_mempool:1; bool error:1; bool in_traverse_all:1; bool restarted:1; + bool iters_sorted:1; /* * For when bch2_trans_update notices we'll be splitting a compressed * extent: @@ -398,6 +401,7 @@ struct btree_trans { unsigned mem_bytes; void *mem; + u8 sorted[BTREE_ITER_MAX + 8]; struct btree_iter *iters; struct btree_insert_entry *updates; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index a0cbebf190b4..41dfc5867c9e 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -593,6 +593,20 @@ static inline void memmove_u64s_down(void *dst, const void *src, __memmove_u64s_down(dst, src, u64s); } +static inline void __memmove_u64s_down_small(void *dst, const void *src, + unsigned u64s) +{ + memcpy_u64s_small(dst, src, u64s); +} + +static inline void memmove_u64s_down_small(void *dst, const void *src, + unsigned u64s) +{ + EBUG_ON(dst > src); + + __memmove_u64s_down_small(dst, src, u64s); +} + static inline void __memmove_u64s_up_small(void *_dst, const void *_src, unsigned u64s) { |