diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-09-07 14:16:00 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:26 -0400 |
commit | 36e9d69854752bdad5c5b63f72e6c4901512c9a2 (patch) | |
tree | 38341ebc83436563d2e005d234871937dffb6341 /fs/bcachefs | |
parent | f9c5519336731174cc79ef23543909f9f4e11f64 (diff) | |
download | lwn-36e9d69854752bdad5c5b63f72e6c4901512c9a2.tar.gz lwn-36e9d69854752bdad5c5b63f72e6c4901512c9a2.zip |
bcachefs: Do updates in order they were queued up in
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- | fs/bcachefs/btree_iter.c | 41 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/btree_update.h | 25 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 169 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 23 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 7 |
7 files changed, 144 insertions, 128 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index a278921d3e6f..17596aee23cc 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1671,7 +1671,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans, static int bch2_trans_realloc_iters(struct btree_trans *trans, unsigned new_size) { - void *new_iters, *new_updates; + void *new_iters, *new_updates, *new_sorted; + size_t iters_bytes; + size_t updates_bytes; + size_t sorted_bytes; new_size = roundup_pow_of_two(new_size); @@ -1684,9 +1687,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans, bch2_trans_unlock(trans); - new_iters = kmalloc(sizeof(struct btree_iter) * new_size + - sizeof(struct btree_insert_entry) * (new_size + 4), - GFP_NOFS); + iters_bytes = sizeof(struct btree_iter) * new_size; + updates_bytes = sizeof(struct btree_insert_entry) * (new_size + 4); + sorted_bytes = sizeof(u8) * (new_size + 4); + + new_iters = kmalloc(iters_bytes + + updates_bytes + + sorted_bytes, GFP_NOFS); if (new_iters) goto success; @@ -1695,7 +1702,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans, trans->used_mempool = true; success: - new_updates = new_iters + sizeof(struct btree_iter) * new_size; + new_updates = new_iters + iters_bytes; + new_sorted = new_updates + updates_bytes; memcpy(new_iters, trans->iters, sizeof(struct btree_iter) * trans->nr_iters); @@ -1710,9 +1718,10 @@ success: if (trans->iters != trans->iters_onstack) kfree(trans->iters); - trans->iters = new_iters; - trans->updates = new_updates; - trans->size = new_size; + trans->iters = new_iters; + trans->updates = new_updates; + trans->updates_sorted = new_sorted; + trans->size = new_size; if (trans->iters_live) { trace_trans_restart_iters_realloced(trans->ip, trans->size); @@ -1958,6 +1967,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, trans->size = ARRAY_SIZE(trans->iters_onstack); trans->iters = trans->iters_onstack; trans->updates = trans->updates_onstack; + trans->updates_sorted = trans->updates_sorted_onstack; trans->fs_usage_deltas = NULL; if (expected_nr_iters > trans->size) @@ -1982,3 +1992,18 @@ int bch2_trans_exit(struct btree_trans *trans) return trans->error ? -EIO : 0; } + +void bch2_fs_btree_iter_exit(struct bch_fs *c) +{ + mempool_exit(&c->btree_iters_pool); +} + +int bch2_fs_btree_iter_init(struct bch_fs *c) +{ + unsigned nr = BTREE_ITER_MAX; + + return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, + sizeof(struct btree_iter) * nr + + sizeof(struct btree_insert_entry) * (nr + 4) + + sizeof(u8) * (nr + 4)); +} diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index b54351073231..b52d8bff0115 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -303,4 +303,7 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); int bch2_trans_exit(struct btree_trans *); +void bch2_fs_btree_iter_exit(struct bch_fs *); +int bch2_fs_btree_iter_init(struct bch_fs *); + #endif /* _BCACHEFS_BTREE_ITER_H */ diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 621cbfa22fc9..88e048fa0fba 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -264,8 +264,6 @@ struct btree_insert_entry { }; bool deferred; - bool triggered; - bool marked; }; #define BTREE_ITER_MAX 64 @@ -294,6 +292,7 @@ struct btree_trans { struct btree_iter *iters; struct btree_insert_entry *updates; + u8 *updates_sorted; /* update path: */ struct journal_res journal_res; @@ -305,6 +304,7 @@ struct btree_trans { struct btree_iter iters_onstack[2]; struct btree_insert_entry updates_onstack[6]; + u8 updates_sorted_onstack[6]; struct replicas_delta_list *fs_usage_deltas; }; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 08c17477e76c..36e34b3d9213 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -119,8 +119,13 @@ int bch2_trans_commit(struct btree_trans *, struct disk_reservation *, u64 *, unsigned); -struct btree_insert_entry *bch2_trans_update(struct btree_trans *, - struct btree_insert_entry); +static inline void bch2_trans_update(struct btree_trans *trans, + struct btree_insert_entry entry) +{ + EBUG_ON(trans->nr_updates >= trans->nr_iters + 4); + + trans->updates[trans->nr_updates++] = entry; +} #define bch2_trans_do(_c, _journal_seq, _flags, _do) \ ({ \ @@ -140,18 +145,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *, _ret; \ }) -/* - * We sort transaction entries so that if multiple iterators point to the same - * leaf node they'll be adjacent: - */ -static inline bool same_leaf_as_prev(struct btree_trans *trans, - struct btree_insert_entry *i) -{ - return i != trans->updates && - !i->deferred && - i[0].iter->l[0].b == i[-1].iter->l[0].b; -} - #define __trans_next_update(_trans, _i, _filter) \ ({ \ while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\ @@ -171,8 +164,4 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans, #define trans_for_each_update_iter(trans, i) \ __trans_for_each_update(trans, i, !(i)->deferred) -#define trans_for_each_update_leaf(trans, i) \ - __trans_for_each_update(trans, i, !(i)->deferred && \ - !same_leaf_as_prev(trans, i)) - #endif /* _BCACHEFS_BTREE_UPDATE_H */ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 66b12e55d946..657359059a08 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -19,6 +19,26 @@ #include <linux/sort.h> +static inline bool same_leaf_as_prev(struct btree_trans *trans, + unsigned sorted_idx) +{ + struct btree_insert_entry *i = trans->updates + + trans->updates_sorted[sorted_idx]; + struct btree_insert_entry *prev = sorted_idx + ? trans->updates + trans->updates_sorted[sorted_idx - 1] + : NULL; + + return !i->deferred && + prev && + i->iter->l[0].b == prev->iter->l[0].b; +} + +#define trans_for_each_update_sorted(_trans, _i, _iter) \ + for (iter = 0; \ + _iter < _trans->nr_updates && \ + (_i = _trans->updates + _trans->updates_sorted[_iter], 1); \ + _iter++) + inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, struct btree_iter *iter) { @@ -36,20 +56,21 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b, bch2_btree_init_next(c, b, iter); } -static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans) +static void btree_trans_lock_write(struct btree_trans *trans, bool lock) { + struct bch_fs *c = trans->c; struct btree_insert_entry *i; + unsigned iter; - trans_for_each_update_leaf(trans, i) - bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter); -} - -static void btree_trans_unlock_write(struct btree_trans *trans) -{ - struct btree_insert_entry *i; + trans_for_each_update_sorted(trans, i, iter) { + if (same_leaf_as_prev(trans, iter)) + continue; - trans_for_each_update_leaf(trans, i) - bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter); + if (lock) + bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter); + else + bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter); + } } static inline int btree_trans_cmp(struct btree_insert_entry l, @@ -59,6 +80,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l, btree_iter_cmp(l.iter, r.iter); } +static inline void btree_trans_sort_updates(struct btree_trans *trans) +{ + struct btree_insert_entry *l, *r; + unsigned nr = 0, pos; + + trans_for_each_update(trans, l) { + for (pos = 0; pos < nr; pos++) { + r = trans->updates + trans->updates_sorted[pos]; + + if (btree_trans_cmp(*l, *r) <= 0) + break; + } + + memmove(&trans->updates_sorted[pos + 1], + &trans->updates_sorted[pos], + (nr - pos) * sizeof(trans->updates_sorted[0])); + + trans->updates_sorted[pos] = l - trans->updates; + nr++; + } + + BUG_ON(nr != trans->nr_updates); +} + /* Inserting into a given leaf node (last stage of insert): */ /* Handle overwrites and do insert, for non extents: */ @@ -488,12 +533,12 @@ static int btree_trans_check_can_insert(struct btree_trans *trans, struct btree_insert_entry **stopped_at) { struct btree_insert_entry *i; - unsigned u64s = 0; + unsigned iter, u64s = 0; int ret; - trans_for_each_update_iter(trans, i) { + trans_for_each_update_sorted(trans, i, iter) { /* Multiple inserts might go to same leaf: */ - if (!same_leaf_as_prev(trans, i)) + if (!same_leaf_as_prev(trans, iter)) u64s = 0; u64s += i->k->k.u64s; @@ -542,7 +587,6 @@ static inline int do_btree_insert_at(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_fs_usage_online *fs_usage = NULL; struct btree_insert_entry *i; - bool saw_non_marked; unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE ? BCH_BUCKET_MARK_BUCKET_INVALIDATE : 0; @@ -551,35 +595,31 @@ static inline int do_btree_insert_at(struct btree_trans *trans, trans_for_each_update_iter(trans, i) BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK); + /* + * note: running triggers will append more updates to the list of + * updates as we're walking it: + */ trans_for_each_update_iter(trans, i) - i->marked = false; - - do { - saw_non_marked = false; - - trans_for_each_update_iter(trans, i) { - if (i->marked) - continue; - - saw_non_marked = true; - i->marked = true; - - if (update_has_triggers(trans, i) && - update_triggers_transactional(trans, i)) { - ret = bch2_trans_mark_update(trans, i->iter, i->k); - if (ret == -EINTR) - trace_trans_restart_mark(trans->ip); - if (ret) - goto out_clear_replicas; - } + if (update_has_triggers(trans, i) && + update_triggers_transactional(trans, i)) { + ret = bch2_trans_mark_update(trans, i->iter, i->k); + if (ret == -EINTR) + trace_trans_restart_mark(trans->ip); + if (ret) + goto out_clear_replicas; } - } while (saw_non_marked); trans_for_each_update(trans, i) btree_insert_entry_checks(trans, i); bch2_btree_trans_verify_locks(trans); - btree_trans_lock_write(c, trans); + /* + * No more updates can be added - sort updates so we can take write + * locks in the correct order: + */ + btree_trans_sort_updates(trans); + + btree_trans_lock_write(trans, true); if (race_fault()) { ret = -EINTR; @@ -597,8 +637,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, goto out; trans_for_each_update_iter(trans, i) { - if (i->deferred || - !btree_node_type_needs_gc(i->iter->btree_id)) + if (!btree_node_type_needs_gc(i->iter->btree_id)) continue; if (!fs_usage) { @@ -664,7 +703,7 @@ out: (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) && trans->journal_res.ref); - btree_trans_unlock_write(trans); + btree_trans_lock_write(trans, false); if (fs_usage) { bch2_fs_usage_scratch_put(c, fs_usage); @@ -689,19 +728,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, { struct bch_fs *c = trans->c; unsigned flags = trans->flags; - struct btree_insert_entry *src, *dst; - - src = dst = trans->updates; - - while (src < trans->updates + trans->nr_updates) { - if (!src->triggered) { - *dst = *src; - dst++; - } - src++; - } - - trans->nr_updates = dst - trans->updates; /* * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree @@ -816,6 +842,7 @@ static int __bch2_trans_commit(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; + unsigned iter; int ret; trans_for_each_update_iter(trans, i) { @@ -837,8 +864,10 @@ static int __bch2_trans_commit(struct btree_trans *trans, if (trans->flags & BTREE_INSERT_NOUNLOCK) trans->nounlock = true; - trans_for_each_update_leaf(trans, i) - bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags); + trans_for_each_update_sorted(trans, i, iter) + if (!same_leaf_as_prev(trans, iter)) + bch2_foreground_maybe_merge(c, i->iter, + 0, trans->flags); trans->nounlock = false; @@ -858,7 +887,8 @@ int bch2_trans_commit(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i = NULL; - unsigned orig_mem_top = trans->mem_top; + unsigned orig_nr_updates = trans->nr_updates; + unsigned orig_mem_top = trans->mem_top; int ret = 0; if (!trans->nr_updates) @@ -931,39 +961,20 @@ out_noupdates: err: ret = bch2_trans_commit_error(trans, i, ret); + /* free updates and memory used by triggers, they'll be reexecuted: */ + trans->nr_updates = orig_nr_updates; + trans->mem_top = orig_mem_top; + /* can't loop if it was passed in and we changed it: */ if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret) ret = -EINTR; - if (!ret) { - /* free memory used by triggers, they'll be reexecuted: */ - trans->mem_top = orig_mem_top; + if (!ret) goto retry; - } goto out; } -struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans, - struct btree_insert_entry entry) -{ - struct btree_insert_entry *i; - - BUG_ON(trans->nr_updates >= trans->nr_iters + 4); - - for (i = trans->updates; - i < trans->updates + trans->nr_updates; - i++) - if (btree_trans_cmp(entry, *i) < 0) - break; - - memmove(&i[1], &i[0], - (void *) &trans->updates[trans->nr_updates] - (void *) i); - trans->nr_updates++; - *i = entry; - return i; -} - /** * bch2_btree_insert - insert keys into the extent btree * @c: pointer to struct bch_fs diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e432a4507081..b6340a2f6deb 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1358,11 +1358,8 @@ static int trans_get_key(struct btree_trans *trans, struct btree_insert_entry *i; int ret; - for (i = trans->updates; - i < trans->updates + trans->nr_updates; - i++) - if (!i->deferred && - i->iter->btree_id == btree_id && + trans_for_each_update_iter(trans, i) + if (i->iter->btree_id == btree_id && (btree_node_type_is_extents(btree_id) ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && bkey_cmp(pos, i->k->k.p) < 0 @@ -1390,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans, struct btree_iter *iter, unsigned u64s) { + struct btree_insert_entry *i; struct bkey_i *new_k; - unsigned i; new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64)); if (IS_ERR(new_k)) @@ -1400,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans, bkey_init(&new_k->k); new_k->k.p = iter->pos; - for (i = 0; i < trans->nr_updates; i++) - if (!trans->updates[i].deferred && - trans->updates[i].iter == iter) { - trans->updates[i].k = new_k; + trans_for_each_update_iter(trans, i) + if (i->iter == iter) { + i->k = new_k; return new_k; } - bch2_trans_update(trans, ((struct btree_insert_entry) { - .iter = iter, - .k = new_k, - .triggered = true, - })); - + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k)); return new_k; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 202c0b443ef4..14e2f6828cc6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -462,6 +462,7 @@ static void bch2_fs_free(struct bch_fs *c) bch2_fs_ec_exit(c); bch2_fs_encryption_exit(c); bch2_fs_io_exit(c); + bch2_fs_btree_iter_exit(c); bch2_fs_btree_cache_exit(c); bch2_fs_journal_exit(&c->journal); bch2_io_clock_exit(&c->io_clock[WRITE]); @@ -474,7 +475,6 @@ static void bch2_fs_free(struct bch_fs *c) free_percpu(c->usage[0]); kfree(c->usage_base); free_percpu(c->pcpu); - mempool_exit(&c->btree_iters_pool); mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); mempool_exit(&c->btree_interior_update_pool); @@ -729,15 +729,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) !(c->online_reserved = alloc_percpu(u64)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || - mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, - sizeof(struct btree_iter) * BTREE_ITER_MAX + - sizeof(struct btree_insert_entry) * - (BTREE_ITER_MAX + 4)) || bch2_io_clock_init(&c->io_clock[READ]) || bch2_io_clock_init(&c->io_clock[WRITE]) || bch2_fs_journal_init(&c->journal) || bch2_fs_replicas_init(c) || bch2_fs_btree_cache_init(c) || + bch2_fs_btree_iter_init(c) || bch2_fs_io_init(c) || bch2_fs_encryption_init(c) || bch2_fs_compress_init(c) || |