author | Kent Overstreet <kent.overstreet@linux.dev> | 2022-10-29 15:54:17 -0400
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:45 -0400
commit | 1df3e19996a3b29ed82315bf03cb02ac4e4e70ab (patch)
tree | 745b9032df5c61f7b371071cbfd62622a8e4258c /fs/bcachefs/io.c
parent | a10195764901e0a41e64d596de57a957e7f982f0 (diff)
bcachefs: BCH_WRITE_SYNC
This adds a new flag for the write path, BCH_WRITE_SYNC, and switches
the O_DIRECT write path to use it when we're not running asynchronously.
It runs the btree update after the write in the original thread's
context instead of in a kworker, cutting the number of context switches in half.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
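
To make the change concrete, here is a minimal sketch of the two completion strategies the commit message describes. Only the BCH_WRITE_SYNC flag, the op->flags field, and the "same thread versus kworker" split come from the patch below; the helpers submit_data_write(), wait_for_data_write(), run_btree_index_update() and punt_index_update_to_worker() are hypothetical stand-ins, not bcachefs functions.

	/* Sketch only -- illustrates the control flow, not the real bcachefs code. */
	static void write_and_index_sketch(struct bch_write_op *op)
	{
		submit_data_write(op);			/* hypothetical helper */

		if (op->flags & BCH_WRITE_SYNC) {
			/*
			 * Synchronous path: wait for the data write and run the
			 * btree index update right here, in the submitting
			 * thread -- no handoff to a kworker.
			 */
			wait_for_data_write(op);	/* hypothetical helper */
			run_btree_index_update(op);	/* hypothetical helper */
		} else {
			/*
			 * Asynchronous path: the index update is punted to a
			 * workqueue and a kworker completes the op, which costs
			 * an extra round of context switches.
			 */
			punt_index_update_to_worker(op);	/* hypothetical helper */
		}
	}

An O_DIRECT write that is not running asynchronously now sets BCH_WRITE_SYNC, so the whole sequence stays on the submitting thread.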
Diffstat (limited to 'fs/bcachefs/io.c')
-rw-r--r-- | fs/bcachefs/io.c | 81
1 file changed, 51 insertions(+), 30 deletions(-)
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 97427487aa79..4818c78e5213 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -596,7 +596,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 
 static void __bch2_write(struct bch_write_op *);
 
-static void bch2_write_done(struct closure *cl)
+static void __bch2_write_done(struct closure *cl)
 {
 	struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
 	struct bch_fs *c = op->c;
@@ -612,7 +612,23 @@ static void bch2_write_done(struct closure *cl)
 	EBUG_ON(cl->parent);
 	closure_debug_destroy(cl);
-	op->end_io(op);
+	if (op->end_io)
+		op->end_io(op);
+}
+
+static __always_inline void bch2_write_done(struct bch_write_op *op)
+{
+	if (likely(!(op->flags & BCH_WRITE_FLUSH) || op->error)) {
+		__bch2_write_done(&op->cl);
+	} else if (!(op->flags & BCH_WRITE_SYNC)) {
+		bch2_journal_flush_seq_async(&op->c->journal,
+					     op->journal_seq,
+					     &op->cl);
+		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
+	} else {
+		bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
+		__bch2_write_done(&op->cl);
+	}
 }
 
 static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
@@ -699,6 +715,7 @@ out:
 err:
 	keys->top = keys->keys;
 	op->error = ret;
+	op->flags |= BCH_WRITE_DONE;
 	goto out;
 }
 
@@ -778,9 +795,9 @@ unlock:
 		bch2_journal_flush_seq_async(&op->c->journal,
 					     op->journal_seq,
 					     &op->cl);
-		continue_at(&op->cl, bch2_write_done, index_update_wq(op));
+		continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
 	} else {
-		bch2_write_done(&op->cl);
+		__bch2_write_done(&op->cl);
 	}
 }
 
@@ -1271,10 +1288,10 @@ again:
 					      ? NULL : &op->cl, &wp);
 		if (unlikely(ret)) {
-			if (unlikely(ret != -EAGAIN))
-				goto err;
+			if (ret == -EAGAIN)
+				break;
 
-			break;
+			goto err;
 		}
 
 		EBUG_ON(!wp);
@@ -1283,13 +1300,25 @@ again:
 		ret = bch2_write_extent(op, wp, &bio);
 		bch2_alloc_sectors_done(c, wp);
+err:
+		if (ret <= 0) {
+			if (!(op->flags & BCH_WRITE_SYNC)) {
+				spin_lock(&wp->writes_lock);
+				op->wp = wp;
+				list_add_tail(&op->wp_list, &wp->writes);
+				if (wp->state == WRITE_POINT_stopped)
+					__wp_update_state(wp, WRITE_POINT_waiting_io);
+				spin_unlock(&wp->writes_lock);
+			}
 
-		if (ret < 0)
-			goto err;
-
-		if (!ret)
 			op->flags |= BCH_WRITE_DONE;
 
+			if (ret < 0) {
+				op->error = ret;
+				break;
+			}
+		}
+
 		bio->bi_end_io	= bch2_write_endio;
 		bio->bi_private	= &op->cl;
 		bio->bi_opf |= REQ_OP_WRITE;
@@ -1302,36 +1331,28 @@ again:
 		bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
 					  key_to_write);
 	} while (ret);
-out:
+
 	/*
-	 * If the write can't all be submitted at once, we generally want to
-	 * block synchronously as that signals backpressure to the caller.
+	 * Sync or no?
+	 *
+	 * If we're running asynchronously, wne may still want to block
+	 * synchronously here if we weren't able to submit all of the IO at
+	 * once, as that signals backpressure to the caller.
 	 */
-	if (!(op->flags & BCH_WRITE_DONE) &&
-	    !(op->flags & BCH_WRITE_IN_WORKER)) {
+	if ((op->flags & BCH_WRITE_SYNC) ||
+	    (!(op->flags & BCH_WRITE_DONE) &&
+	     !(op->flags & BCH_WRITE_IN_WORKER))) {
 		closure_sync(&op->cl);
 		__bch2_write_index(op);
 
 		if (!(op->flags & BCH_WRITE_DONE))
 			goto again;
-		bch2_write_done(&op->cl);
+		bch2_write_done(op);
 	} else {
-		spin_lock(&wp->writes_lock);
-		op->wp = wp;
-		list_add_tail(&op->wp_list, &wp->writes);
-		if (wp->state == WRITE_POINT_stopped)
-			__wp_update_state(wp, WRITE_POINT_waiting_io);
-		spin_unlock(&wp->writes_lock);
-
 		continue_at(&op->cl, bch2_write_index, NULL);
 	}
 
 	memalloc_nofs_restore(nofs_flags);
-	return;
-err:
-	op->error = ret;
-	op->flags |= BCH_WRITE_DONE;
-	goto out;
 }
 
 static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
@@ -1374,7 +1395,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 
 	__bch2_write_index(op);
 err:
-	bch2_write_done(&op->cl);
+	bch2_write_done(op);
 }
 
 /**
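
For reference, the new completion dispatch added by this patch, restated outside the diff with explanatory comments; the code is taken from the hunk above, and the comments are an editorial gloss rather than part of the commit.

	static __always_inline void bch2_write_done(struct bch_write_op *op)
	{
		if (likely(!(op->flags & BCH_WRITE_FLUSH) || op->error)) {
			/* No journal flush needed (or the op already failed):
			 * complete immediately in the current context. */
			__bch2_write_done(&op->cl);
		} else if (!(op->flags & BCH_WRITE_SYNC)) {
			/* Asynchronous flush: start the journal flush and let a
			 * kworker run __bch2_write_done() when it completes. */
			bch2_journal_flush_seq_async(&op->c->journal,
						     op->journal_seq,
						     &op->cl);
			continue_at(&op->cl, __bch2_write_done, index_update_wq(op));
		} else {
			/* BCH_WRITE_SYNC: block on the journal flush here and
			 * complete on the submitting thread. */
			bch2_journal_flush_seq(&op->c->journal, op->journal_seq);
			__bch2_write_done(&op->cl);
		}
	}

The same trade-off appears at the end of __bch2_write(): with BCH_WRITE_SYNC set it calls closure_sync() and __bch2_write_index() itself instead of continuing at bch2_write_index() on a workqueue.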