diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2022-10-31 20:30:27 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:45 -0400 |
commit | 182c7bbfbfe8d435672b8cb9730b07e88e103670 (patch) | |
tree | 61238500bdf507fcab5c8d79a5b23d24ec824a0f | |
parent | 1df3e19996a3b29ed82315bf03cb02ac4e4e70ab (diff) | |
download | lwn-182c7bbfbfe8d435672b8cb9730b07e88e103670.tar.gz lwn-182c7bbfbfe8d435672b8cb9730b07e88e103670.zip |
bcachefs: DIO write path optimization
- With BCH_WRITE_SYNC, we no longer need the completion in struct
dio_write
- Pull out bch2_dio_write_copy_iov() into a separate non-inline
function, it's code that doesn't run in the common case
- Copy mapping and inode pointers into dio_write, avoiding pointer
chasing at the start of bch2_dio_write_loop()
- kthread_use_mm() is not needed in the common case; move it into
bch2_dio_write_loop_async()
- factor out various helpers from bch2_dio_write_loop() and rework
control flow for better icache utilization
Other small optimizations:
- bch2_keylist_free() is only used in one place, at the end of the
bch2_write() path - drop the reinit
- in bch2_disk_reservation_put(), check if res->sectors is nonzero
before touching c->online_reserved, since that will likely be a cache
miss
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
bcachefs: More DIO write path optimization
Better code prefetching (?)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/fs-io.c | 175 | ||||
-rw-r--r-- | fs/bcachefs/keylist.h | 1 |
2 files changed, 103 insertions, 73 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index dbad24f5f2ea..dff103a66780 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -72,8 +72,9 @@ struct bch_writepage_io { }; struct dio_write { - struct completion done; struct kiocb *req; + struct address_space *mapping; + struct bch_inode_info *inode; struct mm_struct *mm; unsigned loop:1, sync:1, @@ -2043,6 +2044,18 @@ err: return err ? false : ret; } +static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct bch_inode_info *inode = dio->inode; + struct bio *bio = &dio->op.wbio.bio; + + return bch2_check_range_allocated(c, inode_inum(inode), + dio->op.pos.offset, bio_sectors(bio), + dio->op.opts.data_replicas, + dio->op.opts.compression != 0); +} + /* * We're going to return -EIOCBQUEUED, but we haven't finished consuming the * iov_iter yet, so we need to stash a copy of the iovec: it might be on the @@ -2082,27 +2095,71 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) static void bch2_dio_write_loop_async(struct bch_write_op *); +static __always_inline long bch2_dio_write_done(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct kiocb *req = dio->req; + struct bch_inode_info *inode = dio->inode; + bool sync = dio->sync; + long ret = dio->op.error ?: ((long) dio->written << 9); + + bch2_pagecache_block_put(&inode->ei_pagecache_lock); + bch2_quota_reservation_put(c, inode, &dio->quota_res); + + if (dio->free_iov) + kfree(dio->iter.__iov); + bio_put(&dio->op.wbio.bio); + + /* inode->i_dio_count is our ref on inode and thus bch_fs */ + inode_dio_end(&inode->v); + + if (ret < 0) + ret = bch2_err_class(ret); + + if (!sync) { + req->ki_complete(req, ret); + ret = -EIOCBQUEUED; + } + return ret; +} + +static __always_inline void bch2_dio_write_end(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct kiocb *req = dio->req; + struct bch_inode_info *inode = dio->inode; + struct bio *bio = &dio->op.wbio.bio; + + i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); + req->ki_pos += (u64) dio->op.written << 9; + dio->written += dio->op.written; + + spin_lock(&inode->v.i_lock); + if (req->ki_pos > inode->v.i_size) + i_size_write(&inode->v, req->ki_pos); + spin_unlock(&inode->v.i_lock); + + bio_release_pages(bio, false); + + if (unlikely(dio->op.error)) + set_bit(EI_INODE_ERROR, &inode->ei_flags); +} + static long bch2_dio_write_loop(struct dio_write *dio) { - bool kthread = (current->flags & PF_KTHREAD) != 0; + struct bch_fs *c = dio->op.c; struct kiocb *req = dio->req; - struct address_space *mapping = req->ki_filp->f_mapping; - struct bch_inode_info *inode = file_bch_inode(req->ki_filp); - struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct address_space *mapping = dio->mapping; + struct bch_inode_info *inode = dio->inode; struct bio *bio = &dio->op.wbio.bio; unsigned unaligned, iter_count; bool sync = dio->sync, dropped_locks; long ret; - if (dio->loop) - goto loop; - while (1) { iter_count = dio->iter.count; - if (kthread && dio->mm) - kthread_use_mm(dio->mm); - BUG_ON(current->faults_disabled_mapping); + EBUG_ON(current->faults_disabled_mapping); current->faults_disabled_mapping = mapping; ret = bio_iov_iter_get_pages(bio, &dio->iter); @@ -2110,8 +2167,6 @@ static long bch2_dio_write_loop(struct dio_write *dio) dropped_locks = fdm_dropped_locks(); current->faults_disabled_mapping = NULL; - if (kthread && dio->mm) - kthread_unuse_mm(dio->mm); /* * If the fault handler returned an error but also signalled @@ -2149,7 +2204,9 @@ static long bch2_dio_write_loop(struct dio_write *dio) } bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode)); - dio->op.end_io = bch2_dio_write_loop_async; + dio->op.end_io = sync + ? NULL + : bch2_dio_write_loop_async; dio->op.target = dio->op.opts.foreground_target; dio->op.write_point = writepoint_hashed((unsigned long) current); dio->op.nr_replicas = dio->op.opts.data_replicas; @@ -2166,86 +2223,58 @@ static long bch2_dio_write_loop(struct dio_write *dio) ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), dio->op.opts.data_replicas, 0); if (unlikely(ret) && - !bch2_check_range_allocated(c, inode_inum(inode), - dio->op.pos.offset, bio_sectors(bio), - dio->op.opts.data_replicas, - dio->op.opts.compression != 0)) + !bch2_dio_write_check_allocated(dio)) goto err; task_io_account_write(bio->bi_iter.bi_size); - if (!dio->sync && !dio->loop && dio->iter.count) { - if (bch2_dio_write_copy_iov(dio)) { - dio->sync = sync = true; - goto do_io; - } - } -do_io: + if (unlikely(dio->iter.count) && + !dio->sync && + !dio->loop && + bch2_dio_write_copy_iov(dio)) + dio->sync = sync = true; + dio->loop = true; closure_call(&dio->op.cl, bch2_write, NULL, NULL); - if (sync) - wait_for_completion(&dio->done); - else + if (!sync) return -EIOCBQUEUED; -loop: - i_sectors_acct(c, inode, &dio->quota_res, - dio->op.i_sectors_delta); - req->ki_pos += (u64) dio->op.written << 9; - dio->written += dio->op.written; - spin_lock(&inode->v.i_lock); - if (req->ki_pos > inode->v.i_size) - i_size_write(&inode->v, req->ki_pos); - spin_unlock(&inode->v.i_lock); + bch2_dio_write_end(dio); - bio_release_pages(bio, false); - bio->bi_vcnt = 0; - - if (dio->op.error) { - set_bit(EI_INODE_ERROR, &inode->ei_flags); - break; - } - - if (!dio->iter.count) + if (likely(!dio->iter.count) || dio->op.error) break; bio_reset(bio, NULL, REQ_OP_WRITE); - reinit_completion(&dio->done); } - - ret = dio->op.error ?: ((long) dio->written << 9); +out: + return bch2_dio_write_done(dio); err: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); - bch2_quota_reservation_put(c, inode, &dio->quota_res); - - if (dio->free_iov) - kfree(dio->iter.__iov); + dio->op.error = ret; bio_release_pages(bio, false); - bio_put(bio); - - /* inode->i_dio_count is our ref on inode and thus bch_fs */ - inode_dio_end(&inode->v); - - if (ret < 0) - ret = bch2_err_class(ret); - - if (!sync) { - req->ki_complete(req, ret); - ret = -EIOCBQUEUED; - } - return ret; + goto out; } static void bch2_dio_write_loop_async(struct bch_write_op *op) { struct dio_write *dio = container_of(op, struct dio_write, op); + struct mm_struct *mm = dio->mm; - if (dio->sync) - complete(&dio->done); - else - bch2_dio_write_loop(dio); + bch2_dio_write_end(dio); + + if (likely(!dio->iter.count) || dio->op.error) { + bch2_dio_write_done(dio); + return; + } + + bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); + + if (mm) + kthread_use_mm(mm); + bch2_dio_write_loop(dio); + if (mm) + kthread_unuse_mm(mm); } static noinline @@ -2297,8 +2326,9 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) GFP_KERNEL, &c->dio_write_bioset); dio = container_of(bio, struct dio_write, op.wbio.bio); - init_completion(&dio->done); dio->req = req; + dio->mapping = mapping; + dio->inode = inode; dio->mm = current->mm; dio->loop = false; dio->sync = is_sync_kiocb(req) || extending; @@ -2306,6 +2336,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) dio->quota_res.sectors = 0; dio->written = 0; dio->iter = *iter; + dio->op.c = c; ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, iter->count >> 9, true); diff --git a/fs/bcachefs/keylist.h b/fs/bcachefs/keylist.h index 195799bb20bc..635efb7e8228 100644 --- a/fs/bcachefs/keylist.h +++ b/fs/bcachefs/keylist.h @@ -17,7 +17,6 @@ static inline void bch2_keylist_free(struct keylist *l, u64 *inline_keys) { if (l->keys_p != inline_keys) kfree(l->keys_p); - bch2_keylist_init(l, inline_keys); } static inline void bch2_keylist_push(struct keylist *l) |