From 1c6fdbd8f2465ddfb73a01ec620cbf3d14044e1a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Mar 2017 22:18:50 -0800 Subject: bcachefs: Initial commit Initially forked from drivers/md/bcache, bcachefs is a new copy-on-write filesystem with every feature you could possibly want. Website: https://bcachefs.org Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 1773 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1773 insertions(+) create mode 100644 fs/bcachefs/fs.c (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c new file mode 100644 index 000000000000..3f3d916e0d37 --- /dev/null +++ b/fs/bcachefs/fs.c @@ -0,0 +1,1773 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef NO_BCACHEFS_FS + +#include "bcachefs.h" +#include "acl.h" +#include "btree_update.h" +#include "buckets.h" +#include "chardev.h" +#include "dirent.h" +#include "extents.h" +#include "fs.h" +#include "fs-io.h" +#include "fs-ioctl.h" +#include "fsck.h" +#include "inode.h" +#include "io.h" +#include "journal.h" +#include "keylist.h" +#include "quota.h" +#include "super.h" +#include "xattr.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *bch2_inode_cache; + +static void bch2_vfs_inode_init(struct bch_fs *, + struct bch_inode_info *, + struct bch_inode_unpacked *); + +static void journal_seq_copy(struct bch_inode_info *dst, + u64 journal_seq) +{ + u64 old, v = READ_ONCE(dst->ei_journal_seq); + + do { + old = v; + + if (old >= journal_seq) + break; + } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); +} + +static void __pagecache_lock_put(struct pagecache_lock *lock, long i) +{ + BUG_ON(atomic_long_read(&lock->v) == 0); + + if (atomic_long_sub_return_release(i, &lock->v) == 0) + wake_up_all(&lock->wait); +} + +static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) +{ + long v = atomic_long_read(&lock->v), old; + + do { + old = v; + + if (i > 0 ? v < 0 : v > 0) + return false; + } while ((v = atomic_long_cmpxchg_acquire(&lock->v, + old, old + i)) != old); + return true; +} + +static void __pagecache_lock_get(struct pagecache_lock *lock, long i) +{ + wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); +} + +void bch2_pagecache_add_put(struct pagecache_lock *lock) +{ + __pagecache_lock_put(lock, 1); +} + +void bch2_pagecache_add_get(struct pagecache_lock *lock) +{ + __pagecache_lock_get(lock, 1); +} + +void bch2_pagecache_block_put(struct pagecache_lock *lock) +{ + __pagecache_lock_put(lock, -1); +} + +void bch2_pagecache_block_get(struct pagecache_lock *lock) +{ + __pagecache_lock_get(lock, -1); +} + +/* + * I_SIZE_DIRTY requires special handling: + * + * To the recovery code, the flag means that there is stale data past i_size + * that needs to be deleted; it's used for implementing atomic appends and + * truncates. + * + * On append, we set I_SIZE_DIRTY before doing the write, then after the write + * we clear I_SIZE_DIRTY atomically with updating i_size to the new larger size + * that exposes the data we just wrote. + * + * On truncate, it's the reverse: We set I_SIZE_DIRTY atomically with setting + * i_size to the new smaller size, then we delete the data that we just made + * invisible, and then we clear I_SIZE_DIRTY. + * + * Because there can be multiple appends in flight at a time, we need a refcount + * (i_size_dirty_count) instead of manipulating the flag directly. 
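+ * (If two appends race, each takes a ref; a crash while either is still in
+ * flight thus leaves the flag set, and recovery deletes the stale data past
+ * i_size.)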
Nonzero + * refcount means I_SIZE_DIRTY is set, zero means it's cleared. + * + * Because write_inode() can be called at any time, i_size_dirty_count means + * something different to the runtime code - it means to write_inode() "don't + * update i_size yet". + * + * We don't clear I_SIZE_DIRTY directly, we let write_inode() clear it when + * i_size_dirty_count is zero - but the reverse is not true, I_SIZE_DIRTY must + * be set explicitly. + */ + +void bch2_inode_update_after_write(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + unsigned fields) +{ + set_nlink(&inode->v, bi->bi_flags & BCH_INODE_UNLINKED + ? 0 + : bi->bi_nlink + nlink_bias(inode->v.i_mode)); + i_uid_write(&inode->v, bi->bi_uid); + i_gid_write(&inode->v, bi->bi_gid); + inode->v.i_mode = bi->bi_mode; + + if (fields & ATTR_ATIME) + inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime); + if (fields & ATTR_MTIME) + inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime); + if (fields & ATTR_CTIME) + inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime)); + + inode->ei_inode = *bi; + inode->ei_qid = bch_qid(bi); +} + +int __must_check bch2_write_inode_trans(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *inode_u, + inode_set_fn set, + void *p) +{ + struct btree_iter *iter; + struct bkey_inode_buf *inode_p; + struct bkey_s_c k; + u64 inum = inode->v.i_ino; + int ret; + + lockdep_assert_held(&inode->ei_update_lock); + + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + k = bch2_btree_iter_peek_slot(iter); + if ((ret = btree_iter_err(k))) + return ret; + + if (WARN_ONCE(k.k->type != BCH_INODE_FS, + "inode %llu not found when updating", inum)) + return -ENOENT; + + ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode_u); + if (WARN_ONCE(ret, + "error %i unpacking inode %llu", ret, inum)) + return -ENOENT; + + BUG_ON(inode_u->bi_size != inode->ei_inode.bi_size); + + BUG_ON(inode_u->bi_size != inode->ei_inode.bi_size && + !(inode_u->bi_flags & BCH_INODE_I_SIZE_DIRTY) && + inode_u->bi_size > i_size_read(&inode->v)); + + if (set) { + ret = set(inode, inode_u, p); + if (ret) + return ret; + } + + inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); + if (IS_ERR(inode_p)) + return PTR_ERR(inode_p); + + bch2_inode_pack(inode_p, inode_u); + bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); + return 0; +} + +int __must_check __bch2_write_inode(struct bch_fs *c, + struct bch_inode_info *inode, + inode_set_fn set, + void *p, unsigned fields) +{ + struct btree_trans trans; + struct bch_inode_unpacked inode_u; + int ret; + + bch2_trans_init(&trans, c); +retry: + bch2_trans_begin(&trans); + + ret = bch2_write_inode_trans(&trans, inode, &inode_u, set, p) ?: + bch2_trans_commit(&trans, NULL, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + goto retry; + + /* + * the btree node lock protects inode->ei_inode, not ei_update_lock; + * this is important for inode updates via bchfs_write_index_update + */ + if (!ret) + bch2_inode_update_after_write(c, inode, &inode_u, fields); + + bch2_trans_exit(&trans); + return ret < 0 ? 
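/* positive return values aren't errors; squash them to 0 */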
ret : 0; +} + +static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) +{ + struct bch_inode_unpacked inode_u; + struct bch_inode_info *inode; + int ret; + + inode = to_bch_ei(iget_locked(c->vfs_sb, inum)); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->v.i_state & I_NEW)) + return &inode->v; + + ret = bch2_inode_find_by_inum(c, inum, &inode_u); + if (ret) { + iget_failed(&inode->v); + return ERR_PTR(ret); + } + + bch2_vfs_inode_init(c, inode, &inode_u); + + inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum); + + unlock_new_inode(&inode->v); + + return &inode->v; +} + +static void bch2_inode_init_owner(struct bch_inode_unpacked *inode_u, + const struct inode *dir, umode_t mode) +{ + kuid_t uid = current_fsuid(); + kgid_t gid; + + if (dir && dir->i_mode & S_ISGID) { + gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + gid = current_fsgid(); + + inode_u->bi_uid = from_kuid(i_user_ns(dir), uid); + inode_u->bi_gid = from_kgid(i_user_ns(dir), gid); + inode_u->bi_mode = mode; +} + +static int inode_update_for_create_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_inode_unpacked *new_inode = p; + struct timespec64 now = current_time(&inode->v); + + bi->bi_mtime = bi->bi_ctime = timespec_to_bch2_time(c, now); + + if (S_ISDIR(new_inode->bi_mode)) + bi->bi_nlink++; + + return 0; +} + +static int inum_test(struct inode *inode, void *p) +{ + unsigned long *ino = p; + + return *ino == inode->i_ino; +} + +static struct bch_inode_info * +__bch2_create(struct mnt_idmap *idmap, + struct bch_inode_info *dir, struct dentry *dentry, + umode_t mode, dev_t rdev, bool tmpfile) +{ + struct bch_fs *c = dir->v.i_sb->s_fs_info; + struct btree_trans trans; + struct bch_inode_unpacked dir_u; + struct bch_inode_info *inode, *old; + struct bch_inode_unpacked inode_u; + struct bch_hash_info hash_info; + struct posix_acl *default_acl = NULL, *acl = NULL; + int ret; + + bch2_inode_init(c, &inode_u, 0, 0, 0, rdev, &dir->ei_inode); + bch2_inode_init_owner(&inode_u, &dir->v, mode); + + inode_u.bi_project = dir->ei_qid.q[QTYP_PRJ]; + + hash_info = bch2_hash_info_init(c, &inode_u); + + if (tmpfile) + inode_u.bi_flags |= BCH_INODE_UNLINKED; + + ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC); + if (ret) + return ERR_PTR(ret); + +#ifdef CONFIG_BCACHEFS_POSIX_ACL + ret = posix_acl_create(&dir->v, &inode_u.bi_mode, &default_acl, &acl); + if (ret) + goto err; +#endif + + /* + * preallocate vfs inode before btree transaction, so that nothing can + * fail after the transaction succeeds: + */ + inode = to_bch_ei(new_inode(c->vfs_sb)); + if (unlikely(!inode)) { + ret = -ENOMEM; + goto err; + } + + if (!tmpfile) + mutex_lock(&dir->ei_update_lock); + + bch2_trans_init(&trans, c); +retry: + bch2_trans_begin(&trans); + + ret = __bch2_inode_create(&trans, &inode_u, + BLOCKDEV_INODE_MAX, 0, + &c->unused_inode_hint) ?: + (default_acl + ? bch2_set_acl_trans(&trans, &inode_u, &hash_info, + default_acl, ACL_TYPE_DEFAULT) + : 0) ?: + (acl + ? bch2_set_acl_trans(&trans, &inode_u, &hash_info, + acl, ACL_TYPE_ACCESS) + : 0) ?: + (!tmpfile + ? __bch2_dirent_create(&trans, dir->v.i_ino, + &dir->ei_str_hash, + mode_to_type(mode), + &dentry->d_name, + inode_u.bi_inum, + BCH_HASH_SET_MUST_CREATE) + : 0) ?: + (!tmpfile + ? 
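/* bump the dir's mtime/ctime (and nlink, for new subdirs) */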
bch2_write_inode_trans(&trans, dir, &dir_u, + inode_update_for_create_fn, + &inode_u) + : 0) ?: + bch2_trans_commit(&trans, NULL, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); + if (ret == -EINTR) + goto retry; + if (unlikely(ret)) + goto err_trans; + + atomic_long_inc(&c->nr_inodes); + + if (!tmpfile) { + bch2_inode_update_after_write(c, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); + journal_seq_copy(dir, inode->ei_journal_seq); + mutex_unlock(&dir->ei_update_lock); + } + + bch2_vfs_inode_init(c, inode, &inode_u); + + set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); + set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); + + /* + * we must insert the new inode into the inode cache before calling + * bch2_trans_exit() and dropping locks, else we could race with another + * thread pulling the inode in and modifying it: + */ + + inode->v.i_state |= I_CREATING; + old = to_bch_ei(inode_insert5(&inode->v, inode->v.i_ino, + inum_test, NULL, &inode->v.i_ino)); + BUG_ON(!old); + + if (unlikely(old != inode)) { + /* + * We raced, another process pulled the new inode into cache + * before us: + */ + old->ei_journal_seq = inode->ei_journal_seq; + make_bad_inode(&inode->v); + iput(&inode->v); + + inode = old; + } else { + /* + * we really don't want insert_inode_locked2() to be setting + * I_NEW... + */ + unlock_new_inode(&inode->v); + } + + bch2_trans_exit(&trans); +out: + posix_acl_release(default_acl); + posix_acl_release(acl); + return inode; +err_trans: + bch2_trans_exit(&trans); + make_bad_inode(&inode->v); + iput(&inode->v); +err: + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN); + inode = ERR_PTR(ret); + goto out; +} + +/* methods */ + +static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, + unsigned int flags) +{ + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct inode *vinode = NULL; + u64 inum; + + inum = bch2_dirent_lookup(c, dir->v.i_ino, + &dir->ei_str_hash, + &dentry->d_name); + + if (inum) + vinode = bch2_vfs_inode_get(c, inum); + + return d_splice_alias(vinode, dentry); +} + +static int bch2_create(struct mnt_idmap *idmap, + struct inode *vdir, struct dentry *dentry, + umode_t mode, bool excl) +{ + struct bch_inode_info *inode = + __bch2_create(idmap, to_bch_ei(vdir), dentry, mode|S_IFREG, 0, false); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, &inode->v); + return 0; +} + +static int inode_update_for_link_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct timespec64 now = current_time(&inode->v); + + bi->bi_ctime = timespec_to_bch2_time(c, now); + + if (bi->bi_flags & BCH_INODE_UNLINKED) + bi->bi_flags &= ~BCH_INODE_UNLINKED; + else + bi->bi_nlink++; + + return 0; +} + +static int __bch2_link(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_inode_info *dir, + struct dentry *dentry) +{ + struct btree_trans trans; + struct bch_inode_unpacked inode_u; + int ret; + + lockdep_assert_held(&inode->v.i_rwsem); + + bch2_trans_init(&trans, c); +retry: + bch2_trans_begin(&trans); + + ret = __bch2_dirent_create(&trans, dir->v.i_ino, + &dir->ei_str_hash, + mode_to_type(inode->v.i_mode), + &dentry->d_name, + inode->v.i_ino, + BCH_HASH_SET_MUST_CREATE) ?: + bch2_write_inode_trans(&trans, inode, &inode_u, + inode_update_for_link_fn, + NULL) ?: + bch2_trans_commit(&trans, NULL, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + 
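/* an -EINTR commit means a lock restart: we go around the retry loop */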
BTREE_INSERT_NOUNLOCK); + + if (ret == -EINTR) + goto retry; + + if (likely(!ret)) + bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); + + bch2_trans_exit(&trans); + return ret; +} + +static int bch2_link(struct dentry *old_dentry, struct inode *vdir, + struct dentry *dentry) +{ + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); + int ret; + + ret = __bch2_link(c, inode, dir, dentry); + if (unlikely(ret)) + return ret; + + ihold(&inode->v); + d_instantiate(dentry, &inode->v); + return 0; +} + +static int inode_update_dir_for_unlink_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_inode_info *unlink_inode = p; + struct timespec64 now = current_time(&inode->v); + + bi->bi_mtime = bi->bi_ctime = timespec_to_bch2_time(c, now); + + bi->bi_nlink -= S_ISDIR(unlink_inode->v.i_mode); + + return 0; +} + +static int inode_update_for_unlink_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct timespec64 now = current_time(&inode->v); + + bi->bi_ctime = timespec_to_bch2_time(c, now); + if (bi->bi_nlink) + bi->bi_nlink--; + else + bi->bi_flags |= BCH_INODE_UNLINKED; + + return 0; +} + +static int bch2_unlink(struct inode *vdir, struct dentry *dentry) +{ + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_unpacked dir_u, inode_u; + struct btree_trans trans; + int ret; + + bch2_trans_init(&trans, c); +retry: + bch2_trans_begin(&trans); + + ret = __bch2_dirent_delete(&trans, dir->v.i_ino, + &dir->ei_str_hash, + &dentry->d_name) ?: + bch2_write_inode_trans(&trans, dir, &dir_u, + inode_update_dir_for_unlink_fn, + inode) ?: + bch2_write_inode_trans(&trans, inode, &inode_u, + inode_update_for_unlink_fn, + NULL) ?: + bch2_trans_commit(&trans, NULL, NULL, + &dir->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + goto retry; + if (ret) + goto err; + + if (dir->ei_journal_seq > inode->ei_journal_seq) + inode->ei_journal_seq = dir->ei_journal_seq; + + bch2_inode_update_after_write(c, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); + bch2_inode_update_after_write(c, inode, &inode_u, + ATTR_MTIME); +err: + bch2_trans_exit(&trans); + + return ret; +} + +static int bch2_symlink(struct mnt_idmap *idmap, + struct inode *vdir, struct dentry *dentry, + const char *symname) +{ + struct bch_fs *c = vdir->i_sb->s_fs_info; + struct bch_inode_info *dir = to_bch_ei(vdir), *inode; + int ret; + + inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, true); + if (unlikely(IS_ERR(inode))) + return PTR_ERR(inode); + + inode_lock(&inode->v); + ret = page_symlink(&inode->v, symname, strlen(symname) + 1); + inode_unlock(&inode->v); + + if (unlikely(ret)) + goto err; + + ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX); + if (unlikely(ret)) + goto err; + + journal_seq_copy(dir, inode->ei_journal_seq); + + ret = __bch2_link(c, inode, dir, dentry); + if (unlikely(ret)) + goto err; + + d_instantiate(dentry, &inode->v); + return 0; +err: + iput(&inode->v); + return ret; +} + +static int bch2_mkdir(struct mnt_idmap *idmap, + struct inode *vdir, struct dentry *dentry, umode_t mode) +{ + struct bch_inode_info *inode = + __bch2_create(idmap, to_bch_ei(vdir), 
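/* trailing 0, false: rdev, tmpfile */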
dentry, mode|S_IFDIR, 0, false); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, &inode->v); + return 0; +} + +static int bch2_rmdir(struct inode *vdir, struct dentry *dentry) +{ + struct bch_fs *c = vdir->i_sb->s_fs_info; + + if (bch2_empty_dir(c, dentry->d_inode->i_ino)) + return -ENOTEMPTY; + + return bch2_unlink(vdir, dentry); +} + +static int bch2_mknod(struct mnt_idmap *idmap, + struct inode *vdir, struct dentry *dentry, + umode_t mode, dev_t rdev) +{ + struct bch_inode_info *inode = + __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, false); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, &inode->v); + return 0; +} + +struct rename_info { + u64 now; + struct bch_inode_info *src_dir; + struct bch_inode_info *dst_dir; + struct bch_inode_info *src_inode; + struct bch_inode_info *dst_inode; + enum bch_rename_mode mode; +}; + +static int inode_update_for_rename_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct rename_info *info = p; + + if (inode == info->src_dir) { + bi->bi_nlink -= S_ISDIR(info->src_inode->v.i_mode); + bi->bi_nlink += info->dst_inode && + S_ISDIR(info->dst_inode->v.i_mode) && + info->mode == BCH_RENAME_EXCHANGE; + } + + if (inode == info->dst_dir) { + bi->bi_nlink += S_ISDIR(info->src_inode->v.i_mode); + bi->bi_nlink -= info->dst_inode && + S_ISDIR(info->dst_inode->v.i_mode); + } + + if (inode == info->dst_inode && + info->mode == BCH_RENAME_OVERWRITE) { + BUG_ON(bi->bi_nlink && + S_ISDIR(info->dst_inode->v.i_mode)); + + if (bi->bi_nlink) + bi->bi_nlink--; + else + bi->bi_flags |= BCH_INODE_UNLINKED; + } + + if (inode == info->src_dir || + inode == info->dst_dir) + bi->bi_mtime = info->now; + bi->bi_ctime = info->now; + + return 0; +} + +static int bch2_rename2(struct mnt_idmap *idmap, + struct inode *src_vdir, struct dentry *src_dentry, + struct inode *dst_vdir, struct dentry *dst_dentry, + unsigned flags) +{ + struct bch_fs *c = src_vdir->i_sb->s_fs_info; + struct rename_info i = { + .now = timespec_to_bch2_time(c, + current_time(src_vdir)), + .src_dir = to_bch_ei(src_vdir), + .dst_dir = to_bch_ei(dst_vdir), + .src_inode = to_bch_ei(src_dentry->d_inode), + .dst_inode = to_bch_ei(dst_dentry->d_inode), + .mode = flags & RENAME_EXCHANGE + ? BCH_RENAME_EXCHANGE + : dst_dentry->d_inode + ? BCH_RENAME_OVERWRITE : BCH_RENAME, + }; + struct btree_trans trans; + struct bch_inode_unpacked dst_dir_u, src_dir_u; + struct bch_inode_unpacked src_inode_u, dst_inode_u; + u64 journal_seq = 0; + int ret; + + if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) + return -EINVAL; + + if (i.mode == BCH_RENAME_OVERWRITE) { + if (S_ISDIR(i.src_inode->v.i_mode) != + S_ISDIR(i.dst_inode->v.i_mode)) + return -ENOTDIR; + + if (S_ISDIR(i.src_inode->v.i_mode) && + bch2_empty_dir(c, i.dst_inode->v.i_ino)) + return -ENOTEMPTY; + + ret = filemap_write_and_wait_range(i.src_inode->v.i_mapping, + 0, LLONG_MAX); + if (ret) + return ret; + } + + bch2_trans_init(&trans, c); +retry: + bch2_trans_begin(&trans); + i.now = timespec_to_bch2_time(c, current_time(src_vdir)), + + ret = bch2_dirent_rename(&trans, + i.src_dir, &src_dentry->d_name, + i.dst_dir, &dst_dentry->d_name, + i.mode) ?: + bch2_write_inode_trans(&trans, i.src_dir, &src_dir_u, + inode_update_for_rename_fn, &i) ?: + (i.src_dir != i.dst_dir + ? 
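/* cross-directory rename: the destination dir needs its own update */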
bch2_write_inode_trans(&trans, i.dst_dir, &dst_dir_u, + inode_update_for_rename_fn, &i) + : 0 ) ?: + bch2_write_inode_trans(&trans, i.src_inode, &src_inode_u, + inode_update_for_rename_fn, &i) ?: + (i.dst_inode + ? bch2_write_inode_trans(&trans, i.dst_inode, &dst_inode_u, + inode_update_for_rename_fn, &i) + : 0 ) ?: + bch2_trans_commit(&trans, NULL, NULL, + &journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); + if (ret == -EINTR) + goto retry; + if (unlikely(ret)) + goto err; + + bch2_inode_update_after_write(c, i.src_dir, &src_dir_u, + ATTR_MTIME|ATTR_CTIME); + journal_seq_copy(i.src_dir, journal_seq); + + if (i.src_dir != i.dst_dir) { + bch2_inode_update_after_write(c, i.dst_dir, &dst_dir_u, + ATTR_MTIME|ATTR_CTIME); + journal_seq_copy(i.dst_dir, journal_seq); + } + + bch2_inode_update_after_write(c, i.src_inode, &src_inode_u, + ATTR_CTIME); + if (i.dst_inode) + bch2_inode_update_after_write(c, i.dst_inode, &dst_inode_u, + ATTR_CTIME); +err: + bch2_trans_exit(&trans); + + return ret; +} + +struct inode_write_setattr { + struct iattr *attr; + struct mnt_idmap *idmap; +}; + +static int inode_update_for_setattr_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct inode_write_setattr *s = p; + unsigned int ia_valid = s->attr->ia_valid; + + if (ia_valid & ATTR_UID) + bi->bi_uid = from_kuid(i_user_ns(&inode->v), s->attr->ia_uid); + if (ia_valid & ATTR_GID) + bi->bi_gid = from_kgid(i_user_ns(&inode->v), s->attr->ia_gid); + + if (ia_valid & ATTR_ATIME) + bi->bi_atime = timespec_to_bch2_time(c, s->attr->ia_atime); + if (ia_valid & ATTR_MTIME) + bi->bi_mtime = timespec_to_bch2_time(c, s->attr->ia_mtime); + if (ia_valid & ATTR_CTIME) + bi->bi_ctime = timespec_to_bch2_time(c, s->attr->ia_ctime); + + if (ia_valid & ATTR_MODE) { + umode_t mode = s->attr->ia_mode; + kgid_t gid = ia_valid & ATTR_GID + ? s->attr->ia_gid + : inode->v.i_gid; + + if (!in_group_p(gid) && + !capable_wrt_inode_uidgid(s->idmap, &inode->v, CAP_FSETID)) + mode &= ~S_ISGID; + bi->bi_mode = mode; + } + + return 0; +} + +static int bch2_setattr_nonsize(struct mnt_idmap *idmap, + struct bch_inode_info *inode, + struct iattr *iattr) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_qid qid = inode->ei_qid; + struct btree_trans trans; + struct bch_inode_unpacked inode_u; + struct posix_acl *acl = NULL; + struct inode_write_setattr s = { iattr, idmap }; + unsigned qtypes = 0; + int ret; + + mutex_lock(&inode->ei_update_lock); + + if (c->opts.usrquota && + (iattr->ia_valid & ATTR_UID) && + !uid_eq(iattr->ia_uid, inode->v.i_uid)) { + qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), iattr->ia_uid), + qtypes |= 1 << QTYP_USR; + } + + if (c->opts.grpquota && + (iattr->ia_valid & ATTR_GID) && + !gid_eq(iattr->ia_gid, inode->v.i_gid)) { + qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), iattr->ia_gid); + qtypes |= 1 << QTYP_GRP; + } + + if (qtypes) { + ret = bch2_quota_transfer(c, qtypes, qid, inode->ei_qid, + inode->v.i_blocks + + inode->ei_quota_reserved); + if (ret) + goto err; + } + + bch2_trans_init(&trans, c); +retry: + bch2_trans_begin(&trans); + kfree(acl); + acl = NULL; + + ret = bch2_write_inode_trans(&trans, inode, &inode_u, + inode_update_for_setattr_fn, &s) ?: + (iattr->ia_valid & ATTR_MODE + ? 
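/* a mode change must also rewrite the ACL's mode bits */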
bch2_acl_chmod(&trans, inode, iattr->ia_mode, &acl) + : 0) ?: + bch2_trans_commit(&trans, NULL, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + if (ret == -EINTR) + goto retry; + if (unlikely(ret)) + goto err_trans; + + bch2_inode_update_after_write(c, inode, &inode_u, iattr->ia_valid); + + if (acl) + set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); +err_trans: + bch2_trans_exit(&trans); +err: + mutex_unlock(&inode->ei_update_lock); + + return ret; +} + +static int bch2_getattr(struct mnt_idmap *idmap, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned query_flags) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + stat->dev = inode->v.i_sb->s_dev; + stat->ino = inode->v.i_ino; + stat->mode = inode->v.i_mode; + stat->nlink = inode->v.i_nlink; + stat->uid = inode->v.i_uid; + stat->gid = inode->v.i_gid; + stat->rdev = inode->v.i_rdev; + stat->size = i_size_read(&inode->v); + stat->atime = inode->v.i_atime; + stat->mtime = inode->v.i_mtime; + stat->ctime = inode_get_ctime(&inode->v); + stat->blksize = block_bytes(c); + stat->blocks = inode->v.i_blocks; + + if (request_mask & STATX_BTIME) { + stat->result_mask |= STATX_BTIME; + stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); + } + + if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; + + return 0; +} + +static int bch2_setattr(struct mnt_idmap *idmap, + struct dentry *dentry, struct iattr *iattr) +{ + struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + int ret; + + lockdep_assert_held(&inode->v.i_rwsem); + + ret = setattr_prepare(idmap, dentry, iattr); + if (ret) + return ret; + + return iattr->ia_valid & ATTR_SIZE + ? 
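/* size changes take the truncate path */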
bch2_truncate(inode, iattr) + : bch2_setattr_nonsize(idmap, inode, iattr); +} + +static int bch2_tmpfile(struct mnt_idmap *idmap, + struct inode *vdir, struct file *file, umode_t mode) +{ + struct bch_inode_info *inode = + __bch2_create(idmap, to_bch_ei(vdir), + file->f_path.dentry, mode, 0, true); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_mark_tmpfile(file, &inode->v); + d_instantiate(file->f_path.dentry, &inode->v); + return finish_open_simple(file, 0); +} + +static int bch2_fill_extent(struct fiemap_extent_info *info, + const struct bkey_i *k, unsigned flags) +{ + if (bkey_extent_is_data(&k->k)) { + struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); + const struct bch_extent_ptr *ptr; + struct bch_extent_crc_unpacked crc; + int ret; + + extent_for_each_ptr_crc(e, ptr, crc) { + int flags2 = 0; + u64 offset = ptr->offset; + + if (crc.compression_type) + flags2 |= FIEMAP_EXTENT_ENCODED; + else + offset += crc.offset; + + if ((offset & (PAGE_SECTORS - 1)) || + (e.k->size & (PAGE_SECTORS - 1))) + flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; + + ret = fiemap_fill_next_extent(info, + bkey_start_offset(e.k) << 9, + offset << 9, + e.k->size << 9, flags|flags2); + if (ret) + return ret; + } + + return 0; + } else if (k->k.type == BCH_RESERVATION) { + return fiemap_fill_next_extent(info, + bkey_start_offset(&k->k) << 9, + 0, k->k.size << 9, + flags| + FIEMAP_EXTENT_DELALLOC| + FIEMAP_EXTENT_UNWRITTEN); + } else { + BUG(); + } +} + +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + u64 start, u64 len) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct btree_iter iter; + struct bkey_s_c k; + BKEY_PADDED(k) tmp; + bool have_extent = false; + int ret = 0; + + ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); + if (ret) + return ret; + + if (start + len < start) + return -EINVAL; + + for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + POS(ei->v.i_ino, start >> 9), 0, k) + if (bkey_extent_is_data(k.k) || + k.k->type == BCH_RESERVATION) { + if (bkey_cmp(bkey_start_pos(k.k), + POS(ei->v.i_ino, (start + len) >> 9)) >= 0) + break; + + if (have_extent) { + ret = bch2_fill_extent(info, &tmp.k, 0); + if (ret) + goto out; + } + + bkey_reassemble(&tmp.k, k); + have_extent = true; + } + + if (have_extent) + ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); +out: + bch2_btree_iter_unlock(&iter); + return ret < 0 ? 
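/* fiemap_fill_next_extent() returns 1 when the extent buffer fills up */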
ret : 0; +} + +static const struct vm_operations_struct bch_vm_ops = { + .fault = bch2_page_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = bch2_page_mkwrite, +}; + +static int bch2_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); + + vma->vm_ops = &bch_vm_ops; + return 0; +} + +/* Directories: */ + +static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_file_llseek_size(file, offset, whence, + S64_MAX, S64_MAX); +} + +static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) +{ + struct bch_fs *c = file_inode(file)->i_sb->s_fs_info; + + return bch2_readdir(c, file, ctx); +} + +static const struct file_operations bch_file_operations = { + .llseek = bch2_llseek, + .read_iter = bch2_read_iter, + .write_iter = bch2_write_iter, + .mmap = bch2_mmap, + .open = generic_file_open, + .fsync = bch2_fsync, + .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, + .fallocate = bch2_fallocate_dispatch, + .unlocked_ioctl = bch2_fs_file_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = bch2_compat_fs_ioctl, +#endif +}; + +static const struct inode_operations bch_file_inode_operations = { + .getattr = bch2_getattr, + .setattr = bch2_setattr, + .fiemap = bch2_fiemap, + .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL + .get_acl = bch2_get_acl, + .set_acl = bch2_set_acl, +#endif +}; + +static const struct inode_operations bch_dir_inode_operations = { + .lookup = bch2_lookup, + .create = bch2_create, + .link = bch2_link, + .unlink = bch2_unlink, + .symlink = bch2_symlink, + .mkdir = bch2_mkdir, + .rmdir = bch2_rmdir, + .mknod = bch2_mknod, + .rename = bch2_rename2, + .getattr = bch2_getattr, + .setattr = bch2_setattr, + .tmpfile = bch2_tmpfile, + .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL + .get_acl = bch2_get_acl, + .set_acl = bch2_set_acl, +#endif +}; + +static const struct file_operations bch_dir_file_operations = { + .llseek = bch2_dir_llseek, + .read = generic_read_dir, + .iterate_shared = bch2_vfs_readdir, + .fsync = bch2_fsync, + .unlocked_ioctl = bch2_fs_file_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = bch2_compat_fs_ioctl, +#endif +}; + +static const struct inode_operations bch_symlink_inode_operations = { + .get_link = page_get_link, + .getattr = bch2_getattr, + .setattr = bch2_setattr, + .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL + .get_acl = bch2_get_acl, + .set_acl = bch2_set_acl, +#endif +}; + +static const struct inode_operations bch_special_inode_operations = { + .getattr = bch2_getattr, + .setattr = bch2_setattr, + .listxattr = bch2_xattr_list, +#ifdef CONFIG_BCACHEFS_POSIX_ACL + .get_acl = bch2_get_acl, + .set_acl = bch2_set_acl, +#endif +}; + +static const struct address_space_operations bch_address_space_operations = { + .writepage = bch2_writepage, + .read_folio = bch2_read_folio, + .writepages = bch2_writepages, + .readahead = bch2_readahead, + .dirty_folio = bch2_dirty_folio, + .write_begin = bch2_write_begin, + .write_end = bch2_write_end, + .invalidate_folio = bch2_invalidate_folio, + .release_folio = bch2_release_folio, + .direct_IO = noop_direct_IO, +#ifdef CONFIG_MIGRATION + .migrate_folio = filemap_migrate_folio, +#endif + .error_remove_page = generic_error_remove_page, +}; + +static struct inode *bch2_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + struct bch_fs *c = sb->s_fs_info; + struct inode *vinode; + + if (ino < BCACHEFS_ROOT_INO) + return ERR_PTR(-ESTALE); + + vinode = 
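/* returns ERR_PTR on lookup failure */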
bch2_vfs_inode_get(c, ino); + if (IS_ERR(vinode)) + return ERR_CAST(vinode); + if (generation && vinode->i_generation != generation) { + /* we didn't find the right inode.. */ + iput(vinode); + return ERR_PTR(-ESTALE); + } + return vinode; +} + +static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + bch2_nfs_get_inode); +} + +static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + bch2_nfs_get_inode); +} + +static const struct export_operations bch_export_ops = { + .fh_to_dentry = bch2_fh_to_dentry, + .fh_to_parent = bch2_fh_to_parent, + //.get_parent = bch2_get_parent, +}; + +static void bch2_vfs_inode_init(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi) +{ + bch2_inode_update_after_write(c, inode, bi, ~0); + + inode->v.i_blocks = bi->bi_sectors; + inode->v.i_ino = bi->bi_inum; + inode->v.i_rdev = bi->bi_dev; + inode->v.i_generation = bi->bi_generation; + inode->v.i_size = bi->bi_size; + + inode->ei_journal_seq = 0; + inode->ei_quota_reserved = 0; + inode->ei_str_hash = bch2_hash_info_init(c, bi); + + bch2_inode_flags_to_vfs(inode); + + inode->v.i_mapping->a_ops = &bch_address_space_operations; + + switch (inode->v.i_mode & S_IFMT) { + case S_IFREG: + inode->v.i_op = &bch_file_inode_operations; + inode->v.i_fop = &bch_file_operations; + break; + case S_IFDIR: + inode->v.i_op = &bch_dir_inode_operations; + inode->v.i_fop = &bch_dir_file_operations; + break; + case S_IFLNK: + inode_nohighmem(&inode->v); + inode->v.i_op = &bch_symlink_inode_operations; + break; + default: + init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev); + inode->v.i_op = &bch_special_inode_operations; + break; + } +} + +static struct inode *bch2_alloc_inode(struct super_block *sb) +{ + struct bch_inode_info *inode; + + inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS); + if (!inode) + return NULL; + + inode_init_once(&inode->v); + mutex_init(&inode->ei_update_lock); + pagecache_lock_init(&inode->ei_pagecache_lock); + mutex_init(&inode->ei_quota_lock); + inode->ei_journal_seq = 0; + + return &inode->v; +} + +static void bch2_i_callback(struct rcu_head *head) +{ + struct inode *vinode = container_of(head, struct inode, i_rcu); + struct bch_inode_info *inode = to_bch_ei(vinode); + + kmem_cache_free(bch2_inode_cache, inode); +} + +static void bch2_destroy_inode(struct inode *vinode) +{ + call_rcu(&vinode->i_rcu, bch2_i_callback); +} + +static int inode_update_times_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + bi->bi_atime = timespec_to_bch2_time(c, inode->v.i_atime); + bi->bi_mtime = timespec_to_bch2_time(c, inode->v.i_mtime); + bi->bi_ctime = timespec_to_bch2_time(c, inode_get_ctime(&inode->v)); + + return 0; +} + +static int bch2_vfs_write_inode(struct inode *vinode, + struct writeback_control *wbc) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); + int ret; + + mutex_lock(&inode->ei_update_lock); + ret = __bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); + mutex_unlock(&inode->ei_update_lock); + + if (c->opts.journal_flush_disabled) + return ret; + + if (!ret && wbc->sync_mode == WB_SYNC_ALL) + ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq); + + return 
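/* error from the inode update or the journal flush */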
ret; +} + +static void bch2_evict_inode(struct inode *vinode) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *inode = to_bch_ei(vinode); + + truncate_inode_pages_final(&inode->v.i_data); + + clear_inode(&inode->v); + + BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); + + if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { + bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), + BCH_QUOTA_WARN); + bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, + BCH_QUOTA_WARN); + bch2_inode_rm(c, inode->v.i_ino); + + WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0, + "nr_inodes < 0"); + } +} + +static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct bch_fs *c = sb->s_fs_info; + u64 fsid; + + buf->f_type = BCACHEFS_STATFS_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT; + buf->f_bfree = bch2_fs_sectors_free(c, bch2_fs_usage_read(c)) >> + PAGE_SECTOR_SHIFT; + buf->f_bavail = buf->f_bfree; + buf->f_files = atomic_long_read(&c->nr_inodes); + buf->f_ffree = U64_MAX; + + fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ + le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); + buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + buf->f_namelen = BCH_NAME_MAX; + + return 0; +} + +static int bch2_sync_fs(struct super_block *sb, int wait) +{ + struct bch_fs *c = sb->s_fs_info; + + if (!wait) { + bch2_journal_flush_async(&c->journal, NULL); + return 0; + } + + return bch2_journal_flush(&c->journal); +} + +static struct bch_fs *bch2_path_to_fs(const char *path) +{ + struct bch_fs *c; + dev_t dev; + int ret; + + ret = lookup_bdev(path, &dev); + if (ret) + return ERR_PTR(ret); + + c = bch2_dev_to_fs(dev); + return c ?: ERR_PTR(-ENOENT); +} + +static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs, + unsigned nr_devs, struct bch_opts opts) +{ + struct bch_fs *c, *c1, *c2; + size_t i; + + if (!nr_devs) + return ERR_PTR(-EINVAL); + + c = bch2_fs_open(devs, nr_devs, opts); + + if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) { + /* + * Already open? 
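+ * (bch2_fs_open() just failed with -EBUSY, so some device is already claimed.)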
+ * Look up each block device, make sure they all belong to a + * filesystem and they all belong to the _same_ filesystem + */ + + c1 = bch2_path_to_fs(devs[0]); + if (!c1) + return c; + + for (i = 1; i < nr_devs; i++) { + c2 = bch2_path_to_fs(devs[i]); + if (!IS_ERR(c2)) + closure_put(&c2->cl); + + if (c1 != c2) { + closure_put(&c1->cl); + return c; + } + } + + c = c1; + } + + if (IS_ERR(c)) + return c; + + mutex_lock(&c->state_lock); + + if (!bch2_fs_running(c)) { + mutex_unlock(&c->state_lock); + closure_put(&c->cl); + pr_err("err mounting %s: incomplete filesystem", dev_name); + return ERR_PTR(-EINVAL); + } + + mutex_unlock(&c->state_lock); + + set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); + return c; +} + +static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, + struct bch_opts opts) +{ + char *dev_name = NULL, **devs = NULL, *s; + struct bch_fs *c = ERR_PTR(-ENOMEM); + size_t i, nr_devs = 0; + + dev_name = kstrdup(_dev_name, GFP_KERNEL); + if (!dev_name) + goto err; + + for (s = dev_name; s; s = strchr(s + 1, ':')) + nr_devs++; + + devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); + if (!devs) + goto err; + + for (i = 0, s = dev_name; + s; + (s = strchr(s, ':')) && (*s++ = '\0')) + devs[i++] = s; + + c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts); +err: + kfree(devs); + kfree(dev_name); + return c; +} + +static int bch2_remount(struct super_block *sb, int *flags, char *data) +{ + struct bch_fs *c = sb->s_fs_info; + struct bch_opts opts = bch2_opts_empty(); + int ret; + + opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); + + ret = bch2_parse_mount_opts(&opts, data); + if (ret) + return ret; + + if (opts.read_only != c->opts.read_only) { + const char *err = NULL; + + mutex_lock(&c->state_lock); + + if (opts.read_only) { + bch2_fs_read_only(c); + + sb->s_flags |= SB_RDONLY; + } else { + err = bch2_fs_read_write(c); + if (err) { + bch_err(c, "error going rw: %s", err); + return -EINVAL; + } + + sb->s_flags &= ~SB_RDONLY; + } + + c->opts.read_only = opts.read_only; + + mutex_unlock(&c->state_lock); + } + + if (opts.errors >= 0) + c->opts.errors = opts.errors; + + return ret; +} + +static int bch2_show_options(struct seq_file *seq, struct dentry *root) +{ + struct bch_fs *c = root->d_sb->s_fs_info; + enum bch_opt_id i; + char buf[512]; + + for (i = 0; i < bch2_opts_nr; i++) { + const struct bch_option *opt = &bch2_opt_table[i]; + u64 v = bch2_opt_get_by_id(&c->opts, i); + + if (opt->mode < OPT_MOUNT) + continue; + + if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) + continue; + + bch2_opt_to_text(c, buf, sizeof(buf), opt, v, + OPT_SHOW_MOUNT_STYLE); + seq_putc(seq, ','); + seq_puts(seq, buf); + } + + return 0; + +} + +static const struct super_operations bch_super_operations = { + .alloc_inode = bch2_alloc_inode, + .destroy_inode = bch2_destroy_inode, + .write_inode = bch2_vfs_write_inode, + .evict_inode = bch2_evict_inode, + .sync_fs = bch2_sync_fs, + .statfs = bch2_statfs, + .show_options = bch2_show_options, + .remount_fs = bch2_remount, +#if 0 + .put_super = bch2_put_super, + .freeze_fs = bch2_freeze, + .unfreeze_fs = bch2_unfreeze, +#endif +}; + +static int bch2_test_super(struct super_block *s, void *data) +{ + return s->s_fs_info == data; +} + +static int bch2_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + return 0; +} + +static struct dentry *bch2_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + struct bch_fs *c; + struct bch_dev *ca; + struct super_block *sb; + struct inode 
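/* will point at the root inode */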
*vinode; + struct bch_opts opts = bch2_opts_empty(); + unsigned i; + int ret; + + opt_set(opts, read_only, (flags & SB_RDONLY) != 0); + + ret = bch2_parse_mount_opts(&opts, data); + if (ret) + return ERR_PTR(ret); + + c = bch2_open_as_blockdevs(dev_name, opts); + if (IS_ERR(c)) + return ERR_CAST(c); + + sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c); + if (IS_ERR(sb)) { + closure_put(&c->cl); + return ERR_CAST(sb); + } + + BUG_ON(sb->s_fs_info != c); + + if (sb->s_root) { + closure_put(&c->cl); + + if ((flags ^ sb->s_flags) & SB_RDONLY) { + ret = -EBUSY; + goto err_put_super; + } + goto out; + } + + /* XXX: blocksize */ + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &bch_super_operations; + sb->s_export_op = &bch_export_ops; +#ifdef CONFIG_BCACHEFS_QUOTA + sb->s_qcop = &bch2_quotactl_operations; + sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ; +#endif + sb->s_xattr = bch2_xattr_handlers; + sb->s_magic = BCACHEFS_STATFS_MAGIC; + sb->s_time_gran = c->sb.time_precision; + c->vfs_sb = sb; + strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); + + ret = super_setup_bdi(sb); + if (ret) + goto err_put_super; + + sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; + + for_each_online_member(ca, c, i) { + struct block_device *bdev = ca->disk_sb.bdev; + + /* XXX: create an anonymous device for multi device filesystems */ + sb->s_bdev = bdev; + sb->s_dev = bdev->bd_dev; + percpu_ref_put(&ca->io_ref); + break; + } + +#ifdef CONFIG_BCACHEFS_POSIX_ACL + if (c->opts.acl) + sb->s_flags |= SB_POSIXACL; +#endif + + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); + if (IS_ERR(vinode)) { + ret = PTR_ERR(vinode); + goto err_put_super; + } + + sb->s_root = d_make_root(vinode); + if (!sb->s_root) { + ret = -ENOMEM; + goto err_put_super; + } + + sb->s_flags |= SB_ACTIVE; +out: + return dget(sb->s_root); + +err_put_super: + deactivate_locked_super(sb); + return ERR_PTR(ret); +} + +static void bch2_kill_sb(struct super_block *sb) +{ + struct bch_fs *c = sb->s_fs_info; + + generic_shutdown_super(sb); + + if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags)) + bch2_fs_stop(c); + else + closure_put(&c->cl); +} + +static struct file_system_type bcache_fs_type = { + .owner = THIS_MODULE, + .name = "bcachefs", + .mount = bch2_mount, + .kill_sb = bch2_kill_sb, + .fs_flags = FS_REQUIRES_DEV, +}; + +MODULE_ALIAS_FS("bcachefs"); + +void bch2_vfs_exit(void) +{ + unregister_filesystem(&bcache_fs_type); + if (bch2_inode_cache) + kmem_cache_destroy(bch2_inode_cache); +} + +int __init bch2_vfs_init(void) +{ + int ret = -ENOMEM; + + bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0); + if (!bch2_inode_cache) + goto err; + + ret = register_filesystem(&bcache_fs_type); + if (ret) + goto err; + + return 0; +err: + bch2_vfs_exit(); + return ret; +} + +#endif /* NO_BCACHEFS_FS */ -- cgit v1.2.3 From 4e1ec2cc0d82f1d4344e7b5a53229c9ccde8437d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 17 Jul 2018 14:03:47 -0400 Subject: bcachefs: Simplify bch2_write_inode_trans, fix lockdep splat ei_update_lock isn't currently needed for write inode (but it will be needed again when deferred btree updates are used for inode updates) Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 6 ++++-- fs/bcachefs/fs.c | 33 +++++++++------------------------ 2 files changed, 13 insertions(+), 26 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index b53fbdc15c87..29d289b0dfa5 100644 --- a/fs/bcachefs/fs-io.c 
+++ b/fs/bcachefs/fs-io.c @@ -355,8 +355,6 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook, h->inode_u.bi_size = offset; do_pack = true; - inode->ei_inode.bi_size = offset; - spin_lock(&inode->v.i_lock); if (offset > inode->v.i_size) { if (h->op->is_dio) @@ -478,6 +476,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop) &hook.hook, op_journal_seq(wop), BTREE_INSERT_NOFAIL| BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| BTREE_INSERT_USE_RESERVE, BTREE_INSERT_ENTRY(extent_iter, k)); } @@ -493,6 +492,9 @@ err: if (ret) break; + if (hook.need_inode_update) + op->inode->ei_inode = hook.inode_u; + BUG_ON(bkey_cmp(extent_iter->pos, k->k.p) < 0); bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3f3d916e0d37..53107d02cbb6 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -157,35 +157,20 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, { struct btree_iter *iter; struct bkey_inode_buf *inode_p; - struct bkey_s_c k; - u64 inum = inode->v.i_ino; int ret; - lockdep_assert_held(&inode->ei_update_lock); - - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, + POS(inode->v.i_ino, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); if (IS_ERR(iter)) return PTR_ERR(iter); - k = bch2_btree_iter_peek_slot(iter); - if ((ret = btree_iter_err(k))) + /* The btree node lock is our lock on the inode: */ + ret = bch2_btree_iter_traverse(iter); + if (ret) return ret; - if (WARN_ONCE(k.k->type != BCH_INODE_FS, - "inode %llu not found when updating", inum)) - return -ENOENT; - - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode_u); - if (WARN_ONCE(ret, - "error %i unpacking inode %llu", ret, inum)) - return -ENOENT; - - BUG_ON(inode_u->bi_size != inode->ei_inode.bi_size); - - BUG_ON(inode_u->bi_size != inode->ei_inode.bi_size && - !(inode_u->bi_flags & BCH_INODE_I_SIZE_DIRTY) && - inode_u->bi_size > i_size_read(&inode->v)); + *inode_u = inode->ei_inode; if (set) { ret = set(inode, inode_u, p); @@ -505,8 +490,6 @@ static int __bch2_link(struct bch_fs *c, struct bch_inode_unpacked inode_u; int ret; - lockdep_assert_held(&inode->v.i_rwsem); - bch2_trans_init(&trans, c); retry: bch2_trans_begin(&trans); @@ -543,6 +526,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode); int ret; + lockdep_assert_held(&inode->v.i_rwsem); + ret = __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) return ret; -- cgit v1.2.3 From 2ea9004864b918be34e742e38fb08d868600d020 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 17 Jul 2018 14:12:42 -0400 Subject: bcachefs: Fix mtime/ctime updates Also make inode flags consistent with how the rest of the inode is updated Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 3 +- fs/bcachefs/fs-io.c | 45 ++++++++++++++++-------- fs/bcachefs/fs-ioctl.c | 92 +++++--------------------------------------------- fs/bcachefs/fs-ioctl.h | 73 ++++++++++++++++++++++++++++++++++++++- fs/bcachefs/fs.c | 32 +++++++----------- fs/bcachefs/fs.h | 7 ++-- fs/bcachefs/xattr.c | 2 +- 7 files changed, 130 insertions(+), 124 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index eaf5c8e138fb..7ee2022d9501 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -286,10 +286,9 @@ static int inode_update_for_set_acl_fn(struct bch_inode_info *inode, void *p) { struct 
bch_fs *c = inode->v.i_sb->s_fs_info; - struct timespec64 now = current_time(&inode->v); umode_t mode = (unsigned long) p; - bi->bi_ctime = timespec_to_bch2_time(c, now); + bi->bi_ctime = bch2_current_time(c); bi->bi_mode = mode; return 0; } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 29d289b0dfa5..33c379ecf5a1 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -177,23 +177,40 @@ static int bch2_quota_reservation_add(struct bch_fs *c, /* i_size updates: */ +struct inode_new_size { + loff_t new_size; + u64 now; + unsigned fields; +}; + static int inode_set_size(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { - loff_t *new_i_size = p; + struct inode_new_size *s = p; - lockdep_assert_held(&inode->ei_update_lock); + bi->bi_size = s->new_size; + if (s->fields & ATTR_ATIME) + bi->bi_atime = s->now; + if (s->fields & ATTR_MTIME) + bi->bi_mtime = s->now; + if (s->fields & ATTR_CTIME) + bi->bi_ctime = s->now; - bi->bi_size = *new_i_size; return 0; } static int __must_check bch2_write_inode_size(struct bch_fs *c, struct bch_inode_info *inode, - loff_t new_size) + loff_t new_size, unsigned fields) { - return __bch2_write_inode(c, inode, inode_set_size, &new_size, 0); + struct inode_new_size s = { + .new_size = new_size, + .now = bch2_current_time(c), + .fields = fields, + }; + + return bch2_write_inode(c, inode, inode_set_size, &s, fields); } static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, @@ -241,6 +258,7 @@ static int i_sectors_dirty_finish_fn(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { + struct bch_fs *c = inode->v.i_sb->s_fs_info; struct i_sectors_hook *h = p; if (h->new_i_size != U64_MAX && @@ -249,6 +267,7 @@ static int i_sectors_dirty_finish_fn(struct bch_inode_info *inode, bi->bi_size = h->new_i_size; bi->bi_sectors += h->sectors; bi->bi_flags &= ~h->flags; + bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); return 0; } @@ -259,7 +278,7 @@ static int i_sectors_dirty_finish(struct bch_fs *c, struct i_sectors_hook *h) mutex_lock(&h->inode->ei_update_lock); i_sectors_acct(c, h->inode, &h->quota_res, h->sectors); - ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h, 0); + ret = bch2_write_inode(c, h->inode, i_sectors_dirty_finish_fn, h, 0); if (!ret && h->new_i_size != U64_MAX) i_size_write(&h->inode->v, h->new_i_size); @@ -289,7 +308,7 @@ static int i_sectors_dirty_start(struct bch_fs *c, struct i_sectors_hook *h) int ret; mutex_lock(&h->inode->ei_update_lock); - ret = __bch2_write_inode(c, h->inode, i_sectors_dirty_start_fn, h, 0); + ret = bch2_write_inode(c, h->inode, i_sectors_dirty_start_fn, h, 0); mutex_unlock(&h->inode->ei_update_lock); return ret; @@ -2223,9 +2242,8 @@ static int bch2_extend(struct bch_inode_info *inode, struct iattr *iattr) setattr_copy(NULL, &inode->v, iattr); mutex_lock(&inode->ei_update_lock); - inode_set_ctime_current(&inode->v); - inode->v.i_mtime = inode_get_ctime(&inode->v); - ret = bch2_write_inode_size(c, inode, inode->v.i_size); + ret = bch2_write_inode_size(c, inode, inode->v.i_size, + ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); return ret; @@ -2284,8 +2302,6 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) /* ATTR_MODE will never be set here, ns argument isn't needed: */ setattr_copy(NULL, &inode->v, iattr); - inode_set_ctime_current(&inode->v); - inode->v.i_mtime = inode_get_ctime(&inode->v); out: ret = i_sectors_dirty_finish(c, &i_sectors_hook) ?: ret; err_put_pagecache: @@ -2617,7 +2633,7 @@ 
btree_iter_err: i_size_write(&inode->v, end); mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, inode->v.i_size); + ret = bch2_write_inode_size(c, inode, inode->v.i_size, 0); mutex_unlock(&inode->ei_update_lock); } @@ -2633,7 +2649,8 @@ btree_iter_err: if (inode->ei_inode.bi_size != inode->v.i_size) { mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, inode->v.i_size); + ret = bch2_write_inode_size(c, inode, + inode->v.i_size, 0); mutex_unlock(&inode->ei_update_lock); } } diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 895ccc79e782..a89786f295cf 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -12,79 +12,6 @@ #define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) -/* Inode flags: */ - -/* bcachefs inode flags -> vfs inode flags: */ -static const unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_SYNC] = S_SYNC, - [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, - [__BCH_INODE_APPEND] = S_APPEND, - [__BCH_INODE_NOATIME] = S_NOATIME, -}; - -/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -static const unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_SYNC] = FS_SYNC_FL, - [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, - [__BCH_INODE_APPEND] = FS_APPEND_FL, - [__BCH_INODE_NODUMP] = FS_NODUMP_FL, - [__BCH_INODE_NOATIME] = FS_NOATIME_FL, -}; - -/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -static const unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, - [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, - [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, - [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, - //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -}; - -#define set_flags(_map, _in, _out) \ -do { \ - unsigned _i; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & (1 << _i)) \ - (_out) |= _map[_i]; \ - else \ - (_out) &= ~_map[_i]; \ -} while (0) - -#define map_flags(_map, _in) \ -({ \ - unsigned _out = 0; \ - \ - set_flags(_map, _in, _out); \ - _out; \ -}) - -#define map_flags_rev(_map, _in) \ -({ \ - unsigned _i, _out = 0; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & _map[_i]) { \ - (_out) |= 1 << _i; \ - (_in) &= ~_map[_i]; \ - } \ - (_out); \ -}) - -#define map_defined(_map) \ -({ \ - unsigned _in = ~0; \ - \ - map_flags_rev(_map, _in); \ -}) - -/* Set VFS inode flags from bcachefs inode: */ -void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -{ - set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -} - struct flags_set { unsigned mask; unsigned flags; @@ -96,6 +23,7 @@ static int bch2_inode_flags_set(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { + struct bch_fs *c = inode->v.i_sb->s_fs_info; /* * We're relying on btree locking here for exclusion with other ioctl * calls - use the flags in the btree (@bi), not inode->i_flags: @@ -108,14 +36,15 @@ static int bch2_inode_flags_set(struct bch_inode_info *inode, !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - if (!S_ISREG(inode->v.i_mode) && - !S_ISDIR(inode->v.i_mode) && + if (!S_ISREG(bi->bi_mode) && + !S_ISDIR(bi->bi_mode) && (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags) return -EINVAL; bi->bi_flags &= ~s->mask; bi->bi_flags |= newflags; - inode_set_ctime_current(&inode->v); + + bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); return 0; } @@ -153,10 +82,8 @@ static int bch2_ioc_setflags(struct bch_fs *c, } mutex_lock(&inode->ei_update_lock); - ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &s, 
0); - - if (!ret) - bch2_inode_flags_to_vfs(inode); + ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, + ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); setflags_out: @@ -242,9 +169,8 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c, if (ret) goto err_unlock; - ret = __bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, 0); - if (!ret) - bch2_inode_flags_to_vfs(inode); + ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ATTR_CTIME); err_unlock: mutex_unlock(&inode->ei_update_lock); err: diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index 2d117ef80ab2..f201980ef2c3 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -2,7 +2,78 @@ #ifndef _BCACHEFS_FS_IOCTL_H #define _BCACHEFS_FS_IOCTL_H -void bch2_inode_flags_to_vfs(struct bch_inode_info *); +/* Inode flags: */ + +/* bcachefs inode flags -> vfs inode flags: */ +static const unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_SYNC] = S_SYNC, + [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, + [__BCH_INODE_APPEND] = S_APPEND, + [__BCH_INODE_NOATIME] = S_NOATIME, +}; + +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_SYNC] = FS_SYNC_FL, + [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, + [__BCH_INODE_APPEND] = FS_APPEND_FL, + [__BCH_INODE_NODUMP] = FS_NODUMP_FL, + [__BCH_INODE_NOATIME] = FS_NOATIME_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, + [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, + [__BCH_INODE_NODUMP] = FS_XFLAG_NODUMP, + [__BCH_INODE_NOATIME] = FS_XFLAG_NOATIME, + //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; +}; + +#define set_flags(_map, _in, _out) \ +do { \ + unsigned _i; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + else \ + (_out) &= ~_map[_i]; \ +} while (0) + +#define map_flags(_map, _in) \ +({ \ + unsigned _out = 0; \ + \ + set_flags(_map, _in, _out); \ + _out; \ +}) + +#define map_flags_rev(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & _map[_i]) { \ + (_out) |= 1 << _i; \ + (_in) &= ~_map[_i]; \ + } \ + (_out); \ +}) + +#define map_defined(_map) \ +({ \ + unsigned _in = ~0; \ + \ + map_flags_rev(_map, _in); \ +}) + +/* Set VFS inode flags from bcachefs inode: */ +static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) +{ + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); +} long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 53107d02cbb6..2e2a5acae0eb 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -147,6 +147,8 @@ void bch2_inode_update_after_write(struct bch_fs *c, inode->ei_inode = *bi; inode->ei_qid = bch_qid(bi); + + bch2_inode_flags_to_vfs(inode); } int __must_check bch2_write_inode_trans(struct btree_trans *trans, @@ -187,10 +189,10 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, return 0; } -int __must_check __bch2_write_inode(struct bch_fs *c, - struct bch_inode_info *inode, - inode_set_fn set, - void *p, unsigned fields) +int __must_check bch2_write_inode(struct bch_fs *c, + struct bch_inode_info *inode, + inode_set_fn set, + void *p, unsigned fields) { struct btree_trans trans; struct bch_inode_unpacked inode_u; @@ -271,9 +273,8 
@@ static int inode_update_for_create_fn(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_inode_unpacked *new_inode = p; - struct timespec64 now = current_time(&inode->v); - bi->bi_mtime = bi->bi_ctime = timespec_to_bch2_time(c, now); + bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); if (S_ISDIR(new_inode->bi_mode)) bi->bi_nlink++; @@ -469,9 +470,8 @@ static int inode_update_for_link_fn(struct bch_inode_info *inode, void *p) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct timespec64 now = current_time(&inode->v); - bi->bi_ctime = timespec_to_bch2_time(c, now); + bi->bi_ctime = bch2_current_time(c); if (bi->bi_flags & BCH_INODE_UNLINKED) bi->bi_flags &= ~BCH_INODE_UNLINKED; @@ -543,9 +543,8 @@ static int inode_update_dir_for_unlink_fn(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_inode_info *unlink_inode = p; - struct timespec64 now = current_time(&inode->v); - bi->bi_mtime = bi->bi_ctime = timespec_to_bch2_time(c, now); + bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); bi->bi_nlink -= S_ISDIR(unlink_inode->v.i_mode); @@ -557,9 +556,8 @@ static int inode_update_for_unlink_fn(struct bch_inode_info *inode, void *p) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct timespec64 now = current_time(&inode->v); - bi->bi_ctime = timespec_to_bch2_time(c, now); + bi->bi_ctime = bch2_current_time(c); if (bi->bi_nlink) bi->bi_nlink--; else @@ -740,8 +738,6 @@ static int bch2_rename2(struct mnt_idmap *idmap, { struct bch_fs *c = src_vdir->i_sb->s_fs_info; struct rename_info i = { - .now = timespec_to_bch2_time(c, - current_time(src_vdir)), .src_dir = to_bch_ei(src_vdir), .dst_dir = to_bch_ei(dst_vdir), .src_inode = to_bch_ei(src_dentry->d_inode), @@ -778,7 +774,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, bch2_trans_init(&trans, c); retry: bch2_trans_begin(&trans); - i.now = timespec_to_bch2_time(c, current_time(src_vdir)), + i.now = bch2_current_time(c); ret = bch2_dirent_rename(&trans, i.src_dir, &src_dentry->d_name, @@ -1271,8 +1267,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->ei_quota_reserved = 0; inode->ei_str_hash = bch2_hash_info_init(c, bi); - bch2_inode_flags_to_vfs(inode); - inode->v.i_mapping->a_ops = &bch_address_space_operations; switch (inode->v.i_mode & S_IFMT) { @@ -1346,8 +1340,8 @@ static int bch2_vfs_write_inode(struct inode *vinode, int ret; mutex_lock(&inode->ei_update_lock); - ret = __bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); + ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); if (c->opts.journal_flush_disabled) diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index e8dd566285fc..4fdc11762cd7 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_FS_H #define _BCACHEFS_FS_H +#include "inode.h" #include "opts.h" #include "str_hash.h" #include "quota_types.h" @@ -81,10 +82,8 @@ int __must_check bch2_write_inode_trans(struct btree_trans *, struct bch_inode_info *, struct bch_inode_unpacked *, inode_set_fn, void *); -int __must_check __bch2_write_inode(struct bch_fs *, struct bch_inode_info *, - inode_set_fn, void *, unsigned); -int __must_check bch2_write_inode(struct bch_fs *, - struct bch_inode_info *); +int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, + inode_set_fn, void *, unsigned); void bch2_vfs_exit(void); int bch2_vfs_init(void); diff --git 
a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index f0440d12a031..cb84bdabb6ed 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -436,7 +436,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, } mutex_lock(&inode->ei_update_lock); - ret = __bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); + ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); mutex_unlock(&inode->ei_update_lock); if (value && -- cgit v1.2.3 From fc88796d1ce84181bbf4fb3618305a417454b806 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 17 Jul 2018 15:28:11 -0400 Subject: bcachefs: bch2_trans_update() now takes struct btree_insert_entry Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 2 +- fs/bcachefs/btree_update.h | 11 +++++++++-- fs/bcachefs/btree_update_leaf.c | 20 -------------------- fs/bcachefs/dirent.c | 8 +++++--- fs/bcachefs/fs.c | 2 +- fs/bcachefs/inode.c | 3 ++- fs/bcachefs/str_hash.h | 6 +++--- 7 files changed, 21 insertions(+), 31 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 7ee2022d9501..c81e5365ec84 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -375,7 +375,7 @@ int bch2_acl_chmod(struct btree_trans *trans, goto err; } - bch2_trans_update(trans, iter, &new->k_i, 0); + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new->k_i)); *new_acl = acl; acl = NULL; err: diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 451d486fb032..31b72895f6eb 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -140,8 +140,15 @@ int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, /* new transactional interface: */ -void bch2_trans_update(struct btree_trans *, struct btree_iter *, - struct bkey_i *, unsigned); +static inline void +bch2_trans_update(struct btree_trans *trans, + struct btree_insert_entry entry) +{ + BUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates)); + + trans->updates[trans->nr_updates++] = entry; +} + int bch2_trans_commit(struct btree_trans *, struct disk_reservation *, struct extent_insert_hook *, diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 5cd20b572759..7ce2e35dafa2 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -581,26 +581,6 @@ err: goto out; } -void bch2_trans_update(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *k, - unsigned extra_journal_res) -{ - struct btree_insert_entry *i; - - BUG_ON(trans->nr_updates >= ARRAY_SIZE(trans->updates)); - - i = &trans->updates[trans->nr_updates++]; - - *i = (struct btree_insert_entry) { - .iter = iter, - .k = k, - .extra_res = extra_journal_res, - }; - - btree_insert_entry_checks(trans->c, i); -} - int bch2_trans_commit(struct btree_trans *trans, struct disk_reservation *disk_res, struct extent_insert_hook *hook, diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 9e5936faf1af..18078cc2ca62 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -290,7 +290,9 @@ int bch2_dirent_rename(struct btree_trans *trans, * new_dst at the src position: */ new_dst->k.p = src_iter->pos; - bch2_trans_update(trans, src_iter, &new_dst->k_i, 0); + bch2_trans_update(trans, + BTREE_INSERT_ENTRY(src_iter, + &new_dst->k_i)); return 0; } else { /* If we're overwriting, we can't insert new_dst @@ -313,8 +315,8 @@ int bch2_dirent_rename(struct btree_trans *trans, } } - bch2_trans_update(trans, src_iter, &new_src->k_i, 0); - bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); + 
bch2_trans_update(trans, BTREE_INSERT_ENTRY(src_iter, &new_src->k_i)); + bch2_trans_update(trans, BTREE_INSERT_ENTRY(dst_iter, &new_dst->k_i)); return 0; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 2e2a5acae0eb..f10ee147d389 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -185,7 +185,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode_u); - bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); return 0; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 2d635555bffb..f40ec37d7f0f 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -346,7 +346,8 @@ again: inode_u->bi_generation = bkey_generation(k); bch2_inode_pack(inode_p, inode_u); - bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); + bch2_trans_update(trans, + BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); return 0; } } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 0947fdcdc4cd..fbd6c3372677 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -255,14 +255,14 @@ not_found: return -ENOENT; insert->k.p = slot->pos; - bch2_trans_update(trans, slot, insert, 0); + bch2_trans_update(trans, BTREE_INSERT_ENTRY(slot, insert)); return 0; found: if (flags & BCH_HASH_SET_MUST_CREATE) return -EEXIST; insert->k.p = iter->pos; - bch2_trans_update(trans, iter, insert, 0); + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); return 0; } @@ -297,7 +297,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans, delete->k.p = iter->pos; delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED; - bch2_trans_update(trans, iter, delete, 0); + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete)); return 0; } -- cgit v1.2.3 From 19ee5f2ac4b34658b417073f4edc27ade11a01ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Jul 2018 22:23:42 -0400 Subject: bcachefs: Use ei_update_lock consistently This is prep work for using deferred btree updates for inode updates. The way inode updates are done now, we're relying on btree locking for ei_inode, so ei_update_lock could probably be removed; but it will actually be needed when we switch to deferred updates.
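Unlink and rename now have to take ei_update_lock on every inode they touch, and taking multiple locks is only deadlock-free if every caller acquires them in one global order. The bch2_lock_inodes() macro in the diff below gets that order by sorting the locks by address, skipping NULLs and duplicates, and passing the array index to mutex_lock_nested() as the lockdep subclass (the locks are all in the same lock class, so lockdep would otherwise complain about the nesting). As a minimal, standalone sketch of the same idea in userspace C with pthreads; the inode_like, lock_inodes() and unlock_inodes() names are illustrative, not part of this patch:

    #include <pthread.h>
    #include <stdlib.h>

    struct inode_like {
            pthread_mutex_t update_lock;
    };

    /* The global lock order: the addresses of the objects themselves. */
    static int ptr_cmp(const void *l, const void *r)
    {
            const struct inode_like *a = *(const struct inode_like * const *) l;
            const struct inode_like *b = *(const struct inode_like * const *) r;

            return (a > b) - (a < b);
    }

    /*
     * Lock @nr inodes at once: sort by address so every caller acquires
     * in the same order; skip NULL slots and duplicate pointers.
     */
    static void lock_inodes(struct inode_like **inodes, size_t nr)
    {
            size_t i;

            qsort(inodes, nr, sizeof(*inodes), ptr_cmp);

            for (i = 0; i < nr; i++)
                    if (inodes[i] && (!i || inodes[i] != inodes[i - 1]))
                            pthread_mutex_lock(&inodes[i]->update_lock);
    }

    static void unlock_inodes(struct inode_like **inodes, size_t nr)
    {
            size_t i;

            /* inodes[] is still sorted from lock_inodes(); release in reverse: */
            for (i = nr; i--;)
                    if (inodes[i] && (!i || inodes[i] != inodes[i - 1]))
                            pthread_mutex_unlock(&inodes[i]->update_lock);
    }

    int main(void)
    {
            struct inode_like src_dir, dst_dir, src_inode;
            /* a plain rename has no destination inode: model it as NULL */
            struct inode_like *locks[] = { &src_dir, &dst_dir, &src_inode, NULL };

            pthread_mutex_init(&src_dir.update_lock, NULL);
            pthread_mutex_init(&dst_dir.update_lock, NULL);
            pthread_mutex_init(&src_inode.update_lock, NULL);

            lock_inodes(locks, 4);
            /* ... update all the inodes atomically w.r.t. other operations ... */
            unlock_inodes(locks, 4);
            return 0;
    }

With this scheme, two racing renames that touch the same inodes in opposite roles still take the mutexes in identical address order, so neither can end up holding one lock while waiting for the other's.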
Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 8 +++++--- fs/bcachefs/fs.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index c81e5365ec84..2856736f7224 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -304,13 +304,14 @@ int bch2_set_acl(struct mnt_idmap *idmap, umode_t mode = inode->v.i_mode; int ret; + mutex_lock(&inode->ei_update_lock); + bch2_trans_init(&trans, c); + if (type == ACL_TYPE_ACCESS && acl) { ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl); if (ret) - return ret; + goto err; } - - bch2_trans_init(&trans, c); retry: bch2_trans_begin(&trans); @@ -336,6 +337,7 @@ retry: set_cached_acl(&inode->v, type, acl); err: bch2_trans_exit(&trans); + mutex_unlock(&inode->ei_update_lock); return ret; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f10ee147d389..a4d82252bc49 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -51,6 +51,30 @@ static void journal_seq_copy(struct bch_inode_info *dst, } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); } +static inline int ptrcmp(void *l, void *r) +{ + return (l > r) - (l < r); +} + +#define __bch2_lock_inodes(_lock, ...) \ +do { \ + struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ + unsigned i; \ + \ + bubble_sort(&a[1], ARRAY_SIZE(a) - 1 , ptrcmp); \ + \ + for (i = ARRAY_SIZE(a) - 1; a[i]; --i) \ + if (a[i] != a[i - 1]) { \ + if (_lock) \ + mutex_lock_nested(&a[i]->ei_update_lock, i);\ + else \ + mutex_unlock(&a[i]->ei_update_lock); \ + } \ +} while (0) + +#define bch2_lock_inodes(...) __bch2_lock_inodes(true, __VA_ARGS__) +#define bch2_unlock_inodes(...) __bch2_lock_inodes(false, __VA_ARGS__) + static void __pagecache_lock_put(struct pagecache_lock *lock, long i) { BUG_ON(atomic_long_read(&lock->v) == 0); @@ -161,6 +185,8 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, struct bkey_inode_buf *inode_p; int ret; + lockdep_assert_held(&inode->ei_update_lock); + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inode->v.i_ino, 0), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -422,6 +448,9 @@ out: posix_acl_release(acl); return inode; err_trans: + if (!tmpfile) + mutex_unlock(&dir->ei_update_lock); + bch2_trans_exit(&trans); make_bad_inode(&inode->v); iput(&inode->v); @@ -490,6 +519,7 @@ static int __bch2_link(struct bch_fs *c, struct bch_inode_unpacked inode_u; int ret; + mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c); retry: bch2_trans_begin(&trans); @@ -515,6 +545,7 @@ retry: bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); bch2_trans_exit(&trans); + mutex_unlock(&inode->ei_update_lock); return ret; } @@ -575,6 +606,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct btree_trans trans; int ret; + bch2_lock_inodes(dir, inode); bch2_trans_init(&trans, c); retry: bch2_trans_begin(&trans); @@ -607,6 +639,7 @@ retry: ATTR_MTIME); err: bch2_trans_exit(&trans); + bch2_unlock_inodes(dir, inode); return ret; } @@ -771,6 +804,11 @@ static int bch2_rename2(struct mnt_idmap *idmap, return ret; } + bch2_lock_inodes(i.src_dir, + i.dst_dir, + i.src_inode, + i.dst_inode); + bch2_trans_init(&trans, c); retry: bch2_trans_begin(&trans); @@ -818,6 +856,10 @@ retry: ATTR_CTIME); err: bch2_trans_exit(&trans); + bch2_unlock_inodes(i.src_dir, + i.dst_dir, + i.src_inode, + i.dst_inode); return ret; } -- cgit v1.2.3 From d96b3ffe3884d8402ca64d1a1ae880460339ec63 Mon Sep 17 00:00:00 2001 From: 
Kent Overstreet Date: Sun, 22 Jul 2018 13:15:51 -0400 Subject: bcachefs: fix rename + fsync Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a4d82252bc49..cd29404e0b9b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -849,6 +849,10 @@ retry: journal_seq_copy(i.dst_dir, journal_seq); } + journal_seq_copy(i.src_inode, journal_seq); + if (i.dst_inode) + journal_seq_copy(i.dst_inode, journal_seq); + bch2_inode_update_after_write(c, i.src_inode, &src_inode_u, ATTR_CTIME); if (i.dst_inode) -- cgit v1.2.3 From 73ab6f356db737d9997c830730ab927bbdf6b678 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Jul 2018 05:48:35 -0400 Subject: bcachefs: fix fsync after create Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index cd29404e0b9b..5963f88b8156 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -327,6 +327,7 @@ __bch2_create(struct mnt_idmap *idmap, struct bch_inode_unpacked inode_u; struct bch_hash_info hash_info; struct posix_acl *default_acl = NULL, *acl = NULL; + u64 journal_seq = 0; int ret; bch2_inode_init(c, &inode_u, 0, 0, 0, rdev, &dir->ei_inode); @@ -391,7 +392,7 @@ retry: &inode_u) : 0) ?: bch2_trans_commit(&trans, NULL, NULL, - &inode->ei_journal_seq, + &journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); if (ret == -EINTR) @@ -409,6 +410,7 @@ retry: } bch2_vfs_inode_init(c, inode, &inode_u); + journal_seq_copy(inode, journal_seq); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); -- cgit v1.2.3 From 5b650fd11a00271b9d4c033d1d0780826e050137 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Jul 2018 14:54:39 -0400 Subject: bcachefs: Account for internal fragmentation better Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 3 +- fs/bcachefs/btree_update_interior.c | 3 +- fs/bcachefs/buckets.c | 115 +++++++++++++++++++++--------------- fs/bcachefs/buckets.h | 2 - fs/bcachefs/buckets_types.h | 4 +- fs/bcachefs/chardev.c | 4 +- fs/bcachefs/fs.c | 9 ++- fs/bcachefs/sysfs.c | 14 ++++- 8 files changed, 93 insertions(+), 61 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index a82677d053b0..1fbb9c657fc6 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -493,7 +493,8 @@ static void bch2_gc_start(struct bch_fs *c) struct bch_fs_usage *p = per_cpu_ptr(c->usage_percpu, cpu); - memset(p->s, 0, sizeof(p->s)); + memset(p->replicas, 0, sizeof(p->replicas)); + memset(p->buckets, 0, sizeof(p->buckets)); } percpu_up_write(&c->usage_lock); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index aba01a77e4af..a37b5edea699 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -184,7 +184,8 @@ found: */ replicas = bch2_extent_nr_dirty_ptrs(k); if (replicas) - stats->s[replicas - 1].data[BCH_DATA_BTREE] -= c->opts.btree_node_size; + stats->replicas[replicas - 1].data[BCH_DATA_BTREE] -= + c->opts.btree_node_size; /* * We're dropping @k from the btree, but it's still live until the diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index c0dc0ce1f585..56b197bff4f0 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -73,6 +73,8 @@ #include +static inline u64 
__bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); + #ifdef DEBUG_BUCKETS #define lg_local_lock lg_global_lock @@ -84,18 +86,24 @@ static void bch2_fs_stats_verify(struct bch_fs *c) __bch2_fs_usage_read(c); unsigned i, j; - for (i = 0; i < ARRAY_SIZE(stats.s); i++) { - for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++) - if ((s64) stats.s[i].data[j] < 0) - panic("replicas %u %s underflow: %lli\n", + for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) { + for (j = 0; j < ARRAY_SIZE(stats.replicas[i].data); j++) + if ((s64) stats.replicas[i].data[j] < 0) + panic("replicas %u %s sectors underflow: %lli\n", i + 1, bch_data_types[j], - stats.s[i].data[j]); + stats.replicas[i].data[j]); - if ((s64) stats.s[i].persistent_reserved < 0) + if ((s64) stats.replicas[i].persistent_reserved < 0) panic("replicas %u reserved underflow: %lli\n", - i + 1, stats.s[i].persistent_reserved); + i + 1, stats.replicas[i].persistent_reserved); } + for (j = 0; j < ARRAY_SIZE(stats.buckets); j++) + if ((s64) stats.buckets[j] < 0) + panic("%s buckets underflow: %lli\n", + bch_data_types[j], + stats.buckets[j]); + if ((s64) stats.online_reserved < 0) panic("sectors_online_reserved underflow: %lli\n", stats.online_reserved); @@ -238,6 +246,7 @@ bch2_fs_usage_read(struct bch_fs *c) } struct fs_usage_sum { + u64 hidden; u64 data; u64 reserved; }; @@ -247,14 +256,21 @@ static inline struct fs_usage_sum __fs_usage_sum(struct bch_fs_usage stats) struct fs_usage_sum sum = { 0 }; unsigned i, j; - for (i = 0; i < ARRAY_SIZE(stats.s); i++) { - u64 a = 0; + /* + * For superblock and journal we count bucket usage, not sector usage, + * because any internal fragmentation should _not_ be counted as + * free space: + */ + for (j = 1; j < BCH_DATA_BTREE; j++) + sum.hidden += stats.buckets[j]; - for (j = 0; j < ARRAY_SIZE(stats.s[i].data); j++) - a += stats.s[i].data[j]; + for (i = 0; i < ARRAY_SIZE(stats.replicas); i++) { + for (j = BCH_DATA_BTREE; + j < ARRAY_SIZE(stats.replicas[i].data); + j++) + sum.data += stats.replicas[i].data[j] * (i + 1); - sum.data += a * (i + 1); - sum.reserved += stats.s[i].persistent_reserved * (i + 1); + sum.reserved += stats.replicas[i].persistent_reserved * (i + 1); } sum.reserved += stats.online_reserved; @@ -270,14 +286,14 @@ static u64 reserve_factor(u64 r) static u64 avail_factor(u64 r) { - return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1; + return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); } -u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) +static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) { struct fs_usage_sum sum = __fs_usage_sum(stats); - return sum.data + reserve_factor(sum.reserved); + return sum.hidden + sum.data + reserve_factor(sum.reserved); } u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) @@ -285,9 +301,9 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) return min(c->capacity, __bch2_fs_sectors_used(c, stats)); } -u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats) +static u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats) { - return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats)); + return c->capacity - bch2_fs_sectors_used(c, stats); } static inline int is_unavailable_bucket(struct bucket_mark m) @@ -323,9 +339,9 @@ static bool bucket_became_unavailable(struct bch_fs *c, } void bch2_fs_usage_apply(struct bch_fs *c, - struct bch_fs_usage *stats, - struct disk_reservation *disk_res, - struct
gc_pos gc_pos) + struct bch_fs_usage *stats, + struct disk_reservation *disk_res, + struct gc_pos gc_pos) { struct fs_usage_sum sum = __fs_usage_sum(*stats); s64 added = sum.data + sum.reserved; @@ -358,6 +374,7 @@ void bch2_fs_usage_apply(struct bch_fs *c, } static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, + struct bch_fs_usage *stats, struct bucket_mark old, struct bucket_mark new) { struct bch_dev_usage *dev_usage; @@ -374,6 +391,9 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, bch2_data_types[new.data_type]); } + stats->buckets[bucket_type(old)] -= ca->mi.bucket_size; + stats->buckets[bucket_type(new)] += ca->mi.bucket_size; + preempt_disable(); dev_usage = this_cpu_ptr(ca->usage_percpu); @@ -399,17 +419,18 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, bch2_dev_stats_verify(ca); } -#define bucket_data_cmpxchg(c, ca, g, new, expr) \ +#define bucket_data_cmpxchg(c, ca, stats, g, new, expr) \ ({ \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ \ - bch2_dev_usage_update(c, ca, _old, new); \ + bch2_dev_usage_update(c, ca, stats, _old, new); \ _old; \ }) void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, struct bucket_mark *old) { + struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu); struct bucket *g; struct bucket_mark new; @@ -417,7 +438,7 @@ void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, g = bucket(ca, b); - *old = bucket_data_cmpxchg(c, ca, g, new, ({ + *old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ BUG_ON(!is_available_bucket(new)); new.owned_by_allocator = 1; @@ -436,6 +457,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, bool owned_by_allocator, struct gc_pos pos, unsigned flags) { + struct bch_fs_usage *stats = this_cpu_ptr(c->usage_percpu); struct bucket *g; struct bucket_mark old, new; @@ -446,7 +468,7 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, gc_will_visit(c, pos)) return; - old = bucket_data_cmpxchg(c, ca, g, new, ({ + old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ new.owned_by_allocator = owned_by_allocator; })); @@ -466,10 +488,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, unsigned sectors, struct gc_pos pos, unsigned flags) { + struct bch_fs_usage *stats; struct bucket *g; struct bucket_mark old, new; - BUG_ON(!type); + BUG_ON(type != BCH_DATA_SB && + type != BCH_DATA_JOURNAL); if (likely(c)) { percpu_rwsem_assert_held(&c->usage_lock); @@ -479,16 +503,17 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, return; } - rcu_read_lock(); + preempt_disable(); + stats = this_cpu_ptr(c->usage_percpu); g = bucket(ca, b); - old = bucket_data_cmpxchg(c, ca, g, new, ({ + old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ new.data_type = type; checked_add(new.dirty_sectors, sectors); - new.dirty_sectors += sectors; })); - rcu_read_unlock(); + stats->replicas[0].data[type] += sectors; + preempt_enable(); BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) && bucket_became_unavailable(c, old, new)); @@ -589,7 +614,7 @@ static void bch2_mark_pointer(struct bch_fs *c, old.v.counter, new.v.counter)) != old.v.counter); - bch2_dev_usage_update(c, ca, old, new); + bch2_dev_usage_update(c, ca, stats, old, new); BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) && bucket_became_unavailable(c, old, new)); @@ -601,6 +626,10 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, struct bch_fs_usage *stats, u64 journal_seq, unsigned 
flags) { + unsigned replicas = bch2_extent_nr_dirty_ptrs(k); + + BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas)); + /* * synchronization w.r.t. GC: * @@ -643,32 +672,22 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const struct bch_extent_ptr *ptr; struct bch_extent_crc_unpacked crc; - unsigned replicas = 0; BUG_ON(!sectors); - extent_for_each_ptr_crc(e, ptr, crc) { + extent_for_each_ptr_crc(e, ptr, crc) bch2_mark_pointer(c, e, ptr, crc, sectors, data_type, stats, journal_seq, flags); - replicas += !ptr->cached; - } - if (replicas) { - BUG_ON(replicas - 1 > ARRAY_SIZE(stats->s)); - stats->s[replicas - 1].data[data_type] += sectors; - } + if (replicas) + stats->replicas[replicas - 1].data[data_type] += sectors; break; } - case BCH_RESERVATION: { - struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); - - if (r.v->nr_replicas) { - BUG_ON(r.v->nr_replicas - 1 > ARRAY_SIZE(stats->s)); - stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors; - } + case BCH_RESERVATION: + if (replicas) + stats->replicas[replicas - 1].persistent_reserved += sectors; break; } - } percpu_up_read(&c->usage_lock); } @@ -681,7 +700,7 @@ static u64 __recalc_sectors_available(struct bch_fs *c) for_each_possible_cpu(cpu) per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0; - return bch2_fs_sectors_free(c, bch2_fs_usage_read(c)); + return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c))); } /* Used by gc when it's starting: */ diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 016201ba1b8b..9aeccbb11d54 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -173,9 +173,7 @@ struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *); void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, struct disk_reservation *, struct gc_pos); -u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); -u64 bch2_fs_sectors_free(struct bch_fs *, struct bch_fs_usage); static inline bool is_available_bucket(struct bucket_mark mark) { diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index d528194ccf7e..9968570832e3 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -69,7 +69,9 @@ struct bch_fs_usage { struct { u64 data[BCH_DATA_NR]; u64 persistent_reserved; - } s[BCH_REPLICAS_MAX]; + } replicas[BCH_REPLICAS_MAX]; + + u64 buckets[BCH_DATA_NR]; }; /* diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 283828fe2dc3..db0f990bebf4 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -404,10 +404,10 @@ static long bch2_ioctl_usage(struct bch_fs *c, for (i = 0; i < BCH_REPLICAS_MAX; i++) { dst.persistent_reserved[i] = - src.s[i].persistent_reserved; + src.replicas[i].persistent_reserved; for (j = 0; j < BCH_DATA_NR; j++) - dst.sectors[j][i] = src.s[i].data[j]; + dst.sectors[j][i] = src.replicas[i].data[j]; } ret = copy_to_user(&user_arg->fs, &dst, sizeof(dst)); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5963f88b8156..67ddad95f91a 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1428,13 +1428,16 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct bch_fs *c = sb->s_fs_info; + struct bch_fs_usage usage = bch2_fs_usage_read(c); + u64 hidden_metadata = usage.buckets[BCH_DATA_SB] + + usage.buckets[BCH_DATA_JOURNAL]; + unsigned shift = sb->s_blocksize_bits - 9; u64 fsid; buf->f_type = 
BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT; - buf->f_bfree = bch2_fs_sectors_free(c, bch2_fs_usage_read(c)) >> - PAGE_SECTOR_SHIFT; + buf->f_blocks = (c->capacity - hidden_metadata) >> shift; + buf->f_bfree = (c->capacity - bch2_fs_sectors_used(c, usage)) >> shift; buf->f_bavail = buf->f_bfree; buf->f_files = atomic_long_read(&c->nr_inodes); buf->f_ffree = U64_MAX; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index db8af44c7921..4ce7168e930b 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -238,7 +238,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) "capacity:\t\t%llu\n", c->capacity); - for (replicas = 0; replicas < ARRAY_SIZE(stats.s); replicas++) { + for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) { out += scnprintf(out, end - out, "%u replicas:\n", replicas + 1); @@ -247,12 +247,20 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) out += scnprintf(out, end - out, "\t%s:\t\t%llu\n", bch2_data_types[type], - stats.s[replicas].data[type]); + stats.replicas[replicas].data[type]); out += scnprintf(out, end - out, "\treserved:\t%llu\n", - stats.s[replicas].persistent_reserved); + stats.replicas[replicas].persistent_reserved); } + out += scnprintf(out, end - out, "bucket usage\n"); + + for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++) + out += scnprintf(out, end - out, + "\t%s:\t\t%llu\n", + bch2_data_types[type], + stats.buckets[type]); + out += scnprintf(out, end - out, "online reserved:\t%llu\n", stats.online_reserved); -- cgit v1.2.3 From fc3268c13c1925df9bdc427ffe9bd5466f672b83 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 Aug 2018 19:53:30 -0400 Subject: bcachefs: kill extent_insert_hook Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 2 +- fs/bcachefs/alloc.c | 2 +- fs/bcachefs/btree_iter.c | 6 +-- fs/bcachefs/btree_types.h | 18 +------- fs/bcachefs/btree_update.h | 27 +++--------- fs/bcachefs/btree_update_interior.h | 9 ---- fs/bcachefs/btree_update_leaf.c | 57 ++++++------------------ fs/bcachefs/extents.c | 88 +++++++------------------------------ fs/bcachefs/extents.h | 1 - fs/bcachefs/fs-io.c | 2 +- fs/bcachefs/fs.c | 12 ++--- fs/bcachefs/fsck.c | 34 +++++++------- fs/bcachefs/inode.c | 37 +++++----------- fs/bcachefs/inode.h | 2 - fs/bcachefs/io.c | 4 +- fs/bcachefs/journal_io.c | 2 +- fs/bcachefs/migrate.c | 2 +- fs/bcachefs/move.c | 2 +- fs/bcachefs/quota.c | 8 ++-- fs/bcachefs/recovery.c | 4 +- fs/bcachefs/super.c | 3 +- fs/bcachefs/tests.c | 30 ++++++------- 22 files changed, 98 insertions(+), 254 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 2856736f7224..0074b3eb196d 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -322,7 +322,7 @@ retry: bch2_write_inode_trans(&trans, inode, &inode_u, inode_update_for_set_acl_fn, (void *)(unsigned long) mode) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c index e6aeab0b47c7..82f27a57dc61 100644 --- a/fs/bcachefs/alloc.c +++ b/fs/bcachefs/alloc.c @@ -319,7 +319,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca, bch2_btree_iter_set_pos(iter, a->k.p); - return bch2_btree_insert_at(c, NULL, NULL, journal_seq, + return bch2_btree_insert_at(c, NULL, journal_seq, BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_ALLOC_RESERVE| 
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index aad7d8ff3f53..754f35f6b56c 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -975,8 +975,6 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) if (__bch2_btree_iter_relock(iter)) return 0; - iter->flags &= ~BTREE_ITER_AT_END_OF_LEAF; - /* * XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos * here unnecessary @@ -1155,10 +1153,8 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_ iter->flags & BTREE_ITER_IS_EXTENTS)) __btree_iter_advance(l); - if (!k && btree_iter_pos_after_node(iter, l->b)) { + if (!k && btree_iter_pos_after_node(iter, l->b)) btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - iter->flags |= BTREE_ITER_AT_END_OF_LEAF; - } } void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 03c319611d72..5053ed5f2762 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -196,11 +196,7 @@ enum btree_iter_type { * @pos or the first key strictly greater than @pos */ #define BTREE_ITER_IS_EXTENTS (1 << 4) -/* - * indicates we need to call bch2_btree_iter_traverse() to revalidate iterator: - */ -#define BTREE_ITER_AT_END_OF_LEAF (1 << 5) -#define BTREE_ITER_ERROR (1 << 6) +#define BTREE_ITER_ERROR (1 << 5) enum btree_iter_uptodate { BTREE_ITER_UPTODATE = 0, @@ -256,12 +252,6 @@ struct btree_iter { struct btree_insert_entry { struct btree_iter *iter; struct bkey_i *k; - unsigned extra_res; - /* - * true if entire key was inserted - can only be false for - * extents - */ - bool done; }; struct btree_trans { @@ -467,12 +457,6 @@ enum btree_insert_ret { BTREE_INSERT_NEED_GC_LOCK, }; -struct extent_insert_hook { - enum btree_insert_ret - (*fn)(struct extent_insert_hook *, struct bpos, struct bpos, - struct bkey_s_c, const struct bkey_i *); -}; - enum btree_gc_coalesce_fail_reason { BTREE_GC_COALESCE_FAIL_RESERVE_GET, BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 31b72895f6eb..f6b0082235af 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -23,7 +23,6 @@ struct btree_insert { struct disk_reservation *disk_res; struct journal_res journal_res; u64 *journal_seq; - struct extent_insert_hook *hook; unsigned flags; bool did_work; @@ -37,15 +36,6 @@ int __bch2_btree_insert_at(struct btree_insert *); ((struct btree_insert_entry) { \ .iter = (_iter), \ .k = (_k), \ - .done = false, \ - }) - -#define BTREE_INSERT_ENTRY_EXTRA_RES(_iter, _k, _extra) \ - ((struct btree_insert_entry) { \ - .iter = (_iter), \ - .k = (_k), \ - .extra_res = (_extra), \ - .done = false, \ }) /** @@ -61,13 +51,11 @@ int __bch2_btree_insert_at(struct btree_insert *); * -EROFS: filesystem read only * -EIO: journal or btree node IO error */ -#define bch2_btree_insert_at(_c, _disk_res, _hook, \ - _journal_seq, _flags, ...) \ +#define bch2_btree_insert_at(_c, _disk_res, _journal_seq, _flags, ...) 
\ __bch2_btree_insert_at(&(struct btree_insert) { \ .c = (_c), \ .disk_res = (_disk_res), \ .journal_seq = (_journal_seq), \ - .hook = (_hook), \ .flags = (_flags), \ .nr = COUNT_ARGS(__VA_ARGS__), \ .entries = (struct btree_insert_entry[]) { \ @@ -121,17 +109,13 @@ enum { int bch2_btree_delete_at(struct btree_iter *, unsigned); int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *, - struct disk_reservation *, - struct extent_insert_hook *, u64 *, unsigned); + struct disk_reservation *, u64 *, unsigned); int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, - struct disk_reservation *, - struct extent_insert_hook *, u64 *, int flags); + struct disk_reservation *, u64 *, int flags); int bch2_btree_delete_range(struct bch_fs *, enum btree_id, - struct bpos, struct bpos, struct bversion, - struct disk_reservation *, - struct extent_insert_hook *, u64 *); + struct bpos, struct bpos, u64 *); int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, __le64, unsigned); @@ -151,7 +135,6 @@ bch2_trans_update(struct btree_trans *trans, int bch2_trans_commit(struct btree_trans *, struct disk_reservation *, - struct extent_insert_hook *, u64 *, unsigned); #define bch2_trans_do(_c, _journal_seq, _flags, _do) \ @@ -164,7 +147,7 @@ int bch2_trans_commit(struct btree_trans *, do { \ bch2_trans_begin(&trans); \ \ - _ret = (_do) ?: bch2_trans_commit(&trans, NULL, NULL, \ + _ret = (_do) ?: bch2_trans_commit(&trans, NULL, \ (_journal_seq), (_flags)); \ } while (_ret == -EINTR); \ \ diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 4125cddded61..b24988352b03 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -161,15 +161,6 @@ static inline void bch2_foreground_maybe_merge_sibling(struct bch_fs *c, { struct btree *b; - /* - * iterators are inconsistent when they hit end of leaf, until - * traversed again - * - * XXX inconsistent how? - */ - if (iter->flags & BTREE_ITER_AT_END_OF_LEAF) - return; - if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) return; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 598d7a107792..6b8954493e05 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -336,14 +336,12 @@ static inline int do_btree_insert_at(struct btree_insert *trans, unsigned u64s; int ret; - trans_for_each_entry(trans, i) { - BUG_ON(i->done); + trans_for_each_entry(trans, i) BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK); - } u64s = 0; trans_for_each_entry(trans, i) - u64s += jset_u64s(i->k->k.u64s + i->extra_res); + u64s += jset_u64s(i->k->k.u64s); memset(&trans->journal_res, 0, sizeof(trans->journal_res)); @@ -374,7 +372,7 @@ static inline int do_btree_insert_at(struct btree_insert *trans, if (!same_leaf_as_prev(trans, i)) u64s = 0; - u64s += i->k->k.u64s + i->extra_res; + u64s += i->k->k.u64s; switch (btree_key_can_insert(trans, i, &u64s)) { case BTREE_INSERT_OK: break; @@ -406,28 +404,14 @@ static inline int do_btree_insert_at(struct btree_insert *trans, trans_for_each_entry(trans, i) { switch (btree_insert_key_leaf(trans, i)) { case BTREE_INSERT_OK: - i->done = true; break; case BTREE_INSERT_NEED_TRAVERSE: + BUG_ON((trans->flags & BTREE_INSERT_ATOMIC)); ret = -EINTR; - break; - case BTREE_INSERT_BTREE_NODE_FULL: - ret = -EINTR; - *split = i->iter; - break; - case BTREE_INSERT_ENOSPC: - ret = -ENOSPC; - break; + goto out; default: BUG(); } - - /* - * If we did some work (i.e. 
inserted part of an extent), - * we have to do all the other updates as well: - */ - if (!trans->did_work && (ret || *split)) - break; } out: multi_unlock_write(trans); @@ -523,11 +507,6 @@ out: trans->did_work && !btree_node_locked(linked, 0)); } - - /* make sure we didn't lose an error: */ - if (!ret) - trans_for_each_entry(trans, i) - BUG_ON(!i->done); } BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); @@ -614,7 +593,6 @@ err: int bch2_trans_commit(struct btree_trans *trans, struct disk_reservation *disk_res, - struct extent_insert_hook *hook, u64 *journal_seq, unsigned flags) { @@ -642,7 +620,7 @@ int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags) bkey_init(&k.k); k.k.p = iter->pos; - return bch2_btree_insert_at(iter->c, NULL, NULL, NULL, + return bch2_btree_insert_at(iter->c, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE|flags, BTREE_INSERT_ENTRY(iter, &k)); @@ -651,7 +629,6 @@ int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags) int bch2_btree_insert_list_at(struct btree_iter *iter, struct keylist *keys, struct disk_reservation *disk_res, - struct extent_insert_hook *hook, u64 *journal_seq, unsigned flags) { BUG_ON(flags & BTREE_INSERT_ATOMIC); @@ -659,7 +636,7 @@ int bch2_btree_insert_list_at(struct btree_iter *iter, bch2_verify_keylist_sorted(keys); while (!bch2_keylist_empty(keys)) { - int ret = bch2_btree_insert_at(iter->c, disk_res, hook, + int ret = bch2_btree_insert_at(iter->c, disk_res, journal_seq, flags, BTREE_INSERT_ENTRY(iter, bch2_keylist_front(keys))); if (ret) @@ -681,7 +658,6 @@ int bch2_btree_insert_list_at(struct btree_iter *iter, int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, struct disk_reservation *disk_res, - struct extent_insert_hook *hook, u64 *journal_seq, int flags) { struct btree_iter iter; @@ -689,7 +665,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); - ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, flags, + ret = bch2_btree_insert_at(c, disk_res, journal_seq, flags, BTREE_INSERT_ENTRY(&iter, k)); bch2_btree_iter_unlock(&iter); @@ -702,12 +678,8 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, * Range is a half open interval - [start, end) */ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, - struct bpos start, - struct bpos end, - struct bversion version, - struct disk_reservation *disk_res, - struct extent_insert_hook *hook, - u64 *journal_seq) + struct bpos start, struct bpos end, + u64 *journal_seq) { struct btree_iter iter; struct bkey_s_c k; @@ -717,14 +689,12 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, BTREE_ITER_INTENT); while ((k = bch2_btree_iter_peek(&iter)).k && - !(ret = btree_iter_err(k))) { + !(ret = btree_iter_err(k)) && + bkey_cmp(iter.pos, end) < 0) { unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); /* really shouldn't be using a bare, unpadded bkey_i */ struct bkey_i delete; - if (bkey_cmp(iter.pos, end) >= 0) - break; - bkey_init(&delete.k); /* @@ -738,7 +708,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, * bkey_start_pos(k.k)). 
*/ delete.k.p = iter.pos; - delete.k.version = version; if (iter.flags & BTREE_ITER_IS_EXTENTS) { /* create the biggest key we can */ @@ -746,7 +715,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, bch2_cut_back(end, &delete.k); } - ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, + ret = bch2_btree_insert_at(c, NULL, journal_seq, BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(&iter, &delete)); if (ret) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 59d2eaea9edf..9f39e9dea51a 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1252,52 +1252,6 @@ static void extent_insert_committed(struct extent_insert_state *s) s->trans->did_work = true; } -static enum btree_insert_ret -__extent_insert_advance_pos(struct extent_insert_state *s, - struct bpos next_pos, - struct bkey_s_c k) -{ - struct extent_insert_hook *hook = s->trans->hook; - enum btree_insert_ret ret; - - if (hook) - ret = hook->fn(hook, s->committed, next_pos, k, s->insert->k); - else - ret = BTREE_INSERT_OK; - - if (ret == BTREE_INSERT_OK) - s->committed = next_pos; - - return ret; -} - -/* - * Update iter->pos, marking how much of @insert we've processed, and call hook - * fn: - */ -static enum btree_insert_ret -extent_insert_advance_pos(struct extent_insert_state *s, struct bkey_s_c k) -{ - struct btree *b = s->insert->iter->l[0].b; - struct bpos next_pos = bpos_min(s->insert->k->k.p, - k.k ? k.k->p : b->key.k.p); - enum btree_insert_ret ret; - - /* hole? */ - if (k.k && bkey_cmp(s->committed, bkey_start_pos(k.k)) < 0) { - ret = __extent_insert_advance_pos(s, bkey_start_pos(k.k), - bkey_s_c_null); - if (ret != BTREE_INSERT_OK) - return ret; - } - - /* avoid redundant calls to hook fn: */ - if (!bkey_cmp(s->committed, next_pos)) - return BTREE_INSERT_OK; - - return __extent_insert_advance_pos(s, next_pos, k); -} - void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) { struct btree *b = iter->l[0].b; @@ -1468,8 +1422,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, } } -static enum btree_insert_ret -__bch2_insert_fixup_extent(struct extent_insert_state *s) +static void __bch2_insert_fixup_extent(struct extent_insert_state *s) { struct btree_iter *iter = s->insert->iter; struct btree_iter_level *l = &iter->l[0]; @@ -1477,7 +1430,6 @@ __bch2_insert_fixup_extent(struct extent_insert_state *s) struct bkey_packed *_k; struct bkey unpacked; struct bkey_i *insert = s->insert->k; - enum btree_insert_ret ret = BTREE_INSERT_OK; while (bkey_cmp(s->committed, insert->k.p) < 0 && (_k = bch2_btree_node_iter_peek_filter(&l->iter, b, @@ -1491,9 +1443,7 @@ __bch2_insert_fixup_extent(struct extent_insert_state *s) if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) break; - ret = extent_insert_advance_pos(s, k.s_c); - if (ret) - break; + s->committed = bpos_min(s->insert->k->k.p, k.k->p); if (!bkey_whiteout(k.k)) s->update_journal = true; @@ -1547,9 +1497,8 @@ next: break; } - if (ret == BTREE_INSERT_OK && - bkey_cmp(s->committed, insert->k.p) < 0) - ret = extent_insert_advance_pos(s, bkey_s_c_null); + if (bkey_cmp(s->committed, insert->k.p) < 0) + s->committed = bpos_min(s->insert->k->k.p, b->key.k.p); /* * may have skipped past some deleted extents greater than the insert @@ -1563,8 +1512,6 @@ next: bkey_cmp_left_packed(b, _k, &s->committed) > 0) l->iter = node_iter; } - - return ret; } /** @@ -1610,16 +1557,13 @@ enum btree_insert_ret bch2_insert_fixup_extent(struct btree_insert *trans, struct btree_insert_entry *insert) { - struct bch_fs *c = 
trans->c; - struct btree_iter *iter = insert->iter; - struct btree_iter_level *l = &iter->l[0]; - struct btree *b = l->b; - enum btree_insert_ret ret = BTREE_INSERT_OK; - + struct bch_fs *c = trans->c; + struct btree_iter *iter = insert->iter; + struct btree *b = iter->l[0].b; struct extent_insert_state s = { .trans = trans, .insert = insert, - .committed = insert->iter->pos, + .committed = iter->pos, .whiteout = *insert->k, .update_journal = !bkey_whiteout(&insert->k->k), @@ -1644,7 +1588,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans, bkey_start_offset(&insert->k->k), insert->k->k.size); - ret = __bch2_insert_fixup_extent(&s); + __bch2_insert_fixup_extent(&s); extent_insert_committed(&s); @@ -1653,16 +1597,14 @@ bch2_insert_fixup_extent(struct btree_insert *trans, EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k))); EBUG_ON(bkey_cmp(iter->pos, s.committed)); - EBUG_ON((bkey_cmp(iter->pos, b->key.k.p) == 0) != - !!(iter->flags & BTREE_ITER_AT_END_OF_LEAF)); - - if (insert->k->k.size && (iter->flags & BTREE_ITER_AT_END_OF_LEAF)) - ret = BTREE_INSERT_NEED_TRAVERSE; - WARN_ONCE((ret == BTREE_INSERT_OK) != (insert->k->k.size == 0), - "ret %u insert->k.size %u", ret, insert->k->k.size); + if (insert->k->k.size) { + /* got to the end of this leaf node */ + BUG_ON(bkey_cmp(iter->pos, b->key.k.p)); + return BTREE_INSERT_NEED_TRAVERSE; + } - return ret; + return BTREE_INSERT_OK; } const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 0721d1829f98..66143d8d3895 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -12,7 +12,6 @@ struct btree_node_iter; struct btree_node_iter_large; struct btree_insert; struct btree_insert_entry; -struct extent_insert_hook; struct bch_devs_mask; union bch_extent_crc; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 2ab2d612e90c..195af78cb474 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -326,7 +326,7 @@ static int bch2_extent_update(struct btree_trans *trans, BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i)); } - ret = bch2_trans_commit(trans, disk_res, NULL, + ret = bch2_trans_commit(trans, disk_res, &inode->ei_journal_seq, BTREE_INSERT_NOFAIL| BTREE_INSERT_ATOMIC| diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 67ddad95f91a..b67cf83f7fcd 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -229,7 +229,7 @@ retry: bch2_trans_begin(&trans); ret = bch2_write_inode_trans(&trans, inode, &inode_u, set, p) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| @@ -391,7 +391,7 @@ retry: inode_update_for_create_fn, &inode_u) : 0) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); @@ -535,7 +535,7 @@ retry: bch2_write_inode_trans(&trans, inode, &inode_u, inode_update_for_link_fn, NULL) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); @@ -622,7 +622,7 @@ retry: bch2_write_inode_trans(&trans, inode, &inode_u, inode_update_for_unlink_fn, NULL) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &dir->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| @@ -832,7 +832,7 @@ retry: ? 
bch2_write_inode_trans(&trans, i.dst_inode, &dst_inode_u, inode_update_for_rename_fn, &i) : 0 ) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); @@ -958,7 +958,7 @@ retry: (iattr->ia_valid & ATTR_MODE ? bch2_acl_chmod(&trans, inode, iattr->ia_mode, &acl) : 0) ?: - bch2_trans_commit(&trans, NULL, NULL, + bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 33fff198858a..2430833dbce8 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -73,8 +73,7 @@ static int reattach_inode(struct bch_fs *c, bch2_inode_pack(&packed, lostfound_inode); ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, NULL, - BTREE_INSERT_NOFAIL); + NULL, NULL, BTREE_INSERT_NOFAIL); if (ret) { bch_err(c, "error %i reattaching inode %llu while updating lost+found", ret, inum); @@ -202,7 +201,7 @@ retry: } ret = bch2_hash_delete_at(&trans, desc, info, iter) ?: - bch2_trans_commit(&trans, NULL, NULL, NULL, + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); err: @@ -290,6 +289,13 @@ fsck_err: return ret; } +static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) +{ + return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, + POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9), + POS(inode_nr + 1, 0), NULL); +} + /* * Walk extents: verify that extents have a corresponding S_ISREG inode, and * that i_size an i_sectors are consistent @@ -320,7 +326,7 @@ static int check_extents(struct bch_fs *c) k.k->type, k.k->p.inode, w.inode.bi_mode)) { bch2_btree_iter_unlock(&iter); - ret = bch2_inode_truncate(c, k.k->p.inode, 0, NULL, NULL); + ret = bch2_inode_truncate(c, k.k->p.inode, 0); if (ret) goto err; continue; @@ -342,10 +348,7 @@ static int check_extents(struct bch_fs *c) bch2_inode_pack(&p, &w.inode); ret = bch2_btree_insert(c, BTREE_ID_INODES, - &p.inode.k_i, - NULL, - NULL, - NULL, + &p.inode.k_i, NULL, NULL, BTREE_INSERT_NOFAIL); if (ret) { bch_err(c, "error in fs gc: error %i " @@ -366,8 +369,7 @@ static int check_extents(struct bch_fs *c) bch2_btree_iter_unlock(&iter); ret = bch2_inode_truncate(c, k.k->p.inode, - round_up(w.inode.bi_size, PAGE_SIZE) >> 9, - NULL, NULL); + w.inode.bi_size); if (ret) goto err; continue; @@ -508,7 +510,7 @@ static int check_dirents(struct bch_fs *c) bkey_reassemble(&n->k_i, d.s_c); n->v.d_type = mode_to_type(target.bi_mode); - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, + ret = bch2_btree_insert_at(c, NULL, NULL, BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(iter, &n->k_i)); kfree(n); @@ -602,7 +604,7 @@ create_root: bch2_inode_pack(&packed, root_inode); return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, NULL, BTREE_INSERT_NOFAIL); + NULL, NULL, BTREE_INSERT_NOFAIL); } /* Get lost+found, create if it doesn't exist: */ @@ -646,7 +648,7 @@ create_lostfound: bch2_inode_pack(&packed, root_inode); ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, NULL, BTREE_INSERT_NOFAIL); + NULL, NULL, BTREE_INSERT_NOFAIL); if (ret) return ret; @@ -1094,9 +1096,7 @@ static int check_inode(struct bch_fs *c, * just switch units to bytes and that issue goes away */ - ret = bch2_inode_truncate(c, u.bi_inum, - round_up(u.bi_size, PAGE_SIZE) >> 9, - NULL, NULL); + ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size); if (ret) { bch_err(c, "error in fs gc: error %i " "truncating inode", ret); @@ -1142,7 
+1142,7 @@ static int check_inode(struct bch_fs *c, bch2_inode_pack(&p, &u); - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, + ret = bch2_btree_insert_at(c, NULL, NULL, BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(iter, &p.inode.k_i)); if (ret && ret != -EINTR) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 002232ffed62..debdbf58dd79 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -371,33 +371,14 @@ int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u, __bch2_inode_create(&trans, inode_u, min, max, hint)); } -int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size, - struct extent_insert_hook *hook, u64 *journal_seq) -{ - return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, - POS(inode_nr, new_size), - POS(inode_nr + 1, 0), - ZERO_VERSION, NULL, hook, - journal_seq); -} - int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) { struct btree_iter iter; struct bkey_i_inode_generation delete; + struct bpos start = POS(inode_nr, 0); + struct bpos end = POS(inode_nr + 1, 0); int ret; - ret = bch2_inode_truncate(c, inode_nr, 0, NULL, NULL); - if (ret < 0) - return ret; - - ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, - POS(inode_nr, 0), - POS(inode_nr + 1, 0), - ZERO_VERSION, NULL, NULL, NULL); - if (ret < 0) - return ret; - /* * If this was a directory, there shouldn't be any real dirents left - * but there could be whiteouts (from hash collisions) that we should @@ -406,11 +387,13 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) * XXX: the dirent could ideally would delete whiteouts when they're no * longer needed */ - ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS, - POS(inode_nr, 0), - POS(inode_nr + 1, 0), - ZERO_VERSION, NULL, NULL, NULL); - if (ret < 0) + ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, + start, end, NULL) ?: + bch2_btree_delete_range(c, BTREE_ID_XATTRS, + start, end, NULL) ?: + bch2_btree_delete_range(c, BTREE_ID_DIRENTS, + start, end, NULL); + if (ret) return ret; bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0), @@ -454,7 +437,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) delete.v.bi_generation = cpu_to_le32(bi_generation); } - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, + ret = bch2_btree_insert_at(c, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(&iter, &delete.k_i)); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index ce423a5f2af5..8713b51d3af7 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -46,8 +46,6 @@ int __bch2_inode_create(struct btree_trans *, int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *, u64, u64, u64 *); -int bch2_inode_truncate(struct bch_fs *, u64, u64, - struct extent_insert_hook *, u64 *); int bch2_inode_rm(struct bch_fs *, u64); int bch2_inode_find_by_inum(struct bch_fs *, u64, diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index d1935ef1d6c3..50cc87b7875d 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -302,7 +302,7 @@ int bch2_write_index_default(struct bch_write_op *op) BTREE_ITER_INTENT); ret = bch2_btree_insert_list_at(&iter, keys, &op->res, - NULL, op_journal_seq(op), + op_journal_seq(op), BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE); bch2_btree_iter_unlock(&iter); @@ -1403,7 +1403,7 @@ retry: if (!bch2_extent_narrow_crcs(e, new_crc)) goto out; - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, + ret = bch2_btree_insert_at(c, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_NOWAIT, diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 
00c454673a04..16ea32dc1fa4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -904,7 +904,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) bch2_disk_reservation_init(c, 0); ret = bch2_btree_insert(c, entry->btree_id, k, - &disk_res, NULL, NULL, + &disk_res, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_JOURNAL_REPLAY); } diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 8f618dc5160d..9337a8729a5b 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -79,7 +79,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) iter.pos = bkey_start_pos(&tmp.key.k); - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, + ret = bch2_btree_insert_at(c, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL, BTREE_INSERT_ENTRY(&iter, &tmp.key)); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index b6310a60d5b7..93083cfff9bf 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -158,7 +158,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) break; ret = bch2_btree_insert_at(c, &op->res, - NULL, op_journal_seq(op), + op_journal_seq(op), BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 0a305ad08188..79a7f82868d6 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -541,7 +541,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, POS(QTYP_USR, 0), POS(QTYP_USR + 1, 0), - ZERO_VERSION, NULL, NULL, NULL); + NULL); if (ret) return ret; } @@ -553,7 +553,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, POS(QTYP_GRP, 0), POS(QTYP_GRP + 1, 0), - ZERO_VERSION, NULL, NULL, NULL); + NULL); if (ret) return ret; } @@ -565,7 +565,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, POS(QTYP_PRJ, 0), POS(QTYP_PRJ + 1, 0), - ZERO_VERSION, NULL, NULL, NULL); + NULL); if (ret) return ret; } @@ -764,7 +764,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, if (qdq->d_fieldmask & QC_INO_HARD) new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); - ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0, + ret = bch2_btree_insert_at(c, NULL, NULL, 0, BTREE_INSERT_ENTRY(&iter, &new_quota.k_i)); bch2_btree_iter_unlock(&iter); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 624d97dc4537..3deb59a675e1 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -331,7 +331,7 @@ int bch2_fs_initialize(struct bch_fs *c) err = "error creating root directory"; ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed_inode.inode.k_i, - NULL, NULL, NULL, 0); + NULL, NULL, 0); if (ret) goto err; @@ -344,7 +344,7 @@ int bch2_fs_initialize(struct bch_fs *c) err = "error creating lost+found"; ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed_inode.inode.k_i, - NULL, NULL, NULL, 0); + NULL, NULL, 0); if (ret) goto err; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 63e4d97d15d7..ffeffd50b083 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1246,8 +1246,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_btree_delete_range(c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), POS(ca->dev_idx + 1, 0), - ZERO_VERSION, - NULL, NULL, NULL); + NULL); if (ret) { bch_err(ca, "Remove failed, error deleting alloc info"); goto err; diff --git 
a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index a408fa9ed8b5..f0d28b45a610 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -15,12 +15,12 @@ static void delete_test_keys(struct bch_fs *c)
 ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
 POS(0, 0), POS(0, U64_MAX),
- ZERO_VERSION, NULL, NULL, NULL);
+ NULL);
 BUG_ON(ret);
 ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
 POS(0, 0), POS(0, U64_MAX),
- ZERO_VERSION, NULL, NULL, NULL);
+ NULL);
 BUG_ON(ret);
 }
@@ -40,7 +40,7 @@ static void test_delete(struct bch_fs *c, u64 nr)
 ret = bch2_btree_iter_traverse(&iter);
 BUG_ON(ret);
- ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+ ret = bch2_btree_insert_at(c, NULL, NULL, 0,
 BTREE_INSERT_ENTRY(&iter, &k.k_i));
 BUG_ON(ret);
@@ -69,7 +69,7 @@ static void test_delete_written(struct bch_fs *c, u64 nr)
 ret = bch2_btree_iter_traverse(&iter);
 BUG_ON(ret);
- ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+ ret = bch2_btree_insert_at(c, NULL, NULL, 0,
 BTREE_INSERT_ENTRY(&iter, &k.k_i));
 BUG_ON(ret);
@@ -99,7 +99,7 @@ static void test_iterate(struct bch_fs *c, u64 nr)
 k.k.p.offset = i;
 ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
@@ -141,7 +141,7 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr)
 k.k.size = 8;
 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
@@ -186,7 +186,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr)
 k.k.p.offset = i * 2;
 ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
@@ -236,7 +236,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr)
 k.k.size = 8;
 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
@@ -289,7 +289,7 @@ static void insert_test_extent(struct bch_fs *c,
 k.k_i.k.version.lo = test_version++;
 ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
@@ -352,7 +352,7 @@ static void rand_insert(struct bch_fs *c, u64 nr)
 k.k.p.offset = test_rand();
 ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k.k_i,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
 }
@@ -393,7 +393,7 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
 bkey_cookie_init(&k.k_i);
 k.k.p = iter.pos;
- ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+ ret = bch2_btree_insert_at(c, NULL, NULL, 0,
 BTREE_INSERT_ENTRY(&iter, &k.k_i));
 BUG_ON(ret);
 }
@@ -414,7 +414,7 @@ static void rand_delete(struct bch_fs *c, u64 nr)
 k.k.p.offset = test_rand();
 ret = bch2_btree_insert(c, BTREE_ID_DIRENTS, &k,
- NULL, NULL, NULL, 0);
+ NULL, NULL, 0);
 BUG_ON(ret);
 }
 }
@@ -433,7 +433,7 @@ static void seq_insert(struct bch_fs *c, u64 nr)
 BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
 insert.k.p = iter.pos;
- ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+ ret = bch2_btree_insert_at(c, NULL, NULL, 0,
 BTREE_INSERT_ENTRY(&iter, &insert.k_i));
 BUG_ON(ret);
@@ -465,7 +465,7 @@ static void seq_overwrite(struct bch_fs *c, u64 nr)
 bkey_reassemble(&u.k_i, k);
- ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
+ ret = bch2_btree_insert_at(c, NULL, NULL, 0,
 BTREE_INSERT_ENTRY(&iter, &u.k_i));
 BUG_ON(ret);
 }
@@ -478,7 +478,7 @@ static void seq_delete(struct bch_fs *c, u64 nr)
 ret = bch2_btree_delete_range(c, BTREE_ID_DIRENTS,
 POS(0, 0), POS(0, U64_MAX),
- ZERO_VERSION, NULL, NULL, NULL);
+ NULL);
 BUG_ON(ret);
 }
-- cgit v1.2.3
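The net effect of the patch above is API narrowing: bch2_btree_delete_range() loses the ZERO_VERSION argument and the extent_insert_hook, and the insert paths lose one always-NULL hook argument. Reconstructed from the call sites alone (so these parameter names are my guesses, not copied from the tree), the helpers end up with roughly these shapes:

/*
 * Rough post-patch shapes, inferred only from the call sites above:
 *
 *   bch2_btree_delete_range(c, btree_id, start, end, journal_seq)
 *   bch2_btree_insert(c, btree_id, k, disk_res, journal_seq, flags)
 *   bch2_btree_insert_at(c, disk_res, journal_seq, flags, entries...)
 */

From 1742237ba1db942b84a697509543fc5a9a25fcfa Mon Sep 17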
00:00:00 2001 From: Kent Overstreet Date: Thu, 27 Sep 2018 21:08:39 -0400 Subject: bcachefs: extent_for_each_ptr_decode() Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 31 ++++++++-------- fs/bcachefs/extents.c | 76 +++++++++++++++++++-------------------- fs/bcachefs/extents.h | 95 ++++++++++++++++++++++++++++--------------------- fs/bcachefs/fs-io.c | 8 ++--- fs/bcachefs/fs.c | 18 +++++----- fs/bcachefs/move.c | 23 ++++++------ fs/bcachefs/rebalance.c | 31 ++++++++-------- fs/bcachefs/sysfs.c | 12 +++---- 8 files changed, 154 insertions(+), 140 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index c6544f35eb09..84972b67f193 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -546,20 +546,19 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors) */ static void bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c_extent e, - const struct bch_extent_ptr *ptr, - struct bch_extent_crc_unpacked crc, + struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type, unsigned replicas, struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { struct bucket_mark old, new; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_BUCKET(ca, ptr); + struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); + struct bucket *g = PTR_BUCKET(ca, &p.ptr); s64 uncompressed_sectors = sectors; u64 v; - if (crc.compression_type) { + if (p.crc.compression_type) { unsigned old_sectors, new_sectors; if (sectors > 0) { @@ -570,8 +569,8 @@ static void bch2_mark_pointer(struct bch_fs *c, new_sectors = e.k->size + sectors; } - sectors = -__disk_sectors(crc, old_sectors) - +__disk_sectors(crc, new_sectors); + sectors = -__disk_sectors(p.crc, old_sectors) + +__disk_sectors(p.crc, new_sectors); } /* @@ -584,8 +583,8 @@ static void bch2_mark_pointer(struct bch_fs *c, * caller's responsibility to not apply @fs_usage if gc is in progress. */ fs_usage->replicas - [!ptr->cached && replicas ? replicas - 1 : 0].data - [!ptr->cached ? data_type : BCH_DATA_CACHED] += + [!p.ptr.cached && replicas ? replicas - 1 : 0].data + [!p.ptr.cached ? 
data_type : BCH_DATA_CACHED] += uncompressed_sectors; if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) { @@ -607,14 +606,14 @@ static void bch2_mark_pointer(struct bch_fs *c, * the allocator invalidating a bucket after we've already * checked the gen */ - if (gen_after(new.gen, ptr->gen)) { + if (gen_after(new.gen, p.ptr.gen)) { BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags)); - EBUG_ON(!ptr->cached && + EBUG_ON(!p.ptr.cached && test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)); return; } - if (!ptr->cached) + if (!p.ptr.cached) checked_add(new.dirty_sectors, sectors); else checked_add(new.cached_sectors, sectors); @@ -695,13 +694,13 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT: case BCH_EXTENT_CACHED: { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; BUG_ON(!sectors); - extent_for_each_ptr_crc(e, ptr, crc) - bch2_mark_pointer(c, e, ptr, crc, sectors, data_type, + extent_for_each_ptr_decode(e, p, entry) + bch2_mark_pointer(c, e, p, sectors, data_type, replicas, stats, journal_seq, flags); break; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 45d1f9f29d7c..0441e42bb1c4 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -231,21 +231,21 @@ unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e) unsigned bch2_extent_is_compressed(struct bkey_s_c k) { - struct bkey_s_c_extent e; - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; unsigned ret = 0; switch (k.k->type) { case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_c_to_extent(k); + case BCH_EXTENT_CACHED: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; - extent_for_each_ptr_crc(e, ptr, crc) - if (!ptr->cached && - crc.compression_type != BCH_COMPRESSION_NONE && - crc.compressed_size < crc.live_size) - ret = max_t(unsigned, ret, crc.compressed_size); + extent_for_each_ptr_decode(e, p, entry) + if (!p.ptr.cached && + p.crc.compression_type != BCH_COMPRESSION_NONE && + p.crc.compressed_size < p.crc.live_size) + ret = max_t(unsigned, ret, p.crc.compressed_size); + } } return ret; @@ -254,17 +254,17 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k) bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e, struct bch_extent_ptr m, u64 offset) { - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; - extent_for_each_ptr_crc(e, ptr, crc) - if (ptr->dev == m.dev && - ptr->gen == m.gen && - (s64) ptr->offset + crc.offset - bkey_start_offset(e.k) == + extent_for_each_ptr_decode(e, p, entry) + if (p.ptr.dev == m.dev && + p.ptr.gen == m.gen && + (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) == (s64) m.offset - offset) - return ptr; + return true; - return NULL; + return false; } /* Doesn't cleanup redundant crcs */ @@ -323,7 +323,7 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e, struct bch_extent_crc_unpacked n) { struct bch_extent_crc_unpacked u; - struct bch_extent_ptr *ptr; + struct extent_ptr_decoded p; union bch_extent_entry *i; /* Find a checksum entry that covers only live data: */ @@ -345,11 +345,11 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e, bch2_extent_crc_append(e, n); restart_narrow_pointers: - extent_for_each_ptr_crc(extent_i_to_s(e), ptr, u) - if (can_narrow_crc(u, n)) 
{ - ptr->offset += u.offset; - extent_ptr_append(e, *ptr); - __bch2_extent_drop_ptr(extent_i_to_s(e), ptr); + extent_for_each_ptr_decode(extent_i_to_s(e), p, i) + if (can_narrow_crc(p.crc, n)) { + i->ptr.offset += p.crc.offset; + extent_ptr_append(e, i->ptr); + __bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr); goto restart_narrow_pointers; } @@ -475,6 +475,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); entry = extent_entry_next(entry)) { switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + break; case BCH_EXTENT_ENTRY_crc32: entry->crc32.csum = swab32(entry->crc32.csum); break; @@ -488,8 +490,6 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) entry->crc128.csum.lo = (__force __le64) swab64((__force u64) entry->crc128.csum.lo); break; - case BCH_EXTENT_ENTRY_ptr: - break; } } break; @@ -605,28 +605,28 @@ static int extent_pick_read_device(struct bch_fs *c, struct bch_devs_mask *avoid, struct extent_ptr_decoded *pick) { - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; struct bch_dev *ca; int ret = 0; - extent_for_each_ptr_crc(e, ptr, crc) { - ca = bch_dev_bkey_exists(c, ptr->dev); + extent_for_each_ptr_decode(e, p, entry) { + ca = bch_dev_bkey_exists(c, p.ptr.dev); - if (ptr->cached && ptr_stale(ca, ptr)) + if (p.ptr.cached && ptr_stale(ca, &p.ptr)) continue; - if (avoid && test_bit(ptr->dev, avoid->d)) - continue; + /* + * XXX: need to make avoid work correctly for stripe ptrs + */ - if (ret && !dev_latency_better(c, ptr, &pick->ptr)) + if (avoid && test_bit(p.ptr.dev, avoid->d)) continue; - *pick = (struct extent_ptr_decoded) { - .ptr = *ptr, - .crc = crc, - }; + if (ret && !dev_latency_better(c, &p.ptr, &pick->ptr)) + continue; + *pick = p; ret = 1; } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 6c8498d4b295..b1b9c189867a 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -182,12 +182,24 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry) static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) { - return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; + switch (extent_entry_type(e)) { + case BCH_EXTENT_ENTRY_ptr: + return true; + default: + return false; + } } static inline bool extent_entry_is_crc(const union bch_extent_entry *e) { - return !extent_entry_is_ptr(e); + switch (extent_entry_type(e)) { + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + return true; + default: + return false; + } } union bch_extent_crc { @@ -310,23 +322,25 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) #define extent_for_each_entry(_e, _entry) \ extent_for_each_entry_from(_e, _entry, (_e).v->start) -/* Iterate over crcs only: */ +/* Iterate over pointers only: */ -#define __extent_crc_next(_e, _p) \ +#define extent_ptr_next(_e, _ptr) \ ({ \ - typeof(&(_e).v->start[0]) _entry = _p; \ + typeof(&(_e).v->start[0]) _entry; \ \ - while ((_entry) < extent_entry_last(_e) && \ - !extent_entry_is_crc(_entry)) \ - (_entry) = extent_entry_next(_entry); \ + extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \ + if (extent_entry_is_ptr(_entry)) \ + break; \ \ - entry_to_crc(_entry < extent_entry_last(_e) ? _entry : NULL); \ + _entry < extent_entry_last(_e) ? 
entry_to_ptr(_entry) : NULL; \ }) -#define __extent_for_each_crc(_e, _crc) \ - for ((_crc) = __extent_crc_next(_e, (_e).v->start); \ - (_crc); \ - (_crc) = __extent_crc_next(_e, extent_entry_next(to_entry(_crc)))) +#define extent_for_each_ptr(_e, _ptr) \ + for ((_ptr) = &(_e).v->start->ptr; \ + ((_ptr) = extent_ptr_next(_e, _ptr)); \ + (_ptr)++) + +/* Iterate over crcs only: */ #define extent_crc_next(_e, _crc, _iter) \ ({ \ @@ -347,43 +361,44 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) /* Iterate over pointers, with crcs: */ -#define extent_ptr_crc_next(_e, _ptr, _crc) \ +static inline struct extent_ptr_decoded +__extent_ptr_decoded_init(const struct bkey *k) +{ + return (struct extent_ptr_decoded) { + .crc = bch2_extent_crc_unpack(k, NULL), + }; +} + +#define EXTENT_ITERATE_EC (1 << 0) + +#define __extent_ptr_next_decode(_e, _ptr, _entry) \ ({ \ __label__ out; \ - typeof(&(_e).v->start[0]) _entry; \ \ - extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \ - if (extent_entry_is_crc(_entry)) { \ - (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_entry));\ - } else { \ - _ptr = entry_to_ptr(_entry); \ + extent_for_each_entry_from(_e, _entry, _entry) \ + switch (extent_entry_type(_entry)) { \ + case BCH_EXTENT_ENTRY_ptr: \ + (_ptr).ptr = _entry->ptr; \ goto out; \ + case BCH_EXTENT_ENTRY_crc32: \ + case BCH_EXTENT_ENTRY_crc64: \ + case BCH_EXTENT_ENTRY_crc128: \ + (_ptr).crc = bch2_extent_crc_unpack((_e).k, \ + entry_to_crc(_entry)); \ + break; \ } \ \ - _ptr = NULL; \ out: \ - _ptr; \ + _entry < extent_entry_last(_e); \ }) -#define extent_for_each_ptr_crc(_e, _ptr, _crc) \ - for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ - (_ptr) = &(_e).v->start->ptr; \ - ((_ptr) = extent_ptr_crc_next(_e, _ptr, _crc)); \ - (_ptr)++) - -/* Iterate over pointers only, and from a given position: */ - -#define extent_ptr_next(_e, _ptr) \ -({ \ - struct bch_extent_crc_unpacked _crc; \ - \ - extent_ptr_crc_next(_e, _ptr, _crc); \ -}) +#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ + for ((_ptr) = __extent_ptr_decoded_init((_e).k), \ + (_entry) = (_e).v->start; \ + __extent_ptr_next_decode(_e, _ptr, _entry); \ + (_entry) = extent_entry_next(_entry)) -#define extent_for_each_ptr(_e, _ptr) \ - for ((_ptr) = &(_e).v->start->ptr; \ - ((_ptr) = extent_ptr_next(_e, _ptr)); \ - (_ptr)++) +/* Iterate over pointers backwards: */ #define extent_ptr_prev(_e, _ptr) \ ({ \ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 2902e5f925ef..eecf792198e4 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -920,12 +920,12 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, if (bkey_extent_is_data(k.k)) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - struct bch_extent_crc_unpacked crc; const union bch_extent_entry *i; + struct extent_ptr_decoded p; - extent_for_each_crc(e, crc, i) - want_full_extent |= ((crc.csum_type != 0) | - (crc.compression_type != 0)); + extent_for_each_ptr_decode(e, p, i) + want_full_extent |= ((p.crc.csum_type != 0) | + (p.crc.compression_type != 0)); } readpage_bio_extend(readpages_iter, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b67cf83f7fcd..88bf88c047ae 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1053,27 +1053,27 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, { if (bkey_extent_is_data(&k->k)) { struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); - const struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *entry; + 
struct extent_ptr_decoded p; int ret; - extent_for_each_ptr_crc(e, ptr, crc) { + extent_for_each_ptr_decode(e, p, entry) { int flags2 = 0; - u64 offset = ptr->offset; + u64 offset = p.ptr.offset; - if (crc.compression_type) + if (p.crc.compression_type) flags2 |= FIEMAP_EXTENT_ENCODED; else - offset += crc.offset; + offset += p.crc.offset; if ((offset & (PAGE_SECTORS - 1)) || (e.k->size & (PAGE_SECTORS - 1))) flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; ret = fiemap_fill_next_extent(info, - bkey_start_offset(e.k) << 9, - offset << 9, - e.k->size << 9, flags|flags2); + bkey_start_offset(e.k) << 9, + offset << 9, + e.k->size << 9, flags|flags2); if (ret) return ret; } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 96f04f349fb1..1e63d0e5ce53 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -67,8 +67,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct bkey_i_extent *insert, *new = bkey_i_to_extent(bch2_keylist_front(keys)); BKEY_PADDED(k) _new, _insert; - struct bch_extent_ptr *ptr; - struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; bool did_work = false; int nr; @@ -99,14 +99,15 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_cut_back(insert->k.p, &new->k); if (m->data_cmd == DATA_REWRITE) { - ptr = (struct bch_extent_ptr *) + struct bch_extent_ptr *ptr = (void *) bch2_extent_has_device(extent_i_to_s_c(insert), m->data_opts.rewrite_dev); + BUG_ON(!ptr); bch2_extent_drop_ptr(extent_i_to_s(insert), ptr); } - extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) { - if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) { + extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { + if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) { /* * raced with another move op? 
extent already
 * has a pointer to the device we just wrote
@@ -115,8 +116,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 continue;
 }
- bch2_extent_crc_append(insert, crc);
- extent_ptr_append(insert, *ptr);
+ bch2_extent_crc_append(insert, p.crc);
+ extent_ptr_append(insert, p.ptr);
 did_work = true;
 }
@@ -379,8 +380,8 @@ static int bch2_move_extent(struct bch_fs *c,
 struct data_opts data_opts)
 {
 struct moving_io *io;
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
 unsigned sectors = e.k->size, pages;
 int ret = -ENOMEM;
@@ -393,8 +394,8 @@ static int bch2_move_extent(struct bch_fs *c,
 SECTORS_IN_FLIGHT_PER_DEVICE);
 /* write path might have to decompress data: */
- extent_for_each_ptr_crc(e, ptr, crc)
- sectors = max_t(unsigned, sectors, crc.uncompressed_size);
+ extent_for_each_ptr_decode(e, p, entry)
+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
 pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
 io = kzalloc(sizeof(struct moving_io) +
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 461af44dbde7..570dbae5a240 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -18,17 +18,16 @@
 #include
 static inline bool rebalance_ptr_pred(struct bch_fs *c,
- const struct bch_extent_ptr *ptr,
- struct bch_extent_crc_unpacked crc,
+ struct extent_ptr_decoded p,
 struct bch_io_opts *io_opts)
 {
 if (io_opts->background_target &&
- !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
- !ptr->cached)
+ !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) &&
+ !p.ptr.cached)
 return true;
 if (io_opts->background_compression &&
- crc.compression_type !=
+ p.crc.compression_type !=
 bch2_compression_opt_to_type[io_opts->background_compression])
 return true;
@@ -39,8 +38,8 @@ void bch2_rebalance_add_key(struct bch_fs *c,
 struct bkey_s_c k,
 struct bch_io_opts *io_opts)
 {
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
 struct bkey_s_c_extent e;
 if (!bkey_extent_is_data(k.k))
@@ -52,13 +51,13 @@ void bch2_rebalance_add_key(struct bch_fs *c,
 e = bkey_s_c_to_extent(k);
- extent_for_each_ptr_crc(e, ptr, crc)
- if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ extent_for_each_ptr_decode(e, p, entry)
+ if (rebalance_ptr_pred(c, p, io_opts)) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- if (atomic64_add_return(crc.compressed_size,
+ if (atomic64_add_return(p.crc.compressed_size,
 &ca->rebalance_work) ==
- crc.compressed_size)
+ p.crc.compressed_size)
 rebalance_wakeup(c);
 }
 }
@@ -76,16 +75,16 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
 struct bch_io_opts *io_opts,
 struct data_opts *data_opts)
 {
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
 /* Make sure we have room to add a new pointer: */
 if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
 BKEY_EXTENT_VAL_U64s_MAX)
 return DATA_SKIP;
- extent_for_each_ptr_crc(e, ptr, crc)
- if (rebalance_ptr_pred(c, ptr, crc, io_opts))
+ extent_for_each_ptr_decode(e, p, entry)
+ if (rebalance_ptr_pred(c, p, io_opts))
 goto found;
 return DATA_SKIP;
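Between these file-by-file conversions, the semantics of the new iterator deserve a note: an extent's value is a flat sequence of entries in which a crc entry applies to every pointer entry that follows it, and extent_for_each_ptr_decode() walks that sequence, handing the caller each pointer bundled with the most recently seen crc as one struct extent_ptr_decoded. A self-contained toy model of that traversal (the types, names, and values here are mine, not the tree's):

#include <stdio.h>

enum entry_type { ENTRY_CRC, ENTRY_PTR };

struct entry { enum entry_type type; unsigned val; };

/* toy counterpart of struct extent_ptr_decoded */
struct ptr_decoded { unsigned ptr; unsigned crc; };

int main(void)
{
	/* a crc entry covers every ptr entry after it, until the next crc */
	const struct entry extent[] = {
		{ ENTRY_CRC, 0xaa }, { ENTRY_PTR, 1 },
		{ ENTRY_PTR, 2 },	/* still covered by crc 0xaa */
		{ ENTRY_CRC, 0xbb }, { ENTRY_PTR, 3 },
	};
	struct ptr_decoded p = { 0, 0 };
	size_t i;

	for (i = 0; i < sizeof(extent) / sizeof(extent[0]); i++) {
		if (extent[i].type == ENTRY_CRC) {
			p.crc = extent[i].val;	/* remember, don't yield */
			continue;
		}
		p.ptr = extent[i].val;		/* yield (ptr, current crc) */
		printf("ptr %u decoded with crc 0x%x\n", p.ptr, p.crc);
	}
	return 0;
}

This is why callers that previously took a (ptr, crc) pair now take a single decoded value: the pairing logic lives in one place instead of in every loop.

diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index b7a65bc20430..ee91bcc6433c 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -283,19 +283,19 @@ static ssize_t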
bch2_compression_stats(struct bch_fs *c, char *buf)
 for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
 if (k.k->type == BCH_EXTENT) {
 struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
- extent_for_each_ptr_crc(e, ptr, crc) {
- if (crc.compression_type == BCH_COMPRESSION_NONE) {
+ extent_for_each_ptr_decode(e, p, entry) {
+ if (p.crc.compression_type == BCH_COMPRESSION_NONE) {
 nr_uncompressed_extents++;
 uncompressed_sectors += e.k->size;
 } else {
 nr_compressed_extents++;
 compressed_sectors_compressed +=
- crc.compressed_size;
+ p.crc.compressed_size;
 compressed_sectors_uncompressed +=
- crc.uncompressed_size;
+ p.crc.uncompressed_size;
 }
 /* only looking at the first ptr */
-- cgit v1.2.3
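The next patch in the series converts every to_text method from the int (*)(char *buf, size_t size, ...) style, where each helper returns a byte count the caller must accumulate, to a void function that appends into a struct printbuf. A minimal userspace stand-in for that API, inferred from the call sites below (PBUF(), _PBUF(), pr_buf(), and the out.pos - buf length recovery); the real definitions live elsewhere in the tree and may differ in detail:

#include <stdarg.h>
#include <stdio.h>

struct printbuf {
	char *pos;	/* next byte to write */
	char *end;	/* one past the end of the buffer */
};

#define _PBUF(_buf, _len)	((struct printbuf) { (_buf), (_buf) + (_len) })
#define PBUF(_buf)		_PBUF(_buf, sizeof(_buf))

/* append formatted output, silently truncating at end (toy version) */
static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	out->pos += vsnprintf(out->pos, out->end - out->pos, fmt, args);
	va_end(args);

	if (out->pos > out->end)	/* vsnprintf() returns the untruncated length */
		out->pos = out->end;
}

int main(void)
{
	char buf[64];
	struct printbuf out = PBUF(buf);

	pr_buf(&out, "gen %u", 42u);
	pr_buf(&out, " pos %llu:%llu", 1ULL, 4096ULL);
	printf("%s (%ld bytes)\n", buf, (long)(out.pos - buf));
	return 0;
}

The payoff is visible throughout the diffs below: no more hand-threaded out/end pointer pairs or scnprintf() return-value arithmetic at every call site.

From 319f9ac38eaba628d69b6ddbf402b35487315fc1 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Fri, 9 Nov 2018 01:24:07 -0500
Subject: bcachefs: revamp to_text methods

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/alloc_background.c | 14 ++---
 fs/bcachefs/alloc_background.h | 2 +-
 fs/bcachefs/bkey.c | 4 +-
 fs/bcachefs/bkey_methods.c | 60 +++++++------
 fs/bcachefs/bkey_methods.h | 16 ++---
 fs/bcachefs/bset.c | 82 +++++++++++++-------------
 fs/bcachefs/bset.h | 4 +-
 fs/bcachefs/btree_cache.c | 85 +++++++++++++--------------
 fs/bcachefs/btree_cache.h | 4 +-
 fs/bcachefs/btree_io.c | 65 ++++++++++----------
 fs/bcachefs/btree_iter.c | 19 ++----
 fs/bcachefs/btree_update_interior.c | 16 ++---
 fs/bcachefs/debug.c | 17 +++---
 fs/bcachefs/dirent.c | 15 ++---
 fs/bcachefs/dirent.h | 2 +-
 fs/bcachefs/disk_groups.c | 56 +++++++-----------
 fs/bcachefs/disk_groups.h | 5 +-
 fs/bcachefs/extents.c | 92 ++++++++++++-----------------
 fs/bcachefs/extents.h | 5 +-
 fs/bcachefs/fs.c | 2 +-
 fs/bcachefs/fsck.c | 40 ++++++++-----
 fs/bcachefs/inode.c | 11 ++--
 fs/bcachefs/inode.h | 2 +-
 fs/bcachefs/journal.c | 88 +++++++++++++---------------
 fs/bcachefs/journal_io.c | 7 ++-
 fs/bcachefs/opts.c | 36 ++++++------
 fs/bcachefs/opts.h | 7 ++-
 fs/bcachefs/quota.c | 15 ++---
 fs/bcachefs/quota.h | 2 +-
 fs/bcachefs/rebalance.c | 34 ++++++------
 fs/bcachefs/replicas.c | 51 +++++++---------
 fs/bcachefs/replicas.h | 3 +-
 fs/bcachefs/super-io.c | 25 ++++----
 fs/bcachefs/super-io.h | 6 +-
 fs/bcachefs/super.c | 7 +--
 fs/bcachefs/sysfs.c | 114 ++++++++++++++++--------------
 fs/bcachefs/util.c | 89 ++++++++++++----------
 fs/bcachefs/util.h | 33 +++++++++--
 fs/bcachefs/xattr.c | 48 ++++++++-------
 fs/bcachefs/xattr.h | 2 +-
 40 files changed, 550 insertions(+), 635 deletions(-)
(limited to 'fs/bcachefs/fs.c')
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index a4c4a08aed59..291d352ee370 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -94,17 +94,17 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k)
 return NULL;
 }
-int bch2_alloc_to_text(struct bch_fs *c, char *buf,
- size_t size, struct bkey_s_c k)
+void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
 {
- buf[0] = '\0';
-
 switch (k.k->type) {
- case BCH_ALLOC:
+ case BCH_ALLOC: {
+ struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
+
+ pr_buf(out, "gen %u", a.v->gen);
 break;
 }
-
- return 0;
+ }
 }
 static inline unsigned get_alloc_field(const u8 **p, unsigned bytes)
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 33224070e827..99535fa60214 100644
---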
a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -9,7 +9,7 @@ #define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); -int bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_alloc_ops (struct bkey_ops) { \ .key_invalid = bch2_alloc_invalid, \ diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index c0e86ada1c53..d7e022ba2027 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -60,8 +60,8 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, char buf1[160], buf2[160]; char buf3[160], buf4[160]; - bch2_bkey_to_text(buf1, sizeof(buf1), unpacked); - bch2_bkey_to_text(buf2, sizeof(buf2), &tmp); + bch2_bkey_to_text(&PBUF(buf1), unpacked); + bch2_bkey_to_text(&PBUF(buf2), &tmp); bch2_to_binary(buf3, (void *) unpacked, 80); bch2_to_binary(buf4, high_word(format, packed), 80); diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index b3f5f28b8761..7335fbbb3f61 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -111,7 +111,7 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) if (invalid) { char buf[160]; - bch2_bkey_val_to_text(c, type, buf, sizeof(buf), k); + bch2_bkey_val_to_text(&PBUF(buf), c, type, k); bch2_fs_bug(c, "invalid bkey %s: %s", buf, invalid); return; } @@ -121,73 +121,57 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) ops->key_debugcheck(c, b, k); } -#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) - -int bch2_bpos_to_text(char *buf, size_t size, struct bpos pos) +void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) { - char *out = buf, *end = buf + size; - if (!bkey_cmp(pos, POS_MIN)) - p("POS_MIN"); + pr_buf(out, "POS_MIN"); else if (!bkey_cmp(pos, POS_MAX)) - p("POS_MAX"); + pr_buf(out, "POS_MAX"); else - p("%llu:%llu", pos.inode, pos.offset); - - return out - buf; + pr_buf(out, "%llu:%llu", pos.inode, pos.offset); } -int bch2_bkey_to_text(char *buf, size_t size, const struct bkey *k) +void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) { - char *out = buf, *end = buf + size; - - p("u64s %u type %u ", k->u64s, k->type); + pr_buf(out, "u64s %u type %u ", k->u64s, k->type); - out += bch2_bpos_to_text(out, end - out, k->p); + bch2_bpos_to_text(out, k->p); - p(" snap %u len %u ver %llu", k->p.snapshot, k->size, k->version.lo); - - return out - buf; + pr_buf(out, " snap %u len %u ver %llu", + k->p.snapshot, k->size, k->version.lo); } -int bch2_val_to_text(struct bch_fs *c, enum bkey_type type, - char *buf, size_t size, struct bkey_s_c k) +void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, + enum bkey_type type, struct bkey_s_c k) { const struct bkey_ops *ops = &bch2_bkey_ops[type]; - char *out = buf, *end = buf + size; switch (k.k->type) { case KEY_TYPE_DELETED: - p(" deleted"); + pr_buf(out, " deleted"); break; case KEY_TYPE_DISCARD: - p(" discard"); + pr_buf(out, " discard"); break; case KEY_TYPE_ERROR: - p(" error"); + pr_buf(out, " error"); break; case KEY_TYPE_COOKIE: - p(" cookie"); + pr_buf(out, " cookie"); break; default: if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text) - out += ops->val_to_text(c, out, end - out, k); + ops->val_to_text(out, c, k); break; } - - return out - buf; } -int bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type, - char *buf, size_t 
size, struct bkey_s_c k) +void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, + enum bkey_type type, struct bkey_s_c k) { - char *out = buf, *end = buf + size; - - out += bch2_bkey_to_text(out, end - out, k.k); - out += scnprintf(out, end - out, ": "); - out += bch2_val_to_text(c, type, out, end - out, k); - - return out - buf; + bch2_bkey_to_text(out, k.k); + pr_buf(out, ": "); + bch2_val_to_text(out, c, type, k); } void bch2_bkey_swab(enum bkey_type type, diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 6ee774ba3d7a..be6041e92c05 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -46,8 +46,8 @@ struct bkey_ops { struct bkey_s_c); void (*key_debugcheck)(struct bch_fs *, struct btree *, struct bkey_s_c); - int (*val_to_text)(struct bch_fs *, char *, - size_t, struct bkey_s_c); + void (*val_to_text)(struct printbuf *, struct bch_fs *, + struct bkey_s_c); void (*swab)(const struct bkey_format *, struct bkey_packed *); key_filter_fn key_normalize; key_merge_fn key_merge; @@ -62,12 +62,12 @@ const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -int bch2_bpos_to_text(char *, size_t, struct bpos); -int bch2_bkey_to_text(char *, size_t, const struct bkey *); -int bch2_val_to_text(struct bch_fs *, enum bkey_type, - char *, size_t, struct bkey_s_c); -int bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type, - char *, size_t, struct bkey_s_c); +void bch2_bpos_to_text(struct printbuf *, struct bpos); +void bch2_bkey_to_text(struct printbuf *, const struct bkey *); +void bch2_val_to_text(struct printbuf *, struct bch_fs *, enum bkey_type, + struct bkey_s_c); +void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, + enum bkey_type, struct bkey_s_c); void bch2_bkey_swab(enum bkey_type, const struct bkey_format *, struct bkey_packed *); diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 7fc8fb85069f..ac84aac4a263 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -56,7 +56,7 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set) _k = _n, k = n) { _n = bkey_next(_k); - bch2_bkey_to_text(buf, sizeof(buf), &k); + bch2_bkey_to_text(&PBUF(buf), &k); printk(KERN_ERR "block %u key %5u: %s\n", set, __btree_node_key_to_offset(b, _k), buf); @@ -106,7 +106,7 @@ void bch2_dump_btree_node_iter(struct btree *b, struct bkey uk = bkey_unpack_key(b, k); char buf[100]; - bch2_bkey_to_text(buf, sizeof(buf), &uk); + bch2_bkey_to_text(&PBUF(buf), &uk); printk(KERN_ERR "set %zu key %zi/%u: %s\n", t - b->set, k->_data - bset(b, t)->_data, bset(b, t)->u64s, buf); } @@ -150,8 +150,8 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, char buf1[80], buf2[80]; bch2_dump_btree_node(b); - bch2_bkey_to_text(buf1, sizeof(buf1), &ku); - bch2_bkey_to_text(buf2, sizeof(buf2), &nu); + bch2_bkey_to_text(&PBUF(buf1), &ku); + bch2_bkey_to_text(&PBUF(buf2), &nu); printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", buf1, buf2); printk(KERN_ERR "iter was:"); @@ -212,8 +212,8 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, char buf2[100]; bch2_dump_btree_node(b); - bch2_bkey_to_text(buf1, sizeof(buf1), &k1); - bch2_bkey_to_text(buf2, sizeof(buf2), &k2); + bch2_bkey_to_text(&PBUF(buf1), &k1); + bch2_bkey_to_text(&PBUF(buf2), &k2); panic("prev > insert:\n" "prev key %5u %s\n" @@ -234,8 +234,8 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, char buf2[100]; 
bch2_dump_btree_node(b); - bch2_bkey_to_text(buf1, sizeof(buf1), &k1); - bch2_bkey_to_text(buf2, sizeof(buf2), &k2); + bch2_bkey_to_text(&PBUF(buf1), &k1); + bch2_bkey_to_text(&PBUF(buf2), &k2); panic("insert > next:\n" "insert key %5u %s\n" @@ -1767,8 +1767,8 @@ void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats) } } -int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k, - char *buf, size_t size) +void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, + struct bkey_packed *k) { struct bset_tree *t = bch2_bkey_to_bset(b, k); struct bkey_packed *l, *r, *p; @@ -1776,28 +1776,29 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k, char buf1[200], buf2[200]; unsigned j, inorder; - if (!size) - return 0; + if (out->pos != out->end) + *out->pos = '\0'; if (!bset_has_ro_aux_tree(t)) - goto out; + return; inorder = bkey_to_cacheline(b, t, k); if (!inorder || inorder >= t->size) - goto out; + return; j = __inorder_to_eytzinger1(inorder, t->size, t->extra); if (k != tree_to_bkey(b, t, j)) - goto out; + return; switch (bkey_float(b, t, j)->exponent) { case BFLOAT_FAILED_UNPACKED: uk = bkey_unpack_key(b, k); - return scnprintf(buf, size, - " failed unpacked at depth %u\n" - "\t%llu:%llu\n", - ilog2(j), - uk.p.inode, uk.p.offset); + pr_buf(out, + " failed unpacked at depth %u\n" + "\t%llu:%llu\n", + ilog2(j), + uk.p.inode, uk.p.offset); + break; case BFLOAT_FAILED_PREV: p = tree_to_prev_bkey(b, t, j); l = is_power_of_2(j) @@ -1812,28 +1813,27 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k, bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits); bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits); - return scnprintf(buf, size, - " failed prev at depth %u\n" - "\tkey starts at bit %u but first differing bit at %u\n" - "\t%llu:%llu\n" - "\t%llu:%llu\n" - "\t%s\n" - "\t%s\n", - ilog2(j), - bch2_bkey_greatest_differing_bit(b, l, r), - bch2_bkey_greatest_differing_bit(b, p, k), - uk.p.inode, uk.p.offset, - up.p.inode, up.p.offset, - buf1, buf2); + pr_buf(out, + " failed prev at depth %u\n" + "\tkey starts at bit %u but first differing bit at %u\n" + "\t%llu:%llu\n" + "\t%llu:%llu\n" + "\t%s\n" + "\t%s\n", + ilog2(j), + bch2_bkey_greatest_differing_bit(b, l, r), + bch2_bkey_greatest_differing_bit(b, p, k), + uk.p.inode, uk.p.offset, + up.p.inode, up.p.offset, + buf1, buf2); + break; case BFLOAT_FAILED_OVERFLOW: uk = bkey_unpack_key(b, k); - return scnprintf(buf, size, - " failed overflow at depth %u\n" - "\t%llu:%llu\n", - ilog2(j), - uk.p.inode, uk.p.offset); + pr_buf(out, + " failed overflow at depth %u\n" + "\t%llu:%llu\n", + ilog2(j), + uk.p.inode, uk.p.offset); + break; } -out: - *buf = '\0'; - return 0; } diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 1b0122dad2bc..5d03036620b9 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -607,8 +607,8 @@ struct bset_stats { }; void bch2_btree_keys_stats(struct btree *, struct bset_stats *); -int bch2_bkey_print_bfloat(struct btree *, struct bkey_packed *, - char *, size_t); +void bch2_bfloat_to_text(struct printbuf *, struct btree *, + struct bkey_packed *); /* Debug stuff */ diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 3cb3da363d11..846d5e816aa2 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -888,55 +888,54 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, bch2_btree_node_fill(c, iter, k, level, SIX_LOCK_read, false); } -int bch2_print_btree_node(struct bch_fs *c, 
struct btree *b, - char *buf, size_t len) +void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, + struct btree *b) { const struct bkey_format *f = &b->format; struct bset_stats stats; - char ptrs[100]; memset(&stats, 0, sizeof(stats)); - bch2_val_to_text(c, BKEY_TYPE_BTREE, ptrs, sizeof(ptrs), - bkey_i_to_s_c(&b->key)); bch2_btree_keys_stats(b, &stats); - return scnprintf(buf, len, - "l %u %llu:%llu - %llu:%llu:\n" - " ptrs: %s\n" - " format: u64s %u fields %u %u %u %u %u\n" - " unpack fn len: %u\n" - " bytes used %zu/%zu (%zu%% full)\n" - " sib u64s: %u, %u (merge threshold %zu)\n" - " nr packed keys %u\n" - " nr unpacked keys %u\n" - " floats %zu\n" - " failed unpacked %zu\n" - " failed prev %zu\n" - " failed overflow %zu\n", - b->level, - b->data->min_key.inode, - b->data->min_key.offset, - b->data->max_key.inode, - b->data->max_key.offset, - ptrs, - f->key_u64s, - f->bits_per_field[0], - f->bits_per_field[1], - f->bits_per_field[2], - f->bits_per_field[3], - f->bits_per_field[4], - b->unpack_fn_len, - b->nr.live_u64s * sizeof(u64), - btree_bytes(c) - sizeof(struct btree_node), - b->nr.live_u64s * 100 / btree_max_u64s(c), - b->sib_u64s[0], - b->sib_u64s[1], - BTREE_FOREGROUND_MERGE_THRESHOLD(c), - b->nr.packed_keys, - b->nr.unpacked_keys, - stats.floats, - stats.failed_unpacked, - stats.failed_prev, - stats.failed_overflow); + pr_buf(out, + "l %u %llu:%llu - %llu:%llu:\n" + " ptrs: ", + b->level, + b->data->min_key.inode, + b->data->min_key.offset, + b->data->max_key.inode, + b->data->max_key.offset); + bch2_val_to_text(out, c, BKEY_TYPE_BTREE, + bkey_i_to_s_c(&b->key)); + pr_buf(out, "\n" + " format: u64s %u fields %u %u %u %u %u\n" + " unpack fn len: %u\n" + " bytes used %zu/%zu (%zu%% full)\n" + " sib u64s: %u, %u (merge threshold %zu)\n" + " nr packed keys %u\n" + " nr unpacked keys %u\n" + " floats %zu\n" + " failed unpacked %zu\n" + " failed prev %zu\n" + " failed overflow %zu\n", + f->key_u64s, + f->bits_per_field[0], + f->bits_per_field[1], + f->bits_per_field[2], + f->bits_per_field[3], + f->bits_per_field[4], + b->unpack_fn_len, + b->nr.live_u64s * sizeof(u64), + btree_bytes(c) - sizeof(struct btree_node), + b->nr.live_u64s * 100 / btree_max_u64s(c), + b->sib_u64s[0], + b->sib_u64s[1], + BTREE_FOREGROUND_MERGE_THRESHOLD(c), + b->nr.packed_keys, + b->nr.unpacked_keys, + stats.floats, + stats.failed_unpacked, + stats.failed_prev, + stats.failed_overflow); } diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index f7b9bcfe09a3..cb7f66fc8bd4 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -85,7 +85,7 @@ static inline unsigned btree_blocks(struct bch_fs *c) #define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->btree_id].b) -int bch2_print_btree_node(struct bch_fs *, struct btree *, - char *, size_t); +void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, + struct btree *); #endif /* _BCACHEFS_BTREE_CACHE_H */ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 1036b72f1ae6..f1c31e74348a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -913,26 +913,20 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset) vstruct_end(i) - (void *) i->_data); } -static int btree_err_msg(struct bch_fs *c, struct btree *b, struct bset *i, - unsigned offset, int write, char *buf, size_t len) +static void btree_err_msg(struct printbuf *out, struct bch_fs *c, + struct btree *b, struct bset *i, + unsigned offset, int write) { - char *out = buf, *end = buf + len; - - out += 
scnprintf(out, end - out, - "error validating btree node %s" - "at btree %u level %u/%u\n" - "pos %llu:%llu node offset %u", - write ? "before write " : "", - b->btree_id, b->level, - c->btree_roots[b->btree_id].level, - b->key.k.p.inode, b->key.k.p.offset, - b->written); + pr_buf(out, "error validating btree node %s" + "at btree %u level %u/%u\n" + "pos %llu:%llu node offset %u", + write ? "before write " : "", + b->btree_id, b->level, + c->btree_roots[b->btree_id].level, + b->key.k.p.inode, b->key.k.p.offset, + b->written); if (i) - out += scnprintf(out, end - out, - " bset u64s %u", - le16_to_cpu(i->u64s)); - - return out - buf; + pr_buf(out, " bset u64s %u", le16_to_cpu(i->u64s)); } enum btree_err_type { @@ -949,10 +943,11 @@ enum btree_validate_ret { #define btree_err(type, c, b, i, msg, ...) \ ({ \ __label__ out; \ - char _buf[300], *out = _buf, *end = out + sizeof(_buf); \ + char _buf[300]; \ + struct printbuf out = PBUF(_buf); \ \ - out += btree_err_msg(c, b, i, b->written, write, out, end - out);\ - out += scnprintf(out, end - out, ": " msg, ##__VA_ARGS__); \ + btree_err_msg(&out, c, b, i, b->written, write); \ + pr_buf(&out, ": " msg, ##__VA_ARGS__); \ \ if (type == BTREE_ERR_FIXABLE && \ write == READ && \ @@ -1117,7 +1112,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b, if (invalid) { char buf[160]; - bch2_bkey_val_to_text(c, type, buf, sizeof(buf), u); + bch2_bkey_val_to_text(&PBUF(buf), c, type, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey:\n%s\n%s", invalid, buf); @@ -1302,7 +1297,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry !bversion_cmp(u.k->version, MAX_VERSION))) { char buf[160]; - bch2_bkey_val_to_text(c, type, buf, sizeof(buf), u); + bch2_bkey_val_to_text(&PBUF(buf), c, type, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey %s: %s", buf, invalid); @@ -2060,7 +2055,7 @@ void bch2_btree_verify_flushed(struct bch_fs *c) ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) { - char *out = buf, *end = buf + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bucket_table *tbl; struct rhash_head *pos; struct btree *b; @@ -2077,18 +2072,18 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) !(b->will_make_reachable & 1)) continue; - out += scnprintf(out, end - out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n", - b, - (flags & (1 << BTREE_NODE_dirty)) != 0, - b->level, - b->written, - !list_empty_careful(&b->write_blocked), - b->will_make_reachable != 0, - b->will_make_reachable & 1, - b->writes[ idx].wait.list.first != NULL, - b->writes[!idx].wait.list.first != NULL); + pr_buf(&out, "%p d %u l %u w %u b %u r %u:%lu c %u p %u\n", + b, + (flags & (1 << BTREE_NODE_dirty)) != 0, + b->level, + b->written, + !list_empty_careful(&b->write_blocked), + b->will_make_reachable != 0, + b->will_make_reachable & 1, + b->writes[ idx].wait.list.first != NULL, + b->writes[!idx].wait.list.first != NULL); } rcu_read_unlock(); - return out - buf; + return out.pos - buf; } diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1ba59c53c36f..ea37fa21ed6e 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -427,7 +427,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, char buf[100]; struct bkey uk = bkey_unpack_key(b, k); - bch2_bkey_to_text(buf, sizeof(buf), &uk); + bch2_bkey_to_text(&PBUF(buf), &uk); panic("prev key should be before iter pos:\n%s\n%llu:%llu\n", buf, iter->pos.inode, iter->pos.offset); } @@ -437,7 +437,7 @@ 
static void __bch2_btree_iter_verify(struct btree_iter *iter, char buf[100]; struct bkey uk = bkey_unpack_key(b, k); - bch2_bkey_to_text(buf, sizeof(buf), &uk); + bch2_bkey_to_text(&PBUF(buf), &uk); panic("iter should be after current key:\n" "iter pos %llu:%llu\n" "cur key %s\n", @@ -687,7 +687,7 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) char buf[100]; struct bkey uk = bkey_unpack_key(b, k); - bch2_bkey_to_text(buf, sizeof(buf), &uk); + bch2_bkey_to_text(&PBUF(buf), &uk); panic("parent iter doesn't point to new node:\n%s\n%llu:%llu\n", buf, b->key.k.p.inode, b->key.k.p.offset); } @@ -1451,18 +1451,7 @@ recheck: : KEY_OFFSET_MAX) - n.p.offset)); - //EBUG_ON(!n.size); - if (!n.size) { - char buf[100]; - bch2_dump_btree_node(iter->l[0].b); - - bch2_bkey_to_text(buf, sizeof(buf), k.k); - panic("iter at %llu:%llu\n" - "next key %s\n", - iter->pos.inode, - iter->pos.offset, - buf); - } + EBUG_ON(!n.size); iter->k = n; iter->uptodate = BTREE_ITER_UPTODATE; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 4ec448718fd8..92bacd16fdc3 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2150,20 +2150,20 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) ssize_t bch2_btree_updates_print(struct bch_fs *c, char *buf) { - char *out = buf, *end = buf + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct btree_update *as; mutex_lock(&c->btree_interior_update_lock); list_for_each_entry(as, &c->btree_interior_update_list, list) - out += scnprintf(out, end - out, "%p m %u w %u r %u j %llu\n", - as, - as->mode, - as->nodes_written, - atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK, - as->journal.seq); + pr_buf(&out, "%p m %u w %u r %u j %llu\n", + as, + as->mode, + as->nodes_written, + atomic_read(&as->cl.remaining) & CLOSURE_REMAINING_MASK, + as->journal.seq); mutex_unlock(&c->btree_interior_update_lock); - return out - buf; + return out.pos - buf; } size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 550bb10bbb7b..0a9efe57d5a9 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -223,8 +223,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, k = bch2_btree_iter_peek(&iter); while (k.k && !(err = btree_iter_err(k))) { - bch2_bkey_val_to_text(i->c, bkey_type(0, i->id), - i->buf, sizeof(i->buf), k); + bch2_bkey_val_to_text(&PBUF(i->buf), i->c, + bkey_type(0, i->id), k); i->bytes = strlen(i->buf); BUG_ON(i->bytes >= PAGE_SIZE); i->buf[i->bytes] = '\n'; @@ -272,8 +272,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, return i->ret; for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) { - i->bytes = bch2_print_btree_node(i->c, b, i->buf, - sizeof(i->buf)); + bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); + i->bytes = strlen(i->buf); err = flush_buf(i); if (err) break; @@ -330,17 +330,16 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_btree_node_iter_peek(&l->iter, l->b); if (l->b != prev_node) { - i->bytes = bch2_print_btree_node(i->c, l->b, i->buf, - sizeof(i->buf)); + bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); + i->bytes = strlen(i->buf); err = flush_buf(i); if (err) break; } prev_node = l->b; - i->bytes = bch2_bkey_print_bfloat(l->b, _k, i->buf, - sizeof(i->buf)); - + bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); + i->bytes = strlen(i->buf); err = flush_buf(i); if (err) 
break; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 0651f5575131..c1a611b4d9ec 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -110,26 +110,23 @@ const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) } } -int bch2_dirent_to_text(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c k) +void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - char *out = buf, *end = buf + size; struct bkey_s_c_dirent d; switch (k.k->type) { case BCH_DIRENT: d = bkey_s_c_to_dirent(k); - out += bch_scnmemcpy(out, end - out, d.v->d_name, - bch2_dirent_name_bytes(d)); - out += scnprintf(out, end - out, " -> %llu", d.v->d_inum); + bch_scnmemcpy(out, d.v->d_name, + bch2_dirent_name_bytes(d)); + pr_buf(out, " -> %llu", d.v->d_inum); break; case BCH_DIRENT_WHITEOUT: - out += scnprintf(out, end - out, "whiteout"); + pr_buf(out, "whiteout"); break; } - - return out - buf; } static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 30d2143d4ca7..2afb0baed11a 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,7 +7,7 @@ extern const struct bch_hash_desc bch2_dirent_hash_desc; const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); -int bch2_dirent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_dirent_ops (struct bkey_ops) { \ .key_invalid = bch2_dirent_invalid, \ diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 48f472a384f1..ee10308131e9 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -83,11 +83,10 @@ err: return err; } -static size_t bch2_sb_disk_groups_to_text(char *buf, size_t size, +static void bch2_sb_disk_groups_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field *f) { - char *out = buf, *end = buf + size; struct bch_sb_field_disk_groups *groups = field_to_type(f, disk_groups); struct bch_disk_group *g; @@ -97,18 +96,14 @@ static size_t bch2_sb_disk_groups_to_text(char *buf, size_t size, g < groups->entries + nr_groups; g++) { if (g != groups->entries) - out += scnprintf(out, end - out, " "); + pr_buf(out, " "); if (BCH_GROUP_DELETED(g)) - out += scnprintf(out, end - out, "[deleted]"); + pr_buf(out, "[deleted]"); else - out += scnprintf(out, end - out, - "[parent %llu name %s]", - BCH_GROUP_PARENT(g), - g->label); + pr_buf(out, "[parent %llu name %s]", + BCH_GROUP_PARENT(g), g->label); } - - return out - buf; } const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { @@ -343,10 +338,10 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *sb, const char *name) return v; } -int bch2_disk_path_print(struct bch_sb_handle *sb, - char *buf, size_t len, unsigned v) +void bch2_disk_path_to_text(struct printbuf *out, + struct bch_sb_handle *sb, + unsigned v) { - char *out = buf, *end = out + len; struct bch_sb_field_disk_groups *groups = bch2_sb_get_disk_groups(sb->sb); struct bch_disk_group *g; @@ -374,26 +369,18 @@ int bch2_disk_path_print(struct bch_sb_handle *sb, } while (nr) { - unsigned b = 0; - v = path[--nr]; g = groups->entries + v; - if (end != out) - b = min_t(size_t, end - out, - strnlen(g->label, sizeof(g->label))); - memcpy(out, g->label, b); - if (b < end - out) - out[b] = '\0'; - out += b; + bch_scnmemcpy(out, g->label, + strnlen(g->label, sizeof(g->label))); if (nr) - out += scnprintf(out, end - out, "."); + 
pr_buf(out, "."); } - - return out - buf; + return; inval: - return scnprintf(buf, len, "invalid group %u", v); + pr_buf(out, "invalid group %u", v); } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) @@ -452,14 +439,14 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) return -EINVAL; } -int bch2_opt_target_print(struct bch_fs *c, char *buf, size_t len, u64 v) +void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, u64 v) { struct target t = target_decode(v); - int ret; switch (t.type) { case TARGET_NULL: - return scnprintf(buf, len, "none"); + pr_buf(out, "none"); + break; case TARGET_DEV: { struct bch_dev *ca; @@ -469,13 +456,12 @@ int bch2_opt_target_print(struct bch_fs *c, char *buf, size_t len, u64 v) : NULL; if (ca && percpu_ref_tryget(&ca->io_ref)) { - ret = scnprintf(buf, len, "/dev/%pg", - ca->disk_sb.bdev); + pr_buf(out, "/dev/%pg", ca->disk_sb.bdev); percpu_ref_put(&ca->io_ref); } else if (ca) { - ret = scnprintf(buf, len, "offline device %u", t.dev); + pr_buf(out, "offline device %u", t.dev); } else { - ret = scnprintf(buf, len, "invalid device %u", t.dev); + pr_buf(out, "invalid device %u", t.dev); } rcu_read_unlock(); @@ -483,12 +469,10 @@ int bch2_opt_target_print(struct bch_fs *c, char *buf, size_t len, u64 v) } case TARGET_GROUP: mutex_lock(&c->sb_lock); - ret = bch2_disk_path_print(&c->disk_sb, buf, len, t.group); + bch2_disk_path_to_text(out, &c->disk_sb, t.group); mutex_unlock(&c->sb_lock); break; default: BUG(); } - - return ret; } diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index d202eb3a9de6..ceb75f86b615 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -59,10 +59,11 @@ bool bch2_dev_in_target(struct bch_fs *, unsigned, unsigned); int bch2_disk_path_find(struct bch_sb_handle *, const char *); int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); -int bch2_disk_path_print(struct bch_sb_handle *, char *, size_t, unsigned); +void bch2_disk_path_to_text(struct printbuf *, struct bch_sb_handle *, + unsigned); int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); -int bch2_opt_target_print(struct bch_fs *, char *, size_t, u64); +void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, u64); int bch2_sb_disk_groups_to_cpu(struct bch_fs *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 1606826e7802..a7223e7c8793 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -464,21 +464,18 @@ static const char *extent_ptr_invalid(const struct bch_fs *c, return NULL; } -static size_t extent_print_ptrs(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c_extent e) +static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c_extent e) { - char *out = buf, *end = buf + size; const union bch_extent_entry *entry; struct bch_extent_crc_unpacked crc; const struct bch_extent_ptr *ptr; struct bch_dev *ca; bool first = true; -#define p(...) 
(out += scnprintf(out, end - out, __VA_ARGS__)) - extent_for_each_entry(e, entry) { if (!first) - p(" "); + pr_buf(out, " "); switch (__extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_crc32: @@ -486,12 +483,12 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - p("crc: c_size %u size %u offset %u nonce %u csum %u compress %u", - crc.compressed_size, - crc.uncompressed_size, - crc.offset, crc.nonce, - crc.csum_type, - crc.compression_type); + pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", + crc.compressed_size, + crc.uncompressed_size, + crc.offset, crc.nonce, + crc.csum_type, + crc.compression_type); break; case BCH_EXTENT_ENTRY_ptr: ptr = entry_to_ptr(entry); @@ -499,14 +496,14 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, ? bch_dev_bkey_exists(c, ptr->dev) : NULL; - p("ptr: %u:%llu gen %u%s%s", ptr->dev, - (u64) ptr->offset, ptr->gen, - ptr->cached ? " cached" : "", - ca && ptr_stale(ca, ptr) - ? " stale" : ""); + pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, + (u64) ptr->offset, ptr->gen, + ptr->cached ? " cached" : "", + ca && ptr_stale(ca, ptr) + ? " stale" : ""); break; default: - p("(invalid extent entry %.16llx)", *((u64 *) entry)); + pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); goto out; } @@ -514,9 +511,7 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, } out: if (bkey_extent_is_cached(e.k)) - p(" cached"); -#undef p - return out - buf; + pr_buf(out, " cached"); } static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, @@ -681,8 +676,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) { - bch2_bkey_val_to_text(c, btree_node_type(b), - buf, sizeof(buf), k); + bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); bch2_fs_bug(c, "btree key bad (replicas not marked in superblock):\n%s", buf); @@ -691,29 +685,23 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, return; err: - bch2_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k); - bch2_fs_bug(c, "%s btree pointer %s: bucket %zi " - "gen %i mark %08x", - err, buf, PTR_BUCKET_NR(ca, ptr), - mark.gen, (unsigned) mark.v.counter); + bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", + err, buf, PTR_BUCKET_NR(ca, ptr), + mark.gen, (unsigned) mark.v.counter); } -int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c k) +void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - char *out = buf, *end = buf + size; const char *invalid; -#define p(...) 
(out += scnprintf(out, end - out, __VA_ARGS__)) - if (bkey_extent_is_data(k.k)) - out += extent_print_ptrs(c, buf, size, bkey_s_c_to_extent(k)); + extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); invalid = bch2_btree_ptr_invalid(c, k); if (invalid) - p(" invalid: %s", invalid); -#undef p - return out - buf; + pr_buf(out, " invalid: %s", invalid); } int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b, @@ -1112,8 +1100,8 @@ static void verify_extent_nonoverlapping(struct btree *b, char buf1[100]; char buf2[100]; - bch2_bkey_to_text(buf1, sizeof(buf1), &insert->k); - bch2_bkey_to_text(buf2, sizeof(buf2), &uk); + bch2_bkey_to_text(&PBUF(buf1), &insert->k); + bch2_bkey_to_text(&PBUF(buf2), &uk); bch2_dump_btree_node(b); panic("insert > next :\n" @@ -1705,8 +1693,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (replicas > BCH_REPLICAS_MAX) { - bch2_bkey_val_to_text(c, btree_node_type(b), buf, - sizeof(buf), e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), + e.s_c); bch2_fs_bug(c, "extent key bad (too many replicas: %u): %s", replicas, buf); @@ -1715,8 +1703,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) { - bch2_bkey_val_to_text(c, btree_node_type(b), - buf, sizeof(buf), e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), + e.s_c); bch2_fs_bug(c, "extent key bad (replicas not marked in superblock):\n%s", buf); @@ -1726,12 +1714,11 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, return; bad_ptr: - bch2_bkey_val_to_text(c, btree_node_type(b), buf, - sizeof(buf), e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), + e.s_c); bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu " "gen %i type %u", buf, PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type); - return; } void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) @@ -1748,22 +1735,17 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k } } -int bch2_extent_to_text(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c k) +void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - char *out = buf, *end = buf + size; const char *invalid; -#define p(...) 
(out += scnprintf(out, end - out, __VA_ARGS__)) - if (bkey_extent_is_data(k.k)) - out += extent_print_ptrs(c, buf, size, bkey_s_c_to_extent(k)); + extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); invalid = bch2_extent_invalid(c, k); if (invalid) - p(" invalid: %s", invalid); -#undef p - return out - buf; + pr_buf(out, " invalid: %s", invalid); } static void bch2_extent_crc_init(union bch_extent_crc *crc, diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 8754a940a476..d121ce5b3225 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -18,7 +18,8 @@ union bch_extent_crc; const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -int bch2_btree_ptr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); #define bch2_bkey_btree_ops (struct bkey_ops) { \ @@ -30,7 +31,7 @@ void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -int bch2_extent_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s); enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *, struct bkey_i *, struct bkey_i *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 88bf88c047ae..b6fe2059fe5f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1619,7 +1619,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) continue; - bch2_opt_to_text(c, buf, sizeof(buf), opt, v, + bch2_opt_to_text(&PBUF(buf), c, opt, v, OPT_SHOW_MOUNT_STYLE); seq_putc(seq, ','); seq_puts(seq, buf); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 7e08592253a6..74b83201c213 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -234,8 +234,9 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, if (fsck_err_on(k2.k->type == desc.key_type && !desc.cmp_bkey(k, k2), c, "duplicate hash table keys:\n%s", - (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id), - buf, sizeof(buf), k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + bkey_type(0, desc.btree_id), + k), buf))) { ret = fsck_hash_delete_at(desc, &h->info, k_iter); if (ret) return ret; @@ -298,8 +299,9 @@ static int hash_check_key(const struct bch_hash_desc desc, "hashed to %llu chain starts at %llu\n%s", desc.btree_id, k.k->p.offset, hashed, h->chain->pos.offset, - (bch2_bkey_val_to_text(c, bkey_type(0, desc.btree_id), - buf, sizeof(buf), k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + bkey_type(0, desc.btree_id), + k), buf))) { ret = hash_redo_key(desc, h, c, k_iter, k, hashed); if (ret) { bch_err(c, "hash_redo_key err %i", ret); @@ -382,8 +384,9 @@ err_redo: "hashed to %llu chain starts at %llu\n%s", buf, strlen(buf), BTREE_ID_DIRENTS, k->k->p.offset, hash, h->chain->pos.offset, - (bch2_bkey_val_to_text(c, bkey_type(0, BTREE_ID_DIRENTS), - buf, sizeof(buf), *k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + bkey_type(0, BTREE_ID_DIRENTS), + *k), buf))) { ret = hash_redo_key(bch2_dirent_hash_desc, h, c, iter, *k, hash); if (ret) @@ -525,13 +528,15 @@ static int 
check_dirents(struct bch_fs *c) if (fsck_err_on(!w.have_inode, c, "dirent in nonexisting directory:\n%s", - (bch2_bkey_val_to_text(c, (enum bkey_type) BTREE_ID_DIRENTS, - buf, sizeof(buf), k), buf)) || + (bch2_bkey_val_to_text(&PBUF(buf), c, + (enum bkey_type) BTREE_ID_DIRENTS, + k), buf)) || fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", mode_to_type(w.inode.bi_mode), - (bch2_bkey_val_to_text(c, (enum bkey_type) BTREE_ID_DIRENTS, - buf, sizeof(buf), k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + (enum bkey_type) BTREE_ID_DIRENTS, + k), buf))) { ret = bch2_btree_delete_at(iter, 0); if (ret) goto err; @@ -580,8 +585,9 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(d_inum == d.k->p.inode, c, "dirent points to own directory:\n%s", - (bch2_bkey_val_to_text(c, (enum bkey_type) BTREE_ID_DIRENTS, - buf, sizeof(buf), k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + (enum bkey_type) BTREE_ID_DIRENTS, + k), buf))) { ret = remove_dirent(c, iter, d); if (ret) goto err; @@ -597,8 +603,9 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!have_target, c, "dirent points to missing inode:\n%s", - (bch2_bkey_val_to_text(c, (enum bkey_type) BTREE_ID_DIRENTS, - buf, sizeof(buf), k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + (enum bkey_type) BTREE_ID_DIRENTS, + k), buf))) { ret = remove_dirent(c, iter, d); if (ret) goto err; @@ -610,8 +617,9 @@ static int check_dirents(struct bch_fs *c) mode_to_type(target.bi_mode), c, "incorrect d_type: should be %u:\n%s", mode_to_type(target.bi_mode), - (bch2_bkey_val_to_text(c, (enum bkey_type) BTREE_ID_DIRENTS, - buf, sizeof(buf), k), buf))) { + (bch2_bkey_val_to_text(&PBUF(buf), c, + (enum bkey_type) BTREE_ID_DIRENTS, + k), buf))) { struct bkey_i_dirent *n; n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index debdbf58dd79..0a350c6d0932 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -228,10 +228,9 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) } } -int bch2_inode_to_text(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c k) +void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - char *out = buf, *end = out + size; struct bkey_s_c_inode inode; struct bch_inode_unpacked unpacked; @@ -239,18 +238,16 @@ int bch2_inode_to_text(struct bch_fs *c, char *buf, case BCH_INODE_FS: inode = bkey_s_c_to_inode(k); if (bch2_inode_unpack(inode, &unpacked)) { - out += scnprintf(out, end - out, "(unpack error)"); + pr_buf(out, "(unpack error)"); break; } #define BCH_INODE_FIELD(_name, _bits) \ - out += scnprintf(out, end - out, #_name ": %llu ", (u64) unpacked._name); + pr_buf(out, #_name ": %llu ", (u64) unpacked._name); BCH_INODE_FIELDS() #undef BCH_INODE_FIELD break; } - - return out - buf; } void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 8713b51d3af7..897ff65d01cb 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -7,7 +7,7 @@ #include const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); -int bch2_inode_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_inode_ops (struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 7499e15a2982..b4d037664628 100644 --- 
a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1027,38 +1027,38 @@ out: ssize_t bch2_journal_print_debug(struct journal *j, char *buf) { + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs *c = container_of(j, struct bch_fs, journal); union journal_res_state *s = &j->reservations; struct bch_dev *ca; unsigned iter; - ssize_t ret = 0; rcu_read_lock(); spin_lock(&j->lock); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "active journal entries:\t%llu\n" - "seq:\t\t\t%llu\n" - "last_seq:\t\t%llu\n" - "last_seq_ondisk:\t%llu\n" - "reservation count:\t%u\n" - "reservation offset:\t%u\n" - "current entry u64s:\t%u\n" - "io in flight:\t\t%i\n" - "need write:\t\t%i\n" - "dirty:\t\t\t%i\n" - "replay done:\t\t%i\n", - fifo_used(&j->pin), - journal_cur_seq(j), - journal_last_seq(j), - j->last_seq_ondisk, - journal_state_count(*s, s->idx), - s->cur_entry_offset, - j->cur_entry_u64s, - s->prev_buf_unwritten, - test_bit(JOURNAL_NEED_WRITE, &j->flags), - journal_entry_is_open(j), - test_bit(JOURNAL_REPLAY_DONE, &j->flags)); + pr_buf(&out, + "active journal entries:\t%llu\n" + "seq:\t\t\t%llu\n" + "last_seq:\t\t%llu\n" + "last_seq_ondisk:\t%llu\n" + "reservation count:\t%u\n" + "reservation offset:\t%u\n" + "current entry u64s:\t%u\n" + "io in flight:\t\t%i\n" + "need write:\t\t%i\n" + "dirty:\t\t\t%i\n" + "replay done:\t\t%i\n", + fifo_used(&j->pin), + journal_cur_seq(j), + journal_last_seq(j), + j->last_seq_ondisk, + journal_state_count(*s, s->idx), + s->cur_entry_offset, + j->cur_entry_u64s, + s->prev_buf_unwritten, + test_bit(JOURNAL_NEED_WRITE, &j->flags), + journal_entry_is_open(j), + test_bit(JOURNAL_REPLAY_DONE, &j->flags)); for_each_member_device_rcu(ca, c, iter, &c->rw_devs[BCH_DATA_JOURNAL]) { @@ -1067,50 +1067,46 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf) if (!ja->nr) continue; - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "dev %u:\n" - "\tnr\t\t%u\n" - "\tcur_idx\t\t%u (seq %llu)\n" - "\tlast_idx\t%u (seq %llu)\n", - iter, ja->nr, - ja->cur_idx, ja->bucket_seq[ja->cur_idx], - ja->last_idx, ja->bucket_seq[ja->last_idx]); + pr_buf(&out, + "dev %u:\n" + "\tnr\t\t%u\n" + "\tcur_idx\t\t%u (seq %llu)\n" + "\tlast_idx\t%u (seq %llu)\n", + iter, ja->nr, + ja->cur_idx, ja->bucket_seq[ja->cur_idx], + ja->last_idx, ja->bucket_seq[ja->last_idx]); } spin_unlock(&j->lock); rcu_read_unlock(); - return ret; + return out.pos - buf; } ssize_t bch2_journal_print_pins(struct journal *j, char *buf) { + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct journal_entry_pin_list *pin_list; struct journal_entry_pin *pin; - ssize_t ret = 0; u64 i; spin_lock(&j->lock); fifo_for_each_entry_ptr(pin_list, &j->pin, i) { - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%llu: count %u\n", - i, atomic_read(&pin_list->count)); + pr_buf(&out, "%llu: count %u\n", + i, atomic_read(&pin_list->count)); list_for_each_entry(pin, &pin_list->list, list) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "\t%p %pf\n", - pin, pin->flush); + pr_buf(&out, "\t%p %pf\n", + pin, pin->flush); if (!list_empty(&pin_list->flushed)) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "flushed:\n"); + pr_buf(&out, "flushed:\n"); list_for_each_entry(pin, &pin_list->flushed, list) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "\t%p %pf\n", - pin, pin->flush); + pr_buf(&out, "\t%p %pf\n", + pin, pin->flush); } spin_unlock(&j->lock); - return ret; + return out.pos - buf; } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index eb2fbe235483..4555d55b23dd 100644 --- a/fs/bcachefs/journal_io.c 
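
The journal.c hunks above show the whole conversion in miniature: the hand-maintained "ret += scnprintf(buf + ret, PAGE_SIZE - ret, ...)" accumulator becomes a struct printbuf initialized with _PBUF(), each print becomes pr_buf(), and the byte count the caller needs falls out as out.pos - buf. A freestanding sketch of the idiom, with userspace stand-ins for the helpers this series adds to util.h (pr_buf() is a function here rather than the kernel macro, because snprintf(), unlike the kernel's scnprintf(), reports the untruncated length and so needs clamping):

#include <stdarg.h>
#include <stdio.h>

struct printbuf {
	char	*pos;
	char	*end;
};

#define _PBUF(_buf, _len)			\
	((struct printbuf) {			\
		.pos	= (_buf),		\
		.end	= (_buf) + (_len),	\
	})

static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	long remaining = out->end - out->pos;
	va_list args;
	int n;

	if (remaining <= 0)
		return;

	va_start(args, fmt);
	n = vsnprintf(out->pos, remaining, fmt, args);
	va_end(args);

	/* clamp to what was actually written, like scnprintf() does */
	if (n > remaining - 1)
		n = remaining - 1;
	if (n > 0)
		out->pos += n;
}

/* the shape of every show function after this patch (values invented): */
static long show_journal_debug(char *buf, unsigned long len)
{
	struct printbuf out = _PBUF(buf, len);

	pr_buf(&out, "seq:\t\t\t%llu\n", 128ULL);
	pr_buf(&out, "last_seq:\t\t%llu\n", 100ULL);

	return out.pos - buf;
}

int main(void)
{
	char buf[128];
	long n = show_journal_debug(buf, sizeof(buf));

	printf("%ld bytes:\n%s", n, buf);
	return 0;
}
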
+++ b/fs/bcachefs/journal_io.c @@ -146,7 +146,6 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, { void *next = vstruct_next(entry); const char *invalid; - char buf[160]; int ret = 0; if (journal_entry_err_on(!k->k.u64s, c, @@ -179,8 +178,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k)); if (invalid) { - bch2_bkey_val_to_text(c, key_type, buf, sizeof(buf), - bkey_i_to_s_c(k)); + char buf[160]; + + bch2_bkey_val_to_text(&PBUF(buf), c, key_type, + bkey_i_to_s_c(k)); mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", type, invalid, buf); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 9351caeb6630..c12af1a86f0b 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -145,7 +145,7 @@ const struct bch_option bch2_opt_table[] = { #define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices #define OPT_FN(_fn) .type = BCH_OPT_FN, \ .parse = _fn##_parse, \ - .print = _fn##_print + .to_text = _fn##_to_text #define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ [Opt_##_name] = { \ @@ -235,38 +235,38 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, return 0; } -int bch2_opt_to_text(struct bch_fs *c, char *buf, size_t len, - const struct bch_option *opt, u64 v, - unsigned flags) +void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, + const struct bch_option *opt, u64 v, + unsigned flags) { - char *out = buf, *end = buf + len; - if (flags & OPT_SHOW_MOUNT_STYLE) { - if (opt->type == BCH_OPT_BOOL) - return scnprintf(out, end - out, "%s%s", - v ? "" : "no", - opt->attr.name); + if (opt->type == BCH_OPT_BOOL) { + pr_buf(out, "%s%s", + v ? "" : "no", + opt->attr.name); + return; + } - out += scnprintf(out, end - out, "%s=", opt->attr.name); + pr_buf(out, "%s=", opt->attr.name); } switch (opt->type) { case BCH_OPT_BOOL: case BCH_OPT_UINT: - out += scnprintf(out, end - out, "%lli", v); + pr_buf(out, "%lli", v); break; case BCH_OPT_STR: - out += (flags & OPT_SHOW_FULL_LIST) - ? 
bch2_scnprint_string_list(out, end - out, opt->choices, v) - : scnprintf(out, end - out, opt->choices[v]); + if (flags & OPT_SHOW_FULL_LIST) + bch2_string_opt_to_text(out, opt->choices, v); + else + pr_buf(out, opt->choices[v]); break; case BCH_OPT_FN: - return opt->print(c, out, end - out, v); + opt->to_text(out, c, v); + break; default: BUG(); } - - return out - buf; } int bch2_parse_mount_opts(struct bch_opts *opts, char *options) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 52fb9781d933..47617cd011ff 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -229,6 +229,7 @@ enum bch_opt_id { }; struct bch_fs; +struct printbuf; struct bch_option { struct attribute attr; @@ -245,7 +246,7 @@ struct bch_option { }; struct { int (*parse)(struct bch_fs *, const char *, u64 *); - int (*print)(struct bch_fs *, char *, size_t, u64); + void (*to_text)(struct printbuf *, struct bch_fs *, u64); }; }; @@ -265,8 +266,8 @@ int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 #define OPT_SHOW_FULL_LIST (1 << 0) #define OPT_SHOW_MOUNT_STYLE (1 << 1) -int bch2_opt_to_text(struct bch_fs *, char *, size_t, - const struct bch_option *, u64, unsigned); +void bch2_opt_to_text(struct printbuf *, struct bch_fs *, + const struct bch_option *, u64, unsigned); int bch2_parse_mount_opts(struct bch_opts *, char *); diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 79a7f82868d6..8127f4454dac 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -46,10 +46,9 @@ static const char * const bch2_quota_counters[] = { "inodes", }; -int bch2_quota_to_text(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c k) +void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - char *out = buf, *end = buf + size; struct bkey_s_c_quota dq; unsigned i; @@ -58,14 +57,12 @@ int bch2_quota_to_text(struct bch_fs *c, char *buf, dq = bkey_s_c_to_quota(k); for (i = 0; i < Q_COUNTERS; i++) - out += scnprintf(out, end - out, "%s hardlimit %llu softlimit %llu", - bch2_quota_counters[i], - le64_to_cpu(dq.v->c[i].hardlimit), - le64_to_cpu(dq.v->c[i].softlimit)); + pr_buf(out, "%s hardlimit %llu softlimit %llu", + bch2_quota_counters[i], + le64_to_cpu(dq.v->c[i].hardlimit), + le64_to_cpu(dq.v->c[i].softlimit)); break; } - - return out - buf; } #ifdef CONFIG_BCACHEFS_QUOTA diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 9650e518cd64..9c06eb07bccb 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,7 +8,7 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_quota; const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c); -int bch2_quota_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_quota_ops (struct bkey_ops) { \ .key_invalid = bch2_quota_invalid, \ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 570dbae5a240..5d246c5b8186 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -252,49 +252,43 @@ static int bch2_rebalance_thread(void *arg) ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) { - char *out = buf, *end = out + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs_rebalance *r = &c->rebalance; struct rebalance_work w = rebalance_work(c); char h1[21], h2[21]; bch2_hprint(h1, w.dev_most_full_work << 9); bch2_hprint(h2, w.dev_most_full_capacity << 9); - out += scnprintf(out, end - out, - "fullest_dev (%i):\t%s/%s\n", - 
w.dev_most_full_idx, h1, h2); + pr_buf(&out, "fullest_dev (%i):\t%s/%s\n", + w.dev_most_full_idx, h1, h2); bch2_hprint(h1, w.total_work << 9); bch2_hprint(h2, c->capacity << 9); - out += scnprintf(out, end - out, - "total work:\t\t%s/%s\n", - h1, h2); + pr_buf(&out, "total work:\t\t%s/%s\n", h1, h2); - out += scnprintf(out, end - out, - "rate:\t\t\t%u\n", - r->pd.rate.rate); + pr_buf(&out, "rate:\t\t\t%u\n", r->pd.rate.rate); switch (r->state) { case REBALANCE_WAITING: - out += scnprintf(out, end - out, "waiting\n"); + pr_buf(&out, "waiting\n"); break; case REBALANCE_THROTTLED: bch2_hprint(h1, (r->throttled_until_iotime - atomic_long_read(&c->io_clock[WRITE].now)) << 9); - out += scnprintf(out, end - out, - "throttled for %lu sec or %s io\n", - (r->throttled_until_cputime - jiffies) / HZ, - h1); + pr_buf(&out, "throttled for %lu sec or %s io\n", + (r->throttled_until_cputime - jiffies) / HZ, + h1); break; case REBALANCE_RUNNING: - out += scnprintf(out, end - out, "running\n"); - out += scnprintf(out, end - out, "pos %llu:%llu\n", - r->move_stats.iter.pos.inode, - r->move_stats.iter.pos.offset); + pr_buf(&out, "running\n"); + pr_buf(&out, "pos %llu:%llu\n", + r->move_stats.iter.pos.inode, + r->move_stats.iter.pos.offset); break; } - return out - buf; + return out.pos - buf; } void bch2_rebalance_stop(struct bch_fs *c) diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index a7c3aca1bf01..fb11b97cdeee 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -40,38 +40,31 @@ static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL); } -static int replicas_entry_to_text(struct bch_replicas_entry *e, - char *buf, size_t size) +static void replicas_entry_to_text(struct printbuf *out, + struct bch_replicas_entry *e) { - char *out = buf, *end = out + size; unsigned i; - out += scnprintf(out, end - out, "%u: [", e->data_type); + pr_buf(out, "%u: [", e->data_type); for (i = 0; i < e->nr_devs; i++) - out += scnprintf(out, end - out, - i ? " %u" : "%u", e->devs[i]); - out += scnprintf(out, end - out, "]"); - - return out - buf; + pr_buf(out, i ? 
" %u" : "%u", e->devs[i]); + pr_buf(out, "]"); } -int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r, - char *buf, size_t size) +void bch2_cpu_replicas_to_text(struct printbuf *out, + struct bch_replicas_cpu *r) { - char *out = buf, *end = out + size; struct bch_replicas_entry *e; bool first = true; for_each_cpu_replicas_entry(r, e) { if (!first) - out += scnprintf(out, end - out, " "); + pr_buf(out, " "); first = false; - out += replicas_entry_to_text(e, out, end - out); + replicas_entry_to_text(out, e); } - - return out - buf; } static void extent_to_replicas(struct bkey_s_c k, @@ -510,32 +503,28 @@ err: return err; } -const struct bch_sb_field_ops bch_sb_field_ops_replicas = { - .validate = bch2_sb_validate_replicas, -}; - -int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t size) +static void bch2_sb_replicas_to_text(struct printbuf *out, + struct bch_sb *sb, + struct bch_sb_field *f) { - char *out = buf, *end = out + size; + struct bch_sb_field_replicas *r = field_to_type(f, replicas); struct bch_replicas_entry *e; bool first = true; - if (!r) { - out += scnprintf(out, end - out, "(no replicas section found)"); - return out - buf; - } - for_each_replicas_entry(r, e) { if (!first) - out += scnprintf(out, end - out, " "); + pr_buf(out, " "); first = false; - out += replicas_entry_to_text(e, out, end - out); + replicas_entry_to_text(out, e); } - - return out - buf; } +const struct bch_sb_field_ops bch_sb_field_ops_replicas = { + .validate = bch2_sb_validate_replicas, + .to_text = bch2_sb_replicas_to_text, +}; + /* Query replicas: */ bool bch2_replicas_marked(struct bch_fs *c, diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index ebbb1334cc2c..d3d81a1a39cd 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -13,8 +13,7 @@ int bch2_mark_replicas(struct bch_fs *, enum bch_data_type, int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type, struct bkey_s_c); -int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t); -int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *, char *, size_t); +void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); struct replicas_status { struct { diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 58c35d9665eb..0c2b20c9e8c4 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -951,21 +951,20 @@ static const char *bch2_sb_field_validate(struct bch_sb *sb, : NULL; } -size_t bch2_sb_field_to_text(char *buf, size_t size, - struct bch_sb *sb, struct bch_sb_field *f) +void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) { unsigned type = le32_to_cpu(f->type); - size_t (*to_text)(char *, size_t, struct bch_sb *, - struct bch_sb_field *) = - type < BCH_SB_FIELD_NR - ? bch2_sb_field_ops[type]->to_text - : NULL; + const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR + ? 
bch2_sb_field_ops[type] : NULL; - if (!to_text) { - if (size) - buf[0] = '\0'; - return 0; - } + if (ops) + pr_buf(out, "%s", bch2_sb_fields[type]); + else + pr_buf(out, "(unknown field %u)", type); + + pr_buf(out, " (size %llu):", vstruct_bytes(f)); - return to_text(buf, size, sb, f); + if (ops && ops->to_text) + bch2_sb_field_ops[type]->to_text(out, sb, f); } diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 1ea91f71f3b0..ceef650d55dd 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -38,7 +38,7 @@ extern const char * const bch2_sb_fields[]; struct bch_sb_field_ops { const char * (*validate)(struct bch_sb *, struct bch_sb_field *); - size_t (*to_text)(char *, size_t, struct bch_sb *, + void (*to_text)(struct printbuf *, struct bch_sb *, struct bch_sb_field *); }; @@ -136,7 +136,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) void bch2_fs_mark_clean(struct bch_fs *, bool); -size_t bch2_sb_field_to_text(char *, size_t, struct bch_sb *, - struct bch_sb_field *); +void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, + struct bch_sb_field *); #endif /* _BCACHEFS_SUPER_IO_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 54d23cf46f95..a22beff7cc96 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1236,10 +1236,9 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) data = bch2_dev_has_data(c, ca); if (data) { char data_has_str[100]; - bch2_scnprint_flag_list(data_has_str, - sizeof(data_has_str), - bch2_data_types, - data); + + bch2_string_opt_to_text(&PBUF(data_has_str), + bch2_data_types, data); bch_err(ca, "Remove failed, still has data (%s)", data_has_str); ret = -EBUSY; goto err; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index ee91bcc6433c..4ca84de6ab0e 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -230,42 +230,34 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf) { - char *out = buf, *end = buf + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs_usage stats = bch2_fs_usage_read(c); unsigned replicas, type; - out += scnprintf(out, end - out, - "capacity:\t\t%llu\n", - c->capacity); + pr_buf(&out, "capacity:\t\t%llu\n", c->capacity); for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) { - out += scnprintf(out, end - out, - "%u replicas:\n", - replicas + 1); + pr_buf(&out, "%u replicas:\n", replicas + 1); for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++) - out += scnprintf(out, end - out, - "\t%s:\t\t%llu\n", - bch2_data_types[type], - stats.replicas[replicas].data[type]); - out += scnprintf(out, end - out, - "\treserved:\t%llu\n", - stats.replicas[replicas].persistent_reserved); + pr_buf(&out, "\t%s:\t\t%llu\n", + bch2_data_types[type], + stats.replicas[replicas].data[type]); + pr_buf(&out, "\treserved:\t%llu\n", + stats.replicas[replicas].persistent_reserved); } - out += scnprintf(out, end - out, "bucket usage\n"); + pr_buf(&out, "bucket usage\n"); for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++) - out += scnprintf(out, end - out, - "\t%s:\t\t%llu\n", - bch2_data_types[type], - stats.buckets[type]); + pr_buf(&out, "\t%s:\t\t%llu\n", + bch2_data_types[type], + stats.buckets[type]); - out += scnprintf(out, end - out, - "online reserved:\t%llu\n", - stats.online_reserved); + pr_buf(&out, "online reserved:\t%llu\n", + stats.online_reserved); - return out - buf; + return out.pos - buf; } static ssize_t 
bch2_compression_stats(struct bch_fs *c, char *buf) @@ -559,16 +551,16 @@ struct attribute *bch2_fs_internal_files[] = { SHOW(bch2_fs_opts_dir) { - char *out = buf, *end = buf + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt = container_of(attr, struct bch_option, attr); int id = opt - bch2_opt_table; u64 v = bch2_opt_get_by_id(&c->opts, id); - out += bch2_opt_to_text(c, out, end - out, opt, v, OPT_SHOW_FULL_LIST); - out += scnprintf(out, end - out, "\n"); + bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST); + pr_buf(&out, "\n"); - return out - buf; + return out.pos - buf; } STORE(bch2_fs_opts_dir) @@ -742,25 +734,23 @@ static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca, static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf) { + struct printbuf out = _PBUF(buf, PAGE_SIZE); enum alloc_reserve i; - ssize_t ret; spin_lock(&ca->freelist_lock); - ret = scnprintf(buf, PAGE_SIZE, - "free_inc:\t%zu\t%zu\n", - fifo_used(&ca->free_inc), - ca->free_inc.size); + pr_buf(&out, "free_inc:\t%zu\t%zu\n", + fifo_used(&ca->free_inc), + ca->free_inc.size); for (i = 0; i < RESERVE_NR; i++) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "free[%u]:\t%zu\t%zu\n", i, - fifo_used(&ca->free[i]), - ca->free[i].size); + pr_buf(&out, "free[%u]:\t%zu\t%zu\n", i, + fifo_used(&ca->free[i]), + ca->free[i].size); spin_unlock(&ca->freelist_lock); - return ret; + return out.pos - buf; } static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf) @@ -825,11 +815,11 @@ static const char * const bch2_rw[] = { static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf) { - char *out = buf, *end = buf + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); int rw, i, cpu; for (rw = 0; rw < 2; rw++) { - out += scnprintf(out, end - out, "%s:\n", bch2_rw[rw]); + pr_buf(&out, "%s:\n", bch2_rw[rw]); for (i = 1; i < BCH_DATA_NR; i++) { u64 n = 0; @@ -837,19 +827,19 @@ static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf) for_each_possible_cpu(cpu) n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i]; - out += scnprintf(out, end - out, "%-12s:%12llu\n", - bch2_data_types[i], n << 9); + pr_buf(&out, "%-12s:%12llu\n", + bch2_data_types[i], n << 9); } } - return out - buf; + return out.pos - buf; } SHOW(bch2_dev) { struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); struct bch_fs *c = ca->fs; - char *out = buf, *end = buf + PAGE_SIZE; + struct printbuf out = _PBUF(buf, PAGE_SIZE); sysfs_printf(uuid, "%pU\n", ca->uuid.b); @@ -863,41 +853,39 @@ SHOW(bch2_dev) if (attr == &sysfs_label) { if (ca->mi.group) { mutex_lock(&c->sb_lock); - out += bch2_disk_path_print(&c->disk_sb, out, end - out, - ca->mi.group - 1); + bch2_disk_path_to_text(&out, &c->disk_sb, + ca->mi.group - 1); mutex_unlock(&c->sb_lock); } else { - out += scnprintf(out, end - out, "none"); + pr_buf(&out, "none"); } - out += scnprintf(out, end - out, "\n"); - return out - buf; + pr_buf(&out, "\n"); + return out.pos - buf; } if (attr == &sysfs_has_data) { - out += bch2_scnprint_flag_list(out, end - out, - bch2_data_types, - bch2_dev_has_data(c, ca)); - out += scnprintf(out, end - out, "\n"); - return out - buf; + bch2_flags_to_text(&out, bch2_data_types, + bch2_dev_has_data(c, ca)); + pr_buf(&out, "\n"); + return out.pos - buf; } sysfs_pd_controller_show(copy_gc, &ca->copygc_pd); if (attr == &sysfs_cache_replacement_policy) { - out += bch2_scnprint_string_list(out, end - out, - bch2_cache_replacement_policies, - 
ca->mi.replacement); - out += scnprintf(out, end - out, "\n"); - return out - buf; + bch2_string_opt_to_text(&out, + bch2_cache_replacement_policies, + ca->mi.replacement); + pr_buf(&out, "\n"); + return out.pos - buf; } if (attr == &sysfs_state_rw) { - out += bch2_scnprint_string_list(out, end - out, - bch2_dev_state, - ca->mi.state); - out += scnprintf(out, end - out, "\n"); - return out - buf; + bch2_string_opt_to_text(&out, bch2_dev_state, + ca->mi.state); + pr_buf(&out, "\n"); + return out.pos - buf; } if (attr == &sysfs_iodone) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index ed90bd3a5d18..bb6b4383d33f 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -124,47 +124,31 @@ ssize_t bch2_hprint(char *buf, s64 v) return sprintf(buf, "%lli%s%c", v, dec, si_units[u]); } -ssize_t bch2_scnprint_string_list(char *buf, size_t size, - const char * const list[], - size_t selected) +void bch2_string_opt_to_text(struct printbuf *out, + const char * const list[], + size_t selected) { - char *out = buf; size_t i; - if (size) - *out = '\0'; - for (i = 0; list[i]; i++) - out += scnprintf(out, buf + size - out, - i == selected ? "[%s] " : "%s ", list[i]); - - if (out != buf) - *--out = '\0'; - - return out - buf; + pr_buf(out, i == selected ? "[%s] " : "%s ", list[i]); } -ssize_t bch2_scnprint_flag_list(char *buf, size_t size, - const char * const list[], u64 flags) +void bch2_flags_to_text(struct printbuf *out, + const char * const list[], u64 flags) { - char *out = buf, *end = buf + size; unsigned bit, nr = 0; + if (out->pos != out->end) + *out->pos = '\0'; + while (list[nr]) nr++; - if (size) - *out = '\0'; - while (flags && (bit = __ffs(flags)) < nr) { - out += scnprintf(out, end - out, "%s,", list[bit]); + pr_buf(out, "%s,", list[bit]); flags ^= 1 << bit; } - - if (out != buf) - *--out = '\0'; - - return out - buf; } u64 bch2_read_flag_list(char *opt, const char * const list[]) @@ -329,50 +313,50 @@ static const struct time_unit *pick_time_units(u64 ns) return u; } -static size_t pr_time_units(char *buf, size_t len, u64 ns) +static void pr_time_units(struct printbuf *out, u64 ns) { const struct time_unit *u = pick_time_units(ns); - return scnprintf(buf, len, "%llu %s", div_u64(ns, u->nsecs), u->name); + pr_buf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); } size_t bch2_time_stats_print(struct bch2_time_stats *stats, char *buf, size_t len) { - char *out = buf, *end = buf + len; + struct printbuf out = _PBUF(buf, len); const struct time_unit *u; u64 freq = READ_ONCE(stats->average_frequency); u64 q, last_q = 0; int i; - out += scnprintf(out, end - out, "count:\t\t%llu\n", + pr_buf(&out, "count:\t\t%llu\n", stats->count); - out += scnprintf(out, end - out, "rate:\t\t%llu/sec\n", - freq ? div64_u64(NSEC_PER_SEC, freq) : 0); + pr_buf(&out, "rate:\t\t%llu/sec\n", + freq ? 
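
bch2_flags_to_text() above walks a bitmask with __ffs(): it prints the name of each set bit, clears that bit, and stops when the mask is empty or a set bit falls past the end of the name list. The same loop in portable form, with __builtin_ctzll() (a GCC/Clang builtin) standing in for the kernel's __ffs(); note that, like the new kernel function, it leaves a trailing comma:

#include <stdio.h>

static void flags_to_text(const char * const list[], unsigned long long flags)
{
	unsigned bit, nr = 0;

	while (list[nr])
		nr++;

	while (flags && (bit = __builtin_ctzll(flags)) < nr) {
		printf("%s,", list[bit]);
		flags ^= 1ULL << bit;
	}
}

int main(void)
{
	static const char * const data_types[] = {
		"none", "sb", "journal", "btree", "user", NULL
	};

	flags_to_text(data_types, (1 << 1) | (1 << 3));	/* prints "sb,btree," */
	printf("\n");
	return 0;
}
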
div64_u64(NSEC_PER_SEC, freq) : 0); - out += scnprintf(out, end - out, "frequency:\t"); - out += pr_time_units(out, end - out, freq); + pr_buf(&out, "frequency:\t"); + pr_time_units(&out, freq); - out += scnprintf(out, end - out, "\navg duration:\t"); - out += pr_time_units(out, end - out, stats->average_duration); + pr_buf(&out, "\navg duration:\t"); + pr_time_units(&out, stats->average_duration); - out += scnprintf(out, end - out, "\nmax duration:\t"); - out += pr_time_units(out, end - out, stats->max_duration); + pr_buf(&out, "\nmax duration:\t"); + pr_time_units(&out, stats->max_duration); i = eytzinger0_first(NR_QUANTILES); u = pick_time_units(stats->quantiles.entries[i].m); - out += scnprintf(out, end - out, "\nquantiles (%s):\t", u->name); + pr_buf(&out, "\nquantiles (%s):\t", u->name); eytzinger0_for_each(i, NR_QUANTILES) { bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; q = max(stats->quantiles.entries[i].m, last_q); - out += scnprintf(out, end - out, "%llu%s", - div_u64(q, u->nsecs), - is_last ? "\n" : " "); + pr_buf(&out, "%llu%s", + div_u64(q, u->nsecs), + is_last ? "\n" : " "); last_q = q; } - return out - buf; + return out.pos - buf; } void bch2_time_stats_exit(struct bch2_time_stats *stats) @@ -615,18 +599,17 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) } } -size_t bch_scnmemcpy(char *buf, size_t size, const char *src, size_t len) +void bch_scnmemcpy(struct printbuf *out, + const char *src, size_t len) { - size_t n; - - if (!size) - return 0; + size_t n = printbuf_remaining(out); - n = min(size - 1, len); - memcpy(buf, src, n); - buf[n] = '\0'; - - return n; + if (n) { + n = min(n - 1, len); + memcpy(out->pos, src, n); + out->pos += n; + *out->pos = '\0'; + } } #include "eytzinger.h" diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index cb6bed68abf8..47afd3955c7a 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -235,6 +235,32 @@ do { \ #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) +struct printbuf { + char *pos; + char *end; +}; + +static inline size_t printbuf_remaining(struct printbuf *buf) +{ + return buf->end - buf->pos; +} + +#define _PBUF(_buf, _len) \ + ((struct printbuf) { \ + .pos = _buf, \ + .end = _buf + _len, \ + }) + +#define PBUF(_buf) _PBUF(_buf, sizeof(_buf)) + +#define pr_buf(_out, ...) 
\ +do { \ + (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \ + __VA_ARGS__); \ +} while (0) + +void bch_scnmemcpy(struct printbuf *, const char *, size_t); + int bch2_strtoint_h(const char *, int *); int bch2_strtouint_h(const char *, unsigned int *); int bch2_strtoll_h(const char *, long long *); @@ -311,9 +337,10 @@ ssize_t bch2_hprint(char *buf, s64 v); bool bch2_is_zero(const void *, size_t); -ssize_t bch2_scnprint_string_list(char *, size_t, const char * const[], size_t); +void bch2_string_opt_to_text(struct printbuf *, + const char * const [], size_t); -ssize_t bch2_scnprint_flag_list(char *, size_t, const char * const[], u64); +void bch2_flags_to_text(struct printbuf *, const char * const[], u64); u64 bch2_read_flag_list(char *, const char * const[]); #define NR_QUANTILES 15 @@ -629,8 +656,6 @@ static inline struct bio_vec next_contig_bvec(struct bio *bio, #define bio_for_each_contig_segment(bv, bio, iter) \ __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter) -size_t bch_scnmemcpy(char *, size_t, const char *, size_t); - void sort_cmp_size(void *base, size_t num, size_t size, int (*cmp_func)(const void *, const void *, size_t), void (*swap_func)(void *, void *, size_t)); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 44bf4a2f3c84..7f6258e09a0d 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -111,10 +111,9 @@ const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) } } -int bch2_xattr_to_text(struct bch_fs *c, char *buf, - size_t size, struct bkey_s_c k) +void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { - char *out = buf, *end = buf + size; const struct xattr_handler *handler; struct bkey_s_c_xattr xattr; @@ -124,26 +123,22 @@ int bch2_xattr_to_text(struct bch_fs *c, char *buf, handler = bch2_xattr_type_to_handler(xattr.v->x_type); if (handler && handler->prefix) - out += scnprintf(out, end - out, "%s", handler->prefix); + pr_buf(out, "%s", handler->prefix); else if (handler) - out += scnprintf(out, end - out, "(type %u)", - xattr.v->x_type); + pr_buf(out, "(type %u)", xattr.v->x_type); else - out += scnprintf(out, end - out, "(unknown type %u)", - xattr.v->x_type); - - out += bch_scnmemcpy(out, end - out, xattr.v->x_name, - xattr.v->x_name_len); - out += scnprintf(out, end - out, ":"); - out += bch_scnmemcpy(out, end - out, xattr_val(xattr.v), - le16_to_cpu(xattr.v->x_val_len)); + pr_buf(out, "(unknown type %u)", xattr.v->x_type); + + bch_scnmemcpy(out, xattr.v->x_name, + xattr.v->x_name_len); + pr_buf(out, ":"); + bch_scnmemcpy(out, xattr_val(xattr.v), + le16_to_cpu(xattr.v->x_val_len)); break; case BCH_XATTR_WHITEOUT: - out += scnprintf(out, end - out, "whiteout"); + pr_buf(out, "whiteout"); break; } - - return out - buf; } int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, @@ -355,7 +350,7 @@ static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, struct bch_opts opts = bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode)); const struct bch_option *opt; - int ret, id; + int id; u64 v; id = bch2_opt_lookup(name); @@ -369,9 +364,22 @@ static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, v = bch2_opt_get_by_id(&opts, id); - ret = bch2_opt_to_text(c, buffer, size, opt, v, 0); + if (!buffer) { + char buf[512]; + struct printbuf out = PBUF(buf); - return ret < size || !buffer ? 
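
One detail of the two initializers added above: PBUF() takes sizeof() of its argument, so it is only correct for actual arrays; for a pointer-plus-length pair, as in the sysfs and xattr callers, _PBUF() must be used or the capacity silently becomes sizeof(char *). A small demonstration, with struct printbuf collapsed to one line (same fields as the patch adds):

#include <stdio.h>
#include <stdlib.h>

struct printbuf { char *pos, *end; };

#define _PBUF(_buf, _len)	((struct printbuf) { (_buf), (_buf) + (_len) })
#define PBUF(_buf)		_PBUF(_buf, sizeof(_buf))

int main(void)
{
	char stack_buf[100];
	char *heap_buf = malloc(4096);

	struct printbuf a = PBUF(stack_buf);		/* capacity 100 */
	struct printbuf b = _PBUF(heap_buf, 4096);	/* capacity 4096 */
	struct printbuf c = PBUF(heap_buf);		/* wrong: sizeof(char *) */

	printf("%td %td %td\n", a.end - a.pos, b.end - b.pos, c.end - c.pos);
	free(heap_buf);
	return 0;
}
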
ret : -ERANGE; + bch2_opt_to_text(&out, c, opt, v, 0); + + return out.pos - buf; + } else { + struct printbuf out = _PBUF(buffer, size); + + bch2_opt_to_text(&out, c, opt, v, 0); + + return printbuf_remaining(&out) + ? (void *) out.pos - buffer + : -ERANGE; + } } struct inode_opt_set { diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index b2fe1dc42b83..63be44b02a2b 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -7,7 +7,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c); -int bch2_xattr_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c); +void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_xattr_ops (struct bkey_ops) { \ .key_invalid = bch2_xattr_invalid, \ -- cgit v1.2.3 From 26609b619fa2301eb7eb5855a7005d99f8a07a73 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 Nov 2018 15:10:01 -0400 Subject: bcachefs: Make bkey types globally unique this lets us get rid of a lot of extra switch statements - in a lot of places we dispatch on the btree node type, and then the key type, so this is a nice cleanup across a lot of code. Also improve the on disk format versioning stuff. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 6 +- fs/bcachefs/alloc_background.c | 28 +- fs/bcachefs/alloc_background.h | 2 +- fs/bcachefs/alloc_foreground.c | 11 +- fs/bcachefs/alloc_foreground.h | 2 +- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_format.h | 184 +++---- fs/bcachefs/bkey.c | 2 +- fs/bcachefs/bkey.h | 88 ++- fs/bcachefs/bkey_methods.c | 218 +++++--- fs/bcachefs/bkey_methods.h | 65 +-- fs/bcachefs/bkey_sort.c | 21 +- fs/bcachefs/bkey_sort.h | 9 +- fs/bcachefs/bset.h | 6 +- fs/bcachefs/btree_cache.c | 18 +- fs/bcachefs/btree_cache.h | 6 +- fs/bcachefs/btree_gc.c | 197 ++----- fs/bcachefs/btree_gc.h | 8 +- fs/bcachefs/btree_io.c | 97 ++-- fs/bcachefs/btree_iter.c | 4 +- fs/bcachefs/btree_types.h | 35 +- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/btree_update_interior.c | 56 +- fs/bcachefs/btree_update_leaf.c | 10 +- fs/bcachefs/buckets.c | 222 ++++---- fs/bcachefs/buckets.h | 4 +- fs/bcachefs/debug.c | 6 +- fs/bcachefs/dirent.c | 72 +-- fs/bcachefs/dirent.h | 2 +- fs/bcachefs/ec.c | 60 +- fs/bcachefs/ec.h | 10 +- fs/bcachefs/extents.c | 1031 ++++++++++++++++------------------- fs/bcachefs/extents.h | 529 ++++++++++-------- fs/bcachefs/fs-io.c | 16 +- fs/bcachefs/fs.c | 12 +- fs/bcachefs/fsck.c | 28 +- fs/bcachefs/inode.c | 108 ++-- fs/bcachefs/inode.h | 12 +- fs/bcachefs/io.c | 18 +- fs/bcachefs/journal_io.c | 58 +- fs/bcachefs/migrate.c | 30 +- fs/bcachefs/move.c | 58 +- fs/bcachefs/move.h | 2 +- fs/bcachefs/movinggc.c | 34 +- fs/bcachefs/opts.h | 3 + fs/bcachefs/quota.c | 56 +- fs/bcachefs/quota.h | 8 +- fs/bcachefs/rebalance.c | 36 +- fs/bcachefs/recovery.c | 23 +- fs/bcachefs/replicas.c | 96 ++-- fs/bcachefs/replicas.h | 5 +- fs/bcachefs/str_hash.h | 9 +- fs/bcachefs/super-io.c | 51 +- fs/bcachefs/super-io.h | 2 + fs/bcachefs/super.c | 2 +- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/trace.h | 2 +- fs/bcachefs/xattr.c | 102 ++-- fs/bcachefs/xattr.h | 2 +- 59 files changed, 1777 insertions(+), 2010 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index eb6fa4d7c1f6..bcfc9fdce35e 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -24,9 +24,9 @@ static inline int acl_to_xattr_type(int type) { switch (type) { case ACL_TYPE_ACCESS: - return BCH_XATTR_INDEX_POSIX_ACL_ACCESS; + return 
KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; case ACL_TYPE_DEFAULT: - return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT; + return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; default: BUG(); } @@ -355,7 +355,7 @@ int bch2_acl_chmod(struct btree_trans *trans, iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &inode->ei_str_hash, inode->v.i_ino, - &X_SEARCH(BCH_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), + &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (IS_ERR(iter)) return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 390b008b0200..885aff511f97 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -76,22 +76,15 @@ static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) { + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + if (k.k->p.inode >= c->sb.nr_devices || !c->devs[k.k->p.inode]) return "invalid device"; - switch (k.k->type) { - case BCH_ALLOC: { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - - /* allow for unknown fields */ - if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) - return "incorrect value size"; - break; - } - default: - return "invalid type"; - } + /* allow for unknown fields */ + if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) + return "incorrect value size"; return NULL; } @@ -99,14 +92,9 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - switch (k.k->type) { - case BCH_ALLOC: { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - pr_buf(out, "gen %u", a.v->gen); - break; - } - } + pr_buf(out, "gen %u", a.v->gen); } static inline unsigned get_alloc_field(const u8 **p, unsigned bytes) @@ -158,7 +146,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k) struct bucket *g; const u8 *d; - if (k.k->type != BCH_ALLOC) + if (k.k->type != KEY_TYPE_alloc) return; a = bkey_s_c_to_alloc(k); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 59b6a5f2f890..8ced4e845281 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -11,7 +11,7 @@ const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_alloc_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_alloc (struct bkey_ops) { \ .key_invalid = bch2_alloc_invalid, \ .val_to_text = bch2_alloc_to_text, \ } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 6e5f6e57da56..ddcf2c407764 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -923,7 +923,8 @@ err: * as allocated out of @ob */ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, - struct bkey_i_extent *e, unsigned sectors) + struct bkey_i *k, unsigned sectors) + { struct open_bucket *ob; unsigned i; @@ -935,13 +936,11 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); struct bch_extent_ptr tmp = ob->ptr; - EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev)); - - tmp.cached = bkey_extent_is_cached(&e->k) || - (!ca->mi.durability && wp->type == BCH_DATA_USER); + tmp.cached = !ca->mi.durability && + wp->type == BCH_DATA_USER; tmp.offset += 
ca->mi.bucket_size - ob->sectors_free; - extent_ptr_append(e, tmp); + bch2_bkey_append_ptr(k, tmp); BUG_ON(sectors > ob->sectors_free); ob->sectors_free -= sectors; diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index c71cf7381729..94389052fa94 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -101,7 +101,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *, struct closure *); void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, - struct bkey_i_extent *, unsigned); + struct bkey_i *, unsigned); void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 258a67d4437b..cd2fff851bbe 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -541,6 +541,7 @@ struct bch_fs { __uuid_t uuid; __uuid_t user_uuid; + u16 version; u16 encoded_extent_max; u8 nr_devices; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a00e77fa1d37..801156b74335 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -307,15 +307,6 @@ static inline void bkey_init(struct bkey *k) #define __BKEY_PADDED(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -#define BKEY_VAL_TYPE(name, nr) \ -struct bkey_i_##name { \ - union { \ - struct bkey k; \ - struct bkey_i k_i; \ - }; \ - struct bch_##name v; \ -} - /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. @@ -330,19 +321,37 @@ struct bkey_i_##name { \ * by new writes or cluster-wide GC. Node repair can also overwrite them with * the same or a more recent version number, but not with an older version * number. 
+ * + * - WHITEOUT: for hash table btrees */ -#define KEY_TYPE_DELETED 0 -#define KEY_TYPE_DISCARD 1 -#define KEY_TYPE_ERROR 2 -#define KEY_TYPE_COOKIE 3 -#define KEY_TYPE_PERSISTENT_DISCARD 4 -#define KEY_TYPE_GENERIC_NR 128 +#define BCH_BKEY_TYPES() \ + x(deleted, 0) \ + x(discard, 1) \ + x(error, 2) \ + x(cookie, 3) \ + x(whiteout, 4) \ + x(btree_ptr, 5) \ + x(extent, 6) \ + x(reservation, 7) \ + x(inode, 8) \ + x(inode_generation, 9) \ + x(dirent, 10) \ + x(xattr, 11) \ + x(alloc, 12) \ + x(quota, 13) \ + x(stripe, 14) + +enum bch_bkey_type { +#define x(name, nr) KEY_TYPE_##name = nr, + BCH_BKEY_TYPES() +#undef x + KEY_TYPE_MAX, +}; struct bch_cookie { struct bch_val v; __le64 cookie; }; -BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE); /* Extents */ @@ -620,21 +629,12 @@ union bch_extent_entry { #undef x }; -enum { - BCH_EXTENT = 128, - - /* - * This is kind of a hack, we're overloading the type for a boolean that - * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED - * have the same value type: - */ - BCH_EXTENT_CACHED = 129, +struct bch_btree_ptr { + struct bch_val v; - /* - * Persistent reservation: - */ - BCH_RESERVATION = 130, -}; + __u64 _data[0]; + struct bch_extent_ptr start[]; +} __attribute__((packed, aligned(8))); struct bch_extent { struct bch_val v; @@ -642,7 +642,6 @@ struct bch_extent { __u64 _data[0]; union bch_extent_entry start[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(extent, BCH_EXTENT); struct bch_reservation { struct bch_val v; @@ -651,7 +650,6 @@ struct bch_reservation { __u8 nr_replicas; __u8 pad[3]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(reservation, BCH_RESERVATION); /* Maximum size (in u64s) a single pointer could be: */ #define BKEY_EXTENT_PTR_U64s_MAX\ @@ -679,12 +677,6 @@ BKEY_VAL_TYPE(reservation, BCH_RESERVATION); #define BCACHEFS_ROOT_INO 4096 -enum bch_inode_types { - BCH_INODE_FS = 128, - BCH_INODE_BLOCKDEV = 129, - BCH_INODE_GENERATION = 130, -}; - struct bch_inode { struct bch_val v; @@ -693,7 +685,6 @@ struct bch_inode { __le16 bi_mode; __u8 fields[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode, BCH_INODE_FS); struct bch_inode_generation { struct bch_val v; @@ -701,7 +692,6 @@ struct bch_inode_generation { __le32 bi_generation; __le32 pad; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION); #define BCH_INODE_FIELDS() \ BCH_INODE_FIELD(bi_atime, 64) \ @@ -766,24 +756,6 @@ enum { LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); -struct bch_inode_blockdev { - struct bch_val v; - - __le64 i_size; - __le64 i_flags; - - /* Seconds: */ - __le64 i_ctime; - __le64 i_mtime; - - __uuid_t i_uuid; - __u8 i_label[32]; -} __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV); - -/* Thin provisioned volume, or cache for another block device? 
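
The BCH_BKEY_TYPES() list above is an x-macro: the list is written once and expanded twice, once here for enum bch_bkey_type and once in bkey_methods.c for the bch_bkey_types[] name array, so the two can never drift apart. The pattern in miniature, with three invented entries instead of fifteen (positional initialization of the name array works because the numbering is dense from zero):

#include <stdio.h>

#define EXAMPLE_BKEY_TYPES()	\
	x(deleted,	0)	\
	x(error,	1)	\
	x(extent,	2)

enum example_bkey_type {
#define x(name, nr)	EX_KEY_TYPE_##name = nr,
	EXAMPLE_BKEY_TYPES()
#undef x
	EX_KEY_TYPE_MAX,
};

static const char * const example_bkey_types[] = {
#define x(name, nr)	#name,
	EXAMPLE_BKEY_TYPES()
#undef x
	NULL
};

int main(void)
{
	unsigned i;

	for (i = 0; i < EX_KEY_TYPE_MAX; i++)
		printf("%u: %s\n", i, example_bkey_types[i]);
	return 0;
}
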
*/ -LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) - /* Dirents */ /* @@ -797,11 +769,6 @@ LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) * collision: */ -enum { - BCH_DIRENT = 128, - BCH_DIRENT_WHITEOUT = 129, -}; - struct bch_dirent { struct bch_val v; @@ -816,7 +783,6 @@ struct bch_dirent { __u8 d_name[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(dirent, BCH_DIRENT); #define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \ sizeof(struct bkey) - \ @@ -825,16 +791,11 @@ BKEY_VAL_TYPE(dirent, BCH_DIRENT); /* Xattrs */ -enum { - BCH_XATTR = 128, - BCH_XATTR_WHITEOUT = 129, -}; - -#define BCH_XATTR_INDEX_USER 0 -#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1 -#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -#define BCH_XATTR_INDEX_TRUSTED 3 -#define BCH_XATTR_INDEX_SECURITY 4 +#define KEY_TYPE_XATTR_INDEX_USER 0 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 +#define KEY_TYPE_XATTR_INDEX_SECURITY 4 struct bch_xattr { struct bch_val v; @@ -843,14 +804,9 @@ struct bch_xattr { __le16 x_val_len; __u8 x_name[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(xattr, BCH_XATTR); /* Bucket/allocation information: */ -enum { - BCH_ALLOC = 128, -}; - enum { BCH_ALLOC_FIELD_READ_TIME = 0, BCH_ALLOC_FIELD_WRITE_TIME = 1, @@ -862,14 +818,9 @@ struct bch_alloc { __u8 gen; __u8 data[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(alloc, BCH_ALLOC); /* Quotas: */ -enum { - BCH_QUOTA = 128, -}; - enum quota_types { QTYP_USR = 0, QTYP_GRP = 1, @@ -892,14 +843,9 @@ struct bch_quota { struct bch_val v; struct bch_quota_counter c[Q_COUNTERS]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(quota, BCH_QUOTA); /* Erasure coding */ -enum { - BCH_STRIPE = 128, -}; - struct bch_stripe { struct bch_val v; __le16 sectors; @@ -913,7 +859,6 @@ struct bch_stripe { struct bch_extent_ptr ptrs[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(stripe, BCH_STRIPE); /* Optional/variable size superblock sections: */ @@ -1149,15 +1094,21 @@ struct bch_sb_field_clean { /* Superblock: */ /* - * Version 8: BCH_SB_ENCODED_EXTENT_MAX_BITS - * BCH_MEMBER_DATA_ALLOWED - * Version 9: incompatible extent nonce change + * New versioning scheme: + * One common version number for all on disk data structures - superblock, btree + * nodes, journal entries */ +#define BCH_JSET_VERSION_OLD 2 +#define BCH_BSET_VERSION_OLD 3 + +enum bcachefs_metadata_version { + bcachefs_metadata_version_min = 9, + bcachefs_metadata_version_new_versioning = 10, + bcachefs_metadata_version_bkey_renumber = 10, + bcachefs_metadata_version_max = 11, +}; -#define BCH_SB_VERSION_MIN 7 -#define BCH_SB_VERSION_EXTENT_MAX 8 -#define BCH_SB_VERSION_EXTENT_NONCE_V1 9 -#define BCH_SB_VERSION_MAX 9 +#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define BCH_SB_SECTOR 8 #define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ @@ -1176,6 +1127,9 @@ struct bch_sb_layout { /* * @offset - sector where this sb was written * @version - on disk format version + * @version_min - Oldest metadata version this filesystem contains; so we can + * safely drop compatibility code and refuse to mount filesystems + * we'd need it for * @magic - identifies as a bcachefs superblock (BCACHE_MAGIC) * @seq - incremented each time superblock is written * @uuid - used for generating various magic numbers and identifying @@ -1369,11 +1323,6 @@ static inline __u64 __bset_magic(struct bch_sb *sb) /* Journal */ -#define 
BCACHE_JSET_VERSION_UUIDv1 1 -#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ -#define BCACHE_JSET_VERSION_JKEYS 2 -#define BCACHE_JSET_VERSION 2 - #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) #define BCH_JSET_ENTRY_TYPES() \ @@ -1453,35 +1402,26 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); /* Btree: */ -#define DEFINE_BCH_BTREE_IDS() \ - DEF_BTREE_ID(EXTENTS, 0, "extents") \ - DEF_BTREE_ID(INODES, 1, "inodes") \ - DEF_BTREE_ID(DIRENTS, 2, "dirents") \ - DEF_BTREE_ID(XATTRS, 3, "xattrs") \ - DEF_BTREE_ID(ALLOC, 4, "alloc") \ - DEF_BTREE_ID(QUOTAS, 5, "quotas") \ - DEF_BTREE_ID(EC, 6, "erasure_coding") - -#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val, +#define BCH_BTREE_IDS() \ + x(EXTENTS, 0, "extents") \ + x(INODES, 1, "inodes") \ + x(DIRENTS, 2, "dirents") \ + x(XATTRS, 3, "xattrs") \ + x(ALLOC, 4, "alloc") \ + x(QUOTAS, 5, "quotas") \ + x(EC, 6, "erasure_coding") enum btree_id { - DEFINE_BCH_BTREE_IDS() +#define x(kwd, val, name) BTREE_ID_##kwd = val, + BCH_BTREE_IDS() +#undef x BTREE_ID_NR }; -#undef DEF_BTREE_ID - #define BTREE_MAX_DEPTH 4U /* Btree nodes */ -/* Version 1: Seed pointer into btree node checksum - */ -#define BCACHE_BSET_CSUM 1 -#define BCACHE_BSET_KEY_v1 2 -#define BCACHE_BSET_JOURNAL_SEQ 3 -#define BCACHE_BSET_VERSION 3 - /* * Btree nodes * diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index d7e022ba2027..d35cdde299c4 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -488,7 +488,7 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, pack_state_finish(&state, out); out->u64s = f->key_u64s; out->format = KEY_FORMAT_LOCAL_BTREE; - out->type = KEY_TYPE_DELETED; + out->type = KEY_TYPE_deleted; #ifdef CONFIG_BCACHEFS_DEBUG if (exact) { diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 9679631a7e89..44044fcd6f9f 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -61,10 +61,12 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); } -#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED) +#define bkey_val_end(_k) vstruct_idx((_k).v, bkey_val_u64s((_k).k)) + +#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) #define bkey_whiteout(_k) \ - ((_k)->type == KEY_TYPE_DELETED || (_k)->type == KEY_TYPE_DISCARD) + ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) #define bkey_packed_typecheck(_k) \ ({ \ @@ -439,7 +441,15 @@ static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion * functions. 
*/ -#define __BKEY_VAL_ACCESSORS(name, nr, _assert) \ +#define BKEY_VAL_ACCESSORS(name) \ +struct bkey_i_##name { \ + union { \ + struct bkey k; \ + struct bkey_i k_i; \ + }; \ + struct bch_##name v; \ +}; \ + \ struct bkey_s_c_##name { \ union { \ struct { \ @@ -464,20 +474,20 @@ struct bkey_s_##name { \ \ static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return container_of(&k->k, struct bkey_i_##name, k); \ } \ \ static inline const struct bkey_i_##name * \ bkey_i_to_##name##_c(const struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return container_of(&k->k, struct bkey_i_##name, k); \ } \ \ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ { \ - _assert(k.k->type, nr); \ + EBUG_ON(k.k->type != KEY_TYPE_##name); \ return (struct bkey_s_##name) { \ .k = k.k, \ .v = container_of(k.v, struct bch_##name, v), \ @@ -486,7 +496,7 @@ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ \ static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ { \ - _assert(k.k->type, nr); \ + EBUG_ON(k.k->type != KEY_TYPE_##name); \ return (struct bkey_s_c_##name) { \ .k = k.k, \ .v = container_of(k.v, struct bch_##name, v), \ @@ -512,7 +522,7 @@ name##_i_to_s_c(const struct bkey_i_##name *k) \ \ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return (struct bkey_s_##name) { \ .k = &k->k, \ .v = container_of(&k->v, struct bch_##name, v), \ @@ -522,27 +532,13 @@ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ static inline struct bkey_s_c_##name \ bkey_i_to_s_c_##name(const struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return (struct bkey_s_c_##name) { \ .k = &k->k, \ .v = container_of(&k->v, struct bch_##name, v), \ }; \ } \ \ -static inline struct bch_##name * \ -bkey_p_##name##_val(const struct bkey_format *f, \ - struct bkey_packed *k) \ -{ \ - return container_of(bkeyp_val(f, k), struct bch_##name, v); \ -} \ - \ -static inline const struct bch_##name * \ -bkey_p_c_##name##_val(const struct bkey_format *f, \ - const struct bkey_packed *k) \ -{ \ - return container_of(bkeyp_val(f, k), struct bch_##name, v); \ -} \ - \ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ { \ struct bkey_i_##name *k = \ @@ -550,45 +546,23 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ \ bkey_init(&k->k); \ memset(&k->v, 0, sizeof(k->v)); \ - k->k.type = nr; \ + k->k.type = KEY_TYPE_##name; \ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ \ return k; \ } -#define __BKEY_VAL_ASSERT(_type, _nr) EBUG_ON(_type != _nr) - -#define BKEY_VAL_ACCESSORS(name, _nr) \ - static inline void __bch_##name##_assert(u8 type, u8 nr) \ - { \ - EBUG_ON(type != _nr); \ - } \ - \ - __BKEY_VAL_ACCESSORS(name, _nr, __bch_##name##_assert) - -BKEY_VAL_ACCESSORS(cookie, KEY_TYPE_COOKIE); - -static inline void __bch2_extent_assert(u8 type, u8 nr) -{ - EBUG_ON(type != BCH_EXTENT && type != BCH_EXTENT_CACHED); -} - -__BKEY_VAL_ACCESSORS(extent, BCH_EXTENT, __bch2_extent_assert); -BKEY_VAL_ACCESSORS(reservation, BCH_RESERVATION); - -BKEY_VAL_ACCESSORS(inode, BCH_INODE_FS); -BKEY_VAL_ACCESSORS(inode_blockdev, BCH_INODE_BLOCKDEV); -BKEY_VAL_ACCESSORS(inode_generation, BCH_INODE_GENERATION); - -BKEY_VAL_ACCESSORS(dirent, BCH_DIRENT); - 
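
The rewritten BKEY_VAL_ACCESSORS() above no longer needs a type number or a per-type assert hook: with key types globally unique, every generated cast can check k->type against KEY_TYPE_##name directly. A reduced model of what one expansion provides, with struct bkey and the value struct trimmed to a couple of fields and assert() standing in for EBUG_ON():

#include <assert.h>
#include <stdio.h>

struct bkey		{ unsigned char u64s, type; };
struct bch_cookie	{ unsigned long long cookie; };

enum { KEY_TYPE_cookie = 3 };

#define BKEY_VAL_ACCESSORS(name)					\
struct bkey_i_##name {							\
	struct bkey	k;						\
	struct bch_##name v;						\
};									\
									\
static struct bkey_i_##name *bkey_i_to_##name(struct bkey *k)		\
{									\
	assert(k->type == KEY_TYPE_##name); /* EBUG_ON() upstream */	\
	return (struct bkey_i_##name *) k;  /* k is first member */	\
}

BKEY_VAL_ACCESSORS(cookie)

int main(void)
{
	struct bkey_i_cookie c = {
		.k = { .type = KEY_TYPE_cookie },
		.v = { .cookie = 42 },
	};

	printf("%llu\n", bkey_i_to_cookie(&c.k)->v.cookie);
	return 0;
}
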
-BKEY_VAL_ACCESSORS(xattr, BCH_XATTR); - -BKEY_VAL_ACCESSORS(alloc, BCH_ALLOC); - -BKEY_VAL_ACCESSORS(quota, BCH_QUOTA); - -BKEY_VAL_ACCESSORS(stripe, BCH_STRIPE); +BKEY_VAL_ACCESSORS(cookie); +BKEY_VAL_ACCESSORS(btree_ptr); +BKEY_VAL_ACCESSORS(extent); +BKEY_VAL_ACCESSORS(reservation); +BKEY_VAL_ACCESSORS(inode); +BKEY_VAL_ACCESSORS(inode_generation); +BKEY_VAL_ACCESSORS(dirent); +BKEY_VAL_ACCESSORS(xattr); +BKEY_VAL_ACCESSORS(alloc); +BKEY_VAL_ACCESSORS(quota); +BKEY_VAL_ACCESSORS(stripe); /* byte order helpers */ diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 81c66950668c..f518062d896b 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -12,66 +12,84 @@ #include "quota.h" #include "xattr.h" -const struct bkey_ops bch2_bkey_ops[] = { - [BKEY_TYPE_EXTENTS] = bch2_bkey_extent_ops, - [BKEY_TYPE_INODES] = bch2_bkey_inode_ops, - [BKEY_TYPE_DIRENTS] = bch2_bkey_dirent_ops, - [BKEY_TYPE_XATTRS] = bch2_bkey_xattr_ops, - [BKEY_TYPE_ALLOC] = bch2_bkey_alloc_ops, - [BKEY_TYPE_QUOTAS] = bch2_bkey_quota_ops, - [BKEY_TYPE_EC] = bch2_bkey_ec_ops, - [BKEY_TYPE_BTREE] = bch2_bkey_btree_ops, +const char * const bch_bkey_types[] = { +#define x(name, nr) #name, + BCH_BKEY_TYPES() +#undef x + NULL }; -const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +static const char *deleted_key_invalid(const struct bch_fs *c, + struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + return NULL; +} + +const struct bkey_ops bch2_bkey_ops_deleted = { + .key_invalid = deleted_key_invalid, +}; + +const struct bkey_ops bch2_bkey_ops_discard = { + .key_invalid = deleted_key_invalid, +}; - switch (k.k->type) { - case KEY_TYPE_DELETED: - case KEY_TYPE_DISCARD: - return NULL; +static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + if (bkey_val_bytes(k.k)) + return "value size should be zero"; - case KEY_TYPE_ERROR: - return bkey_val_bytes(k.k) != 0 - ? "value size should be zero" - : NULL; + return NULL; +} - case KEY_TYPE_COOKIE: - return bkey_val_bytes(k.k) != sizeof(struct bch_cookie) - ? 
"incorrect value size" - : NULL; +const struct bkey_ops bch2_bkey_ops_error = { + .key_invalid = empty_val_key_invalid, +}; - default: - if (k.k->type < KEY_TYPE_GENERIC_NR) - return "invalid type"; +static const char *key_type_cookie_invalid(const struct bch_fs *c, + struct bkey_s_c k) +{ + if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) + return "incorrect value size"; - return ops->key_invalid(c, k); - } + return NULL; } -const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +const struct bkey_ops bch2_bkey_ops_cookie = { + .key_invalid = key_type_cookie_invalid, +}; + +const struct bkey_ops bch2_bkey_ops_whiteout = { + .key_invalid = empty_val_key_invalid, +}; + +static const struct bkey_ops bch2_bkey_ops[] = { +#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, + BCH_BKEY_TYPES() +#undef x +}; + +const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + if (k.k->type >= KEY_TYPE_MAX) + return "invalid type"; + + return bch2_bkey_ops[k.k->type].key_invalid(c, k); +} +const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type) +{ if (k.k->u64s < BKEY_U64s) return "u64s too small"; - if (!ops->is_extents) { - if (k.k->size) - return "nonzero size field"; - } else { + if (btree_node_type_is_extents(type)) { if ((k.k->size == 0) != bkey_deleted(k.k)) return "bad size field"; + } else { + if (k.k->size) + return "nonzero size field"; } - if (ops->is_extents && - !k.k->size && - !bkey_deleted(k.k)) - return "zero size field"; - if (k.k->p.snapshot) return "nonzero snapshot"; @@ -82,11 +100,11 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, return NULL; } -const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type) { - return __bch2_bkey_invalid(c, type, k) ?: - bch2_bkey_val_invalid(c, type, k); + return __bch2_bkey_invalid(c, k, type) ?: + bch2_bkey_val_invalid(c, k); } const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) @@ -102,24 +120,22 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - enum bkey_type type = btree_node_type(b); - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; const char *invalid; BUG_ON(!k.k->u64s); - invalid = bch2_bkey_invalid(c, type, k) ?: + invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, k); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "invalid bkey %s: %s", buf, invalid); return; } - if (k.k->type >= KEY_TYPE_GENERIC_NR && - ops->key_debugcheck) + if (ops->key_debugcheck) ops->key_debugcheck(c, b, k); } @@ -144,46 +160,90 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) } void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k) -{ - const struct bkey_ops *ops = &bch2_bkey_ops[type]; - - switch (k.k->type) { - case KEY_TYPE_DELETED: - pr_buf(out, " deleted"); - break; - case KEY_TYPE_DISCARD: - pr_buf(out, " discard"); - break; - case KEY_TYPE_ERROR: - pr_buf(out, " error"); - break; - case KEY_TYPE_COOKIE: - pr_buf(out, " cookie"); - break; - default: - if (k.k->type >= 
KEY_TYPE_GENERIC_NR && ops->val_to_text) - ops->val_to_text(out, c, k); - break; - } + struct bkey_s_c k) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; + + if (likely(ops->val_to_text)) + ops->val_to_text(out, c, k); + else + pr_buf(out, " %s", bch_bkey_types[k.k->type]); } void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k) + struct bkey_s_c k) { bch2_bkey_to_text(out, k.k); pr_buf(out, ": "); - bch2_val_to_text(out, c, type, k); + bch2_val_to_text(out, c, k); } -void bch2_bkey_swab(enum bkey_type type, - const struct bkey_format *f, - struct bkey_packed *k) +void bch2_bkey_swab(const struct bkey_format *f, + struct bkey_packed *k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + const struct bkey_ops *ops = &bch2_bkey_ops[k->type]; bch2_bkey_swab_key(f, k); if (ops->swab) ops->swab(f, k); } + +bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; + + return ops->key_normalize + ? ops->key_normalize(c, k) + : false; +} + +enum merge_result bch2_bkey_merge(struct bch_fs *c, + struct bkey_i *l, struct bkey_i *r) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type]; + + if (!key_merging_disabled(c) && + ops->key_merge && + l->k.type == r->k.type && + !bversion_cmp(l->k.version, r->k.version) && + !bkey_cmp(l->k.p, bkey_start_pos(&r->k))) + return ops->key_merge(c, l, r); + + return BCH_MERGE_NOMERGE; +} + +static const struct old_bkey_type { + u8 btree_node_type; + u8 old; + u8 new; +} bkey_renumber_table[] = { + {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, + {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, + {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, + {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, + {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, + {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, + {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, + {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout }, + {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, + {BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout }, + {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, + {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, +}; + +void bch2_bkey_renumber(enum btree_node_type btree_node_type, + struct bkey_packed *k, + int write) +{ + const struct old_bkey_type *i; + + for (i = bkey_renumber_table; + i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table); + i++) + if (btree_node_type == i->btree_node_type && + k->type == (write ? i->new : i->old)) { + k->type = write ? i->old : i->new; + break; + } +} diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 62b86a8e2ba8..a4bfd2aef5bf 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -4,24 +4,12 @@ #include "bkey.h" -#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val, - -enum bkey_type { - DEFINE_BCH_BTREE_IDS() - BKEY_TYPE_BTREE, -}; - -#undef DEF_BTREE_ID - -/* Type of a key in btree @id at level @level: */ -static inline enum bkey_type bkey_type(unsigned level, enum btree_id id) -{ - return level ? 
BKEY_TYPE_BTREE : (enum bkey_type) id; -} - struct bch_fs; struct btree; struct bkey; +enum btree_node_type; + +extern const char * const bch_bkey_types[]; enum merge_result { BCH_MERGE_NOMERGE, @@ -34,12 +22,6 @@ enum merge_result { BCH_MERGE_MERGE, }; -typedef bool (*key_filter_fn)(struct bch_fs *, struct btree *, - struct bkey_s); -typedef enum merge_result (*key_merge_fn)(struct bch_fs *, - struct btree *, - struct bkey_i *, struct bkey_i *); - struct bkey_ops { /* Returns reason for being invalid if invalid, else NULL: */ const char * (*key_invalid)(const struct bch_fs *, @@ -49,41 +31,34 @@ struct bkey_ops { void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(const struct bkey_format *, struct bkey_packed *); - key_filter_fn key_normalize; - key_merge_fn key_merge; - bool is_extents; + bool (*key_normalize)(struct bch_fs *, struct bkey_s); + enum merge_result (*key_merge)(struct bch_fs *, + struct bkey_i *, struct bkey_i *); }; -static inline bool bkey_type_needs_gc(enum bkey_type type) -{ - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - case BKEY_TYPE_EC: - return true; - default: - return false; - } -} - -const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type, - struct bkey_s_c); -const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); -const char *bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); +const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); +const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, + enum btree_node_type); +const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, + enum btree_node_type); const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -void bch2_val_to_text(struct printbuf *, struct bch_fs *, enum bkey_type, +void bch2_val_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, - enum bkey_type, struct bkey_s_c); + struct bkey_s_c); + +void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *); + +bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); -void bch2_bkey_swab(enum bkey_type, const struct bkey_format *, - struct bkey_packed *); +enum merge_result bch2_bkey_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); -extern const struct bkey_ops bch2_bkey_ops[]; +void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); #endif /* _BCACHEFS_BKEY_METHODS_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index 706ca77d4b17..12825c1b292f 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -257,7 +257,7 @@ static void extent_sort_append(struct bch_fs *c, bch2_bkey_unpack(b, &tmp.k, k); if (*prev && - bch2_extent_merge(c, b, (void *) *prev, &tmp.k)) + bch2_bkey_merge(c, (void *) *prev, &tmp.k)) return; if (*prev) { @@ -375,7 +375,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, } /* Sort + repack in a new format: */ -static struct btree_nr_keys +struct btree_nr_keys bch2_sort_repack(struct bset *dst, struct btree *src, struct btree_node_iter *src_iter, struct bkey_format *out_f, @@ -411,18 +411,12 @@ bch2_sort_repack_merge(struct bch_fs *c, struct bset *dst, struct btree *src, struct btree_node_iter *iter, struct bkey_format *out_f, - 
bool filter_whiteouts, - key_filter_fn filter, - key_merge_fn merge) + bool filter_whiteouts) { struct bkey_packed *k, *prev = NULL, *out; struct btree_nr_keys nr; BKEY_PADDED(k) tmp; - if (!filter && !merge) - return bch2_sort_repack(dst, src, iter, out_f, - filter_whiteouts); - memset(&nr, 0, sizeof(nr)); while ((k = bch2_btree_node_iter_next_all(iter, src))) { @@ -435,14 +429,15 @@ bch2_sort_repack_merge(struct bch_fs *c, */ bch2_bkey_unpack(src, &tmp.k, k); - if (filter && filter(c, src, bkey_i_to_s(&tmp.k))) + if (filter_whiteouts && + bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k))) continue; /* prev is always unpacked, for key merging: */ if (prev && - merge && - merge(c, src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE) + bch2_bkey_merge(c, (void *) prev, &tmp.k) == + BCH_MERGE_MERGE) continue; /* @@ -606,7 +601,7 @@ unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst, continue; EBUG_ON(bkeyp_val_u64s(f, in)); - EBUG_ON(in->type != KEY_TYPE_DISCARD); + EBUG_ON(in->type != KEY_TYPE_discard); r.k = bkey_unpack_key(iter->b, in); diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h index 6b1661dd221a..397009181eae 100644 --- a/fs/bcachefs/bkey_sort.h +++ b/fs/bcachefs/bkey_sort.h @@ -47,13 +47,14 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, struct btree_node_iter_large *); struct btree_nr_keys +bch2_sort_repack(struct bset *, struct btree *, + struct btree_node_iter *, + struct bkey_format *, bool); +struct btree_nr_keys bch2_sort_repack_merge(struct bch_fs *, struct bset *, struct btree *, struct btree_node_iter *, - struct bkey_format *, - bool, - key_filter_fn, - key_merge_fn); + struct bkey_format *, bool); unsigned bch2_sort_keys(struct bkey_packed *, struct sort_iter *, bool); diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 5d03036620b9..329ffb0b6b3d 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -397,7 +397,7 @@ bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) static inline struct bkey_packed * bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k) { - return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1); + return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1); } enum bch_extent_overlap { @@ -529,7 +529,7 @@ bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, static inline struct bkey_packed * bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) { - return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1); + return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1); } static inline struct bkey_packed * @@ -555,7 +555,7 @@ bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b) static inline struct bkey_packed * bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) { - return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1); + return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1); } struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 846d5e816aa2..b748afc778f4 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -6,20 +6,17 @@ #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" -#include "extents.h" #include "trace.h" #include -#define DEF_BTREE_ID(kwd, val, name) name, - const char * const bch2_btree_ids[] = { - DEFINE_BCH_BTREE_IDS() +#define x(kwd, val, name) name, + BCH_BTREE_IDS() +#undef x NULL 
}; -#undef DEF_BTREE_ID - void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; @@ -100,7 +97,7 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) if (!b) return NULL; - bkey_extent_init(&b->key); + bkey_btree_ptr_init(&b->key); six_lock_init(&b->lock); lockdep_set_novalidate_class(&b->lock); INIT_LIST_HEAD(&b->list); @@ -117,7 +114,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); /* Cause future lookups for this node to fail: */ - bkey_i_to_extent(&b->key)->v._data[0] = 0; + PTR_HASH(&b->key) = 0; } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) @@ -604,7 +601,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, /* raced with another fill: */ /* mark as unhashed... */ - bkey_i_to_extent(&b->key)->v._data[0] = 0; + PTR_HASH(&b->key) = 0; mutex_lock(&bc->lock); list_add(&b->list, &bc->freeable); @@ -906,8 +903,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, b->data->min_key.offset, b->data->max_key.inode, b->data->max_key.offset); - bch2_val_to_text(out, c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); pr_buf(out, "\n" " format: u64s %u fields %u %u %u %u %u\n" " unpack fn len: %u\n" diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index cb7f66fc8bd4..7bd2bc84160d 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -4,7 +4,6 @@ #include "bcachefs.h" #include "btree_types.h" -#include "extents.h" struct btree_iter; @@ -37,12 +36,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *); void bch2_fs_btree_cache_init_early(struct btree_cache *); -#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0]) +#define PTR_HASH(_k) *((u64 *) &bkey_i_to_btree_ptr_c(_k)->v) /* is btree node in hash table? 
*/ static inline bool btree_node_hashed(struct btree *b) { - return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key); + return b->key.k.type == KEY_TYPE_btree_ptr && + PTR_HASH(&b->key); } #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index a849f9e320b3..85fc181e76a8 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -112,137 +112,11 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b, /* marking of btree keys/nodes: */ -static void ptr_gen_recalc_oldest(struct bch_fs *c, - const struct bch_extent_ptr *ptr, - u8 *max_stale) -{ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - size_t b = PTR_BUCKET_NR(ca, ptr); - - if (gen_after(ca->oldest_gens[b], ptr->gen)) - ca->oldest_gens[b] = ptr->gen; - - *max_stale = max(*max_stale, ptr_stale(ca, ptr)); -} - -static void ptr_gens_recalc_oldest(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k, u8 *max_stale) -{ - const struct bch_extent_ptr *ptr; - - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) - ptr_gen_recalc_oldest(c, ptr, max_stale); - break; - } - } - break; - case BKEY_TYPE_EC: - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) - ptr_gen_recalc_oldest(c, ptr, max_stale); - } - } - default: - break; - } -} - -static int ptr_gen_check(struct bch_fs *c, - enum bkey_type type, - const struct bch_extent_ptr *ptr) -{ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - size_t b = PTR_BUCKET_NR(ca, ptr); - struct bucket *g = PTR_BUCKET(ca, ptr); - int ret = 0; - - if (mustfix_fsck_err_on(!g->mark.gen_valid, c, - "found ptr with missing gen in alloc btree,\n" - "type %u gen %u", - type, ptr->gen)) { - g->_mark.gen = ptr->gen; - g->_mark.gen_valid = 1; - set_bit(b, ca->buckets_dirty); - } - - if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, - "%u ptr gen in the future: %u > %u", - type, ptr->gen, g->mark.gen)) { - g->_mark.gen = ptr->gen; - g->_mark.gen_valid = 1; - set_bit(b, ca->buckets_dirty); - set_bit(BCH_FS_FIXED_GENS, &c->flags); - } -fsck_err: - return ret; -} - -static int ptr_gens_check(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) -{ - const struct bch_extent_ptr *ptr; - int ret = 0; - - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) { - ret = ptr_gen_check(c, type, ptr); - if (ret) - return ret; - - } - break; - } - } - break; - case BKEY_TYPE_EC: - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) { - ret = ptr_gen_check(c, type, ptr); - if (ret) - return ret; - } - } - } - break; - default: - break; - } - - return ret; -} - -/* - * For runtime mark and sweep: - */ -static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k, +static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, u8 *max_stale, bool initial) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; struct gc_pos pos = { 0 }; unsigned flags = BCH_BUCKET_MARK_GC| @@ -257,23 
+131,50 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, atomic64_set(&c->key_version, k.k->version.lo); if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || - fsck_err_on(!bch2_bkey_replicas_marked(c, type, k, - false), c, + fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c, "superblock not marked as containing replicas (type %u)", - type)) { - ret = bch2_mark_bkey_replicas(c, type, k); + k.k->type)) { + ret = bch2_mark_bkey_replicas(c, k); if (ret) return ret; } - ret = ptr_gens_check(c, type, k); - if (ret) - return ret; + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + size_t b = PTR_BUCKET_NR(ca, ptr); + struct bucket *g = PTR_BUCKET(ca, ptr); + + if (mustfix_fsck_err_on(!g->mark.gen_valid, c, + "found ptr with missing gen in alloc btree,\n" + "type %u gen %u", + k.k->type, ptr->gen)) { + g->_mark.gen = ptr->gen; + g->_mark.gen_valid = 1; + set_bit(b, ca->buckets_dirty); + } + + if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, + "%u ptr gen in the future: %u > %u", + k.k->type, ptr->gen, g->mark.gen)) { + g->_mark.gen = ptr->gen; + g->_mark.gen_valid = 1; + set_bit(b, ca->buckets_dirty); + set_bit(BCH_FS_FIXED_GENS, &c->flags); + } + } } - bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags); + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + size_t b = PTR_BUCKET_NR(ca, ptr); + + if (gen_after(ca->oldest_gens[b], ptr->gen)) + ca->oldest_gens[b] = ptr->gen; + + *max_stale = max(*max_stale, ptr_stale(ca, ptr)); + } - ptr_gens_recalc_oldest(c, type, k, max_stale); + bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags); fsck_err: return ret; } @@ -281,7 +182,6 @@ fsck_err: static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, bool initial) { - enum bkey_type type = btree_node_type(b); struct btree_node_iter iter; struct bkey unpacked; struct bkey_s_c k; @@ -289,14 +189,14 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, *max_stale = 0; - if (!bkey_type_needs_gc(type)) + if (!btree_node_type_needs_gc(btree_node_type(b))) return 0; for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { bch2_bkey_debugcheck(c, b, k); - ret = bch2_gc_mark_key(c, type, k, max_stale, initial); + ret = bch2_gc_mark_key(c, k, max_stale, initial); if (ret) break; } @@ -310,7 +210,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, struct btree_iter iter; struct btree *b; struct range_checks r; - unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1; + unsigned depth = btree_node_type_needs_gc(btree_id) ? 
0 : 1; u8 max_stale; int ret = 0; @@ -364,7 +264,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, b = c->btree_roots[btree_id].b; if (!btree_node_fake(b)) - bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), + bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), &max_stale, initial); gc_pos_set(c, gc_pos_btree_root(b->btree_id)); @@ -391,13 +291,13 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, for (i = 0; i < BTREE_ID_NR; i++) { enum btree_id id = ids[i]; - enum bkey_type type = bkey_type(0, id); + enum btree_node_type type = __btree_node_type(0, id); int ret = bch2_gc_btree(c, id, initial); if (ret) return ret; - if (journal && bkey_type_needs_gc(type)) { + if (journal && btree_node_type_needs_gc(type)) { struct bkey_i *k, *n; struct jset_entry *j; struct journal_replay *r; @@ -405,8 +305,8 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, list_for_each_entry(r, journal, list) for_each_jset_key(k, n, j, &r->j) { - if (type == bkey_type(j->level, j->btree_id)) { - ret = bch2_gc_mark_key(c, type, + if (type == __btree_node_type(j->level, j->btree_id)) { + ret = bch2_gc_mark_key(c, bkey_i_to_s_c(k), &max_stale, initial); if (ret) @@ -507,8 +407,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) for_each_pending_btree_node_free(c, as, d) if (d->index_update_done) - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&d->key), + bch2_mark_key(c, bkey_i_to_s_c(&d->key), true, 0, pos, NULL, 0, BCH_BUCKET_MARK_GC); diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index bb77564b9463..89ee72ac49f6 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -4,8 +4,6 @@ #include "btree_types.h" -enum bkey_type; - void bch2_coalesce(struct bch_fs *); int bch2_gc(struct bch_fs *, struct list_head *, bool); void bch2_gc_thread_stop(struct bch_fs *); @@ -58,9 +56,9 @@ static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) { switch (id) { -#define DEF_BTREE_ID(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; - DEFINE_BCH_BTREE_IDS() -#undef DEF_BTREE_ID +#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; + BCH_BTREE_IDS() +#undef x default: BUG(); } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 506bf9e8df38..f205bddd814d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -392,12 +392,16 @@ void bch2_btree_sort_into(struct bch_fs *c, bch2_btree_node_iter_init_from_start(&src_iter, src); - nr = bch2_sort_repack_merge(c, btree_bset_first(dst), - src, &src_iter, - &dst->format, - true, - btree_node_ops(src)->key_normalize, - btree_node_ops(src)->key_merge); + if (btree_node_is_extents(src)) + nr = bch2_sort_repack_merge(c, btree_bset_first(dst), + src, &src_iter, + &dst->format, + true); + else + nr = bch2_sort_repack(btree_bset_first(dst), + src, &src_iter, + &dst->format, + true); bch2_time_stats_update(&c->times[BCH_TIME_btree_sort], start_time); @@ -598,8 +602,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, { struct bkey_packed *k, *prev = NULL; struct bpos prev_pos = POS_MIN; - enum bkey_type type = btree_node_type(b); bool seen_non_whiteout = false; + unsigned version; const char *err; int ret = 0; @@ -645,13 +649,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b, "invalid bkey format: %s", err); } - if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION, - BTREE_ERR_FIXABLE, c, b, i, - "unsupported bset version")) { - 
i->version = cpu_to_le16(BCACHE_BSET_VERSION); - i->u64s = 0; - return 0; - } + version = le16_to_cpu(i->version); + btree_err_on((version != BCH_BSET_VERSION_OLD && + version < bcachefs_metadata_version_min) || + version >= bcachefs_metadata_version_max, + BTREE_ERR_FATAL, c, b, i, + "unsupported bset version"); if (btree_err_on(b->written + sectors > c->opts.btree_node_size, BTREE_ERR_FIXABLE, c, b, i, @@ -700,17 +703,21 @@ static int validate_bset(struct bch_fs *c, struct btree *b, } if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) - bch2_bkey_swab(type, &b->format, k); + bch2_bkey_swab(&b->format, k); + + if (!write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(btree_node_type(b), k, write); u = bkey_disassemble(b, k, &tmp); - invalid = __bch2_bkey_invalid(c, type, u) ?: + invalid = __bch2_bkey_invalid(c, u, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, u) ?: - (write ? bch2_bkey_val_invalid(c, type, u) : NULL); + (write ? bch2_bkey_val_invalid(c, u) : NULL); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, u); + bch2_bkey_val_to_text(&PBUF(buf), c, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey:\n%s\n%s", invalid, buf); @@ -720,6 +727,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b, continue; } + if (write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(btree_node_type(b), k, write); + /* * with the separate whiteouts thing (used for extents), the * second set of keys actually can have whiteouts too, so we @@ -885,17 +896,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { - enum bkey_type type = btree_node_type(b); struct bkey tmp; struct bkey_s_c u = bkey_disassemble(b, k, &tmp); - const char *invalid = bch2_bkey_val_invalid(c, type, u); + const char *invalid = bch2_bkey_val_invalid(c, u); if (invalid || (inject_invalid_keys(c) && !bversion_cmp(u.k->version, MAX_VERSION))) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, u); + bch2_bkey_val_to_text(&PBUF(buf), c, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey %s: %s", buf, invalid); @@ -964,7 +974,9 @@ start: bch2_mark_io_failure(&failed, &rb->pick); - can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0; + can_retry = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick) > 0; if (!bio->bi_status && !bch2_btree_node_read_done(c, b, can_retry)) @@ -1007,7 +1019,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, trace_btree_read(c, b); - ret = bch2_btree_pick_ptr(c, b, NULL, &pick); + ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), + NULL, &pick); if (bch2_fs_fatal_err_on(ret <= 0, c, "btree node read error: no device to read from")) { set_btree_node_read_error(b); @@ -1135,8 +1148,8 @@ static void bch2_btree_node_write_error(struct bch_fs *c, { struct btree *b = wbio->wbio.bio.bi_private; __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct bkey_i_extent *new_key; - struct bkey_s_extent e; + struct bkey_i_btree_ptr *new_key; + struct bkey_s_btree_ptr bp; struct bch_extent_ptr *ptr; struct btree_iter iter; int ret; @@ -1160,13 +1173,13 @@ retry: bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_extent(&tmp.k); - e = extent_i_to_s(new_key); + new_key = bkey_i_to_btree_ptr(&tmp.k); + bp = btree_ptr_i_to_s(new_key); - bch2_extent_drop_ptrs(e, ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); 
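/*
 * Illustrative sketch, not part of this patch: the write-error path
 * above drops every pointer that lives on a failed device, then only
 * gives up if no pointer survives. Same idiom on a plain array; the
 * demo_* names are invented for the example, not bcachefs structures.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_ptr { unsigned dev; };

struct demo_key {
	unsigned	nr_ptrs;
	struct demo_ptr	ptrs[4];
};

static bool dev_failed(const unsigned *failed, unsigned nr, unsigned dev)
{
	for (unsigned i = 0; i < nr; i++)
		if (failed[i] == dev)
			return true;
	return false;
}

/* Compact the pointer list in place, keeping only healthy devices: */
static void drop_failed_ptrs(struct demo_key *k,
			     const unsigned *failed, unsigned nr_failed)
{
	unsigned out = 0;

	for (unsigned in = 0; in < k->nr_ptrs; in++)
		if (!dev_failed(failed, nr_failed, k->ptrs[in].dev))
			k->ptrs[out++] = k->ptrs[in];
	k->nr_ptrs = out;
}

int main(void)
{
	struct demo_key k = { .nr_ptrs = 3, .ptrs = { { 0 }, { 1 }, { 2 } } };
	unsigned failed[] = { 1 };

	drop_failed_ptrs(&k, failed, 1);

	/* As in the error path above: give up only when no replicas remain */
	if (!k.nr_ptrs)
		fprintf(stderr, "no pointers left\n");
	else
		printf("%u pointers survive\n", k.nr_ptrs);
	return 0;
}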
- if (!bch2_extent_nr_ptrs(e.c)) + if (!bch2_bkey_nr_ptrs(bp.s_c)) goto err; ret = bch2_btree_node_update_key(c, &iter, b, new_key); @@ -1269,12 +1282,11 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - const struct bch_extent_ptr *ptr; unsigned whiteout_u64s = 0; int ret; - extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr) - break; + if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) + return -1; ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false); if (ret) @@ -1292,7 +1304,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; BKEY_PADDED(key) k; - struct bkey_s_extent e; struct bch_extent_ptr *ptr; struct sort_iter sort_iter; struct nonce nonce; @@ -1300,6 +1311,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, u64 seq = 0; bool used_mempool; unsigned long old, new; + bool validate_before_checksum = false; void *data; if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) @@ -1433,11 +1445,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); - i->version = cpu_to_le16(BCACHE_BSET_VERSION); + i->version = c->sb.version < bcachefs_metadata_version_new_versioning + ? cpu_to_le16(BCH_BSET_VERSION_OLD) + : cpu_to_le16(c->sb.version); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) + validate_before_checksum = true; + + /* validate_bset will be modifying: */ + if (le16_to_cpu(i->version) < + bcachefs_metadata_version_bkey_renumber) + validate_before_checksum = true; + /* if we're going to be encrypting, check metadata validity first: */ - if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + if (validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; @@ -1451,7 +1473,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); /* if we're not encrypting, check metadata after checksumming: */ - if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + if (!validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; @@ -1506,9 +1528,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, */ bkey_copy(&k.key, &b->key); - e = bkey_i_to_s_extent(&k.key); - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) ptr->offset += b->written; b->written += sectors_to_write; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index afc43722c1fc..4720061e9562 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -433,7 +433,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, * whiteouts) */ k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS - ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD) + ? 
bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_discard) : bch2_btree_node_iter_prev_all(&tmp, b); if (k && btree_iter_pos_cmp(iter, b, k) > 0) { char buf[100]; @@ -622,7 +622,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, * signal to bch2_btree_iter_peek_slot() that we're currently at * a hole */ - u->type = KEY_TYPE_DELETED; + u->type = KEY_TYPE_deleted; return bkey_s_c_null; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 7eecaa6cd5a2..b4a826369a57 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -405,20 +405,45 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) return i - (void *) b->data; } +enum btree_node_type { +#define x(kwd, val, name) BKEY_TYPE_##kwd = val, + BCH_BTREE_IDS() +#undef x + BKEY_TYPE_BTREE, +}; + +/* Type of a key in btree @id at level @level: */ +static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) +{ + return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id; +} + /* Type of keys @b contains: */ -static inline enum bkey_type btree_node_type(struct btree *b) +static inline enum btree_node_type btree_node_type(struct btree *b) { - return b->level ? BKEY_TYPE_BTREE : b->btree_id; + return __btree_node_type(b->level, b->btree_id); } -static inline const struct bkey_ops *btree_node_ops(struct btree *b) +static inline bool btree_node_type_is_extents(enum btree_node_type type) { - return &bch2_bkey_ops[btree_node_type(b)]; + return type == BKEY_TYPE_EXTENTS; } static inline bool btree_node_is_extents(struct btree *b) { - return btree_node_type(b) == BKEY_TYPE_EXTENTS; + return btree_node_type_is_extents(btree_node_type(b)); +} + +static inline bool btree_node_type_needs_gc(enum btree_node_type type) +{ + switch (type) { + case BKEY_TYPE_BTREE: + case BKEY_TYPE_EXTENTS: + case BKEY_TYPE_EC: + return true; + default: + return false; + } } struct btree_root { diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index f6b0082235af..d1647f6eb476 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -120,7 +120,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id, int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, __le64, unsigned); int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, - struct btree *, struct bkey_i_extent *); + struct btree *, struct bkey_i_btree_ptr *); /* new transactional interface: */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7d7a021416f3..22f087098776 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -132,13 +132,15 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, /* Btree node freeing/allocation: */ static bool btree_key_matches(struct bch_fs *c, - struct bkey_s_c_extent l, - struct bkey_s_c_extent r) + struct bkey_s_c l, + struct bkey_s_c r) { + struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(l); + struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(r); const struct bch_extent_ptr *ptr1, *ptr2; - extent_for_each_ptr(l, ptr1) - extent_for_each_ptr(r, ptr2) + bkey_for_each_ptr(ptrs1, ptr1) + bkey_for_each_ptr(ptrs2, ptr2) if (ptr1->dev == ptr2->dev && ptr1->gen == ptr2->gen && ptr1->offset == ptr2->offset) @@ -164,8 +166,7 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, for (d = as->pending; d < as->pending + as->nr_pending; d++) if (!bkey_cmp(k.k->p, d->key.k.p) && - btree_key_matches(c, 
bkey_s_c_to_extent(k), - bkey_i_to_s_c_extent(&d->key))) + btree_key_matches(c, k, bkey_i_to_s_c(&d->key))) goto found; BUG(); found: @@ -197,7 +198,7 @@ found: ? gc_pos_btree_node(b) : gc_pos_btree_root(as->btree_id)) >= 0 && gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, + bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key), false, 0, pos, NULL, 0, BCH_BUCKET_MARK_GC); @@ -270,8 +271,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c, { BUG_ON(!pending->index_update_done); - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&pending->key), + bch2_mark_key(c, bkey_i_to_s_c(&pending->key), false, 0, gc_phase(GC_PHASE_PENDING_DELETE), NULL, 0, 0); @@ -285,7 +285,6 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, struct write_point *wp; struct btree *b; BKEY_PADDED(k) tmp; - struct bkey_i_extent *e; struct open_buckets ob = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; unsigned nr_reserve; @@ -336,8 +335,8 @@ retry: goto retry; } - e = bkey_extent_init(&tmp.k); - bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size); + bkey_btree_ptr_init(&tmp.k); + bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); bch2_open_bucket_get(c, wp, &ob); bch2_alloc_sectors_done(c, wp); @@ -375,7 +374,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev b->data->flags = 0; SET_BTREE_NODE_ID(b->data, as->btree_id); SET_BTREE_NODE_LEVEL(b->data, level); - b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr; + b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0]; bch2_btree_build_aux_trees(b); @@ -528,8 +527,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, goto err_free; } - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) goto err_free; @@ -1072,8 +1070,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->usage_lock); - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key), + bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), true, 0, gc_pos_btree_root(b->btree_id), &stats, 0, 0); @@ -1166,11 +1163,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->usage_lock); - if (bkey_extent_is_data(&insert->k)) - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(insert), - true, 0, - gc_pos_btree_node(b), &stats, 0, 0); + bch2_mark_key_locked(c, bkey_i_to_s_c(insert), + true, 0, + gc_pos_btree_node(b), &stats, 0, 0); while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && bkey_iter_pos_cmp(b, &insert->k.p, k) > 0) @@ -1893,7 +1888,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, struct btree_update *as, struct btree_iter *iter, struct btree *b, struct btree *new_hash, - struct bkey_i_extent *new_key) + struct bkey_i_btree_ptr *new_key) { struct btree *parent; int ret; @@ -1938,7 +1933,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, */ ret = bch2_disk_reservation_add(c, &as->reserve->disk_res, c->opts.btree_node_size * - bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)), + bch2_bkey_nr_ptrs(bkey_i_to_s_c(&new_key->k_i)), BCH_DISK_RESERVATION_NOFAIL| BCH_DISK_RESERVATION_GC_LOCK_HELD); BUG_ON(ret); @@ -1978,8 +1973,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, 
mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->usage_lock); - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&new_key->k_i), + bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), true, 0, gc_pos_btree_root(b->btree_id), &stats, 0, 0); @@ -2012,7 +2006,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, } int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, - struct btree *b, struct bkey_i_extent *new_key) + struct btree *b, + struct bkey_i_btree_ptr *new_key) { struct btree *parent = btree_node_parent(iter, b); struct btree_update *as = NULL; @@ -2078,8 +2073,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, goto err; } - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - extent_i_to_s_c(new_key).s_c); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i)); if (ret) goto err_free_update; @@ -2137,9 +2131,9 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) b->level = 0; b->btree_id = id; - bkey_extent_init(&b->key); + bkey_btree_ptr_init(&b->key); b->key.k.p = POS_MAX; - bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id; + PTR_HASH(&b->key) = U64_MAX - id; bch2_bset_init_first(b, &b->data->keys); bch2_btree_build_aux_trees(b); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 4b0d674472db..fd27334cf2a4 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -71,7 +71,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, goto overwrite; } - k->type = KEY_TYPE_DELETED; + k->type = KEY_TYPE_deleted; bch2_btree_node_iter_fix(iter, b, node_iter, k, k->u64s, k->u64s); bch2_btree_iter_verify(iter, b); @@ -312,7 +312,6 @@ btree_key_can_insert(struct btree_insert *trans, return BTREE_INSERT_BTREE_NODE_FULL; if (!bch2_bkey_replicas_marked(c, - insert->iter->btree_id, bkey_i_to_s_c(insert->k), true)) return BTREE_INSERT_NEED_MARK_REPLICAS; @@ -449,8 +448,8 @@ static inline void btree_insert_entry_checks(struct bch_fs *c, BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); BUG_ON(debug_check_bkeys(c) && !bkey_deleted(&i->k->k) && - bch2_bkey_invalid(c, (enum bkey_type) i->iter->btree_id, - bkey_i_to_s_c(i->k))); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), + i->iter->btree_id)); } /** @@ -585,8 +584,7 @@ err: } bch2_btree_iter_unlock(trans->entries[0].iter); - ret = bch2_mark_bkey_replicas(c, i->iter->btree_id, - bkey_i_to_s_c(i->k)) + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)) ?: -EINTR; break; default: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3f4bbf280a78..d08e95020cef 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -567,7 +567,7 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors) crc.uncompressed_size)); } -static s64 ptr_disk_sectors(struct bkey_s_c_extent e, +static s64 ptr_disk_sectors(const struct bkey *k, struct extent_ptr_decoded p, s64 sectors) { @@ -579,8 +579,8 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e, old_sectors = 0; new_sectors = sectors; } else { - old_sectors = e.k->size; - new_sectors = e.k->size + sectors; + old_sectors = k->size; + new_sectors = k->size + sectors; } sectors = -__disk_sectors(p.crc, old_sectors) @@ -596,7 +596,6 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e, * that with the gc pos seqlock held. 
*/ static void bch2_mark_pointer(struct bch_fs *c, - struct bkey_s_c_extent e, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type, struct bch_fs_usage *fs_usage, @@ -709,70 +708,54 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, u64 journal_seq, unsigned flags, bool gc) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + s64 cached_sectors = 0; + s64 dirty_sectors = 0; + s64 ec_sectors = 0; + unsigned replicas = 0; + unsigned ec_redundancy = 0; + unsigned i; + int ret; + BUG_ON(!sectors); - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - s64 cached_sectors = 0; - s64 dirty_sectors = 0; - s64 ec_sectors = 0; - unsigned replicas = 0; - unsigned ec_redundancy = 0; - unsigned i; - int ret; - - extent_for_each_ptr_decode(e, p, entry) { - s64 disk_sectors = ptr_disk_sectors(e, p, sectors); - s64 adjusted_disk_sectors = disk_sectors; - - bch2_mark_pointer(c, e, p, disk_sectors, data_type, - stats, journal_seq, flags, gc); - - if (!p.ptr.cached) - for (i = 0; i < p.ec_nr; i++) { - ret = bch2_mark_stripe_ptr(c, p.ec[i], - disk_sectors, flags, - &adjusted_disk_sectors, - &ec_redundancy, gc); - if (ret) - return ret; - } - if (!p.ptr.cached) - replicas++; - - if (p.ptr.cached) - cached_sectors += adjusted_disk_sectors; - else if (!p.ec_nr) - dirty_sectors += adjusted_disk_sectors; - else - ec_sectors += adjusted_disk_sectors; - } + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + s64 disk_sectors = ptr_disk_sectors(k.k, p, sectors); + s64 adjusted_disk_sectors = disk_sectors; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); - ec_redundancy = clamp_t(unsigned, ec_redundancy, - 1, ARRAY_SIZE(stats->replicas)); + bch2_mark_pointer(c, p, disk_sectors, data_type, + stats, journal_seq, flags, gc); - stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; - stats->replicas[replicas - 1].data[data_type] += dirty_sectors; - stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; - break; + if (!p.ptr.cached) + for (i = 0; i < p.ec_nr; i++) { + ret = bch2_mark_stripe_ptr(c, p.ec[i], + disk_sectors, flags, + &adjusted_disk_sectors, + &ec_redundancy, gc); + if (ret) + return ret; + } + if (!p.ptr.cached) + replicas++; + + if (p.ptr.cached) + cached_sectors += adjusted_disk_sectors; + else if (!p.ec_nr) + dirty_sectors += adjusted_disk_sectors; + else + ec_sectors += adjusted_disk_sectors; } - case BCH_RESERVATION: { - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - sectors *= replicas; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); + replicas = clamp_t(unsigned, replicas, + 1, ARRAY_SIZE(stats->replicas)); + ec_redundancy = clamp_t(unsigned, ec_redundancy, + 1, ARRAY_SIZE(stats->replicas)); - stats->replicas[replicas - 1].persistent_reserved += sectors; - break; - } - } + stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; + stats->replicas[replicas - 1].data[data_type] += dirty_sectors; + stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; return 0; } @@ -813,56 +796,49 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, u64 journal_seq, unsigned flags, bool gc) { - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - size_t idx = s.k->p.offset; - struct stripe *m = genradix_ptr(&c->stripes[gc], idx); - 
unsigned i; - - if (!m || (!inserting && !m->alive)) { - bch_err_ratelimited(c, "error marking nonexistent stripe %zu", - idx); - return -1; - } - - if (inserting && m->alive) { - bch_err_ratelimited(c, "error marking stripe %zu: already exists", - idx); - return -1; - } + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + size_t idx = s.k->p.offset; + struct stripe *m = genradix_ptr(&c->stripes[gc], idx); + unsigned i; - BUG_ON(atomic_read(&m->blocks_nonempty)); + if (!m || (!inserting && !m->alive)) { + bch_err_ratelimited(c, "error marking nonexistent stripe %zu", + idx); + return -1; + } - for (i = 0; i < EC_STRIPE_MAX; i++) - BUG_ON(atomic_read(&m->block_sectors[i])); + if (inserting && m->alive) { + bch_err_ratelimited(c, "error marking stripe %zu: already exists", + idx); + return -1; + } - if (inserting) { - m->sectors = le16_to_cpu(s.v->sectors); - m->algorithm = s.v->algorithm; - m->nr_blocks = s.v->nr_blocks; - m->nr_redundant = s.v->nr_redundant; - } + BUG_ON(atomic_read(&m->blocks_nonempty)); - if (!gc) { - if (inserting) - bch2_stripes_heap_insert(c, m, idx); - else - bch2_stripes_heap_del(c, m, idx); - } else { - m->alive = inserting; - } + for (i = 0; i < EC_STRIPE_MAX; i++) + BUG_ON(atomic_read(&m->block_sectors[i])); - bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); - break; + if (inserting) { + m->sectors = le16_to_cpu(s.v->sectors); + m->algorithm = s.v->algorithm; + m->nr_blocks = s.v->nr_blocks; + m->nr_redundant = s.v->nr_redundant; } + + if (!gc) { + if (inserting) + bch2_stripes_heap_insert(c, m, idx); + else + bch2_stripes_heap_del(c, m, idx); + } else { + m->alive = inserting; } + bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); return 0; } -static int __bch2_mark_key(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, +static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, struct bch_fs_usage *stats, u64 journal_seq, unsigned flags, @@ -870,22 +846,32 @@ static int __bch2_mark_key(struct bch_fs *c, { int ret = 0; - switch (type) { - case BKEY_TYPE_BTREE: + switch (k.k->type) { + case KEY_TYPE_btree_ptr: ret = bch2_mark_extent(c, k, inserting ? 
c->opts.btree_node_size : -c->opts.btree_node_size, BCH_DATA_BTREE, stats, journal_seq, flags, gc); break; - case BKEY_TYPE_EXTENTS: + case KEY_TYPE_extent: ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER, stats, journal_seq, flags, gc); break; - case BKEY_TYPE_EC: + case KEY_TYPE_stripe: ret = bch2_mark_stripe(c, k, inserting, stats, journal_seq, flags, gc); break; + case KEY_TYPE_reservation: { + unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; + + sectors *= replicas; + replicas = clamp_t(unsigned, replicas, + 1, ARRAY_SIZE(stats->replicas)); + + stats->replicas[replicas - 1].persistent_reserved += sectors; + break; + } default: break; } @@ -894,7 +880,7 @@ static int __bch2_mark_key(struct bch_fs *c, } int bch2_mark_key_locked(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, + struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, struct bch_fs_usage *stats, @@ -906,7 +892,7 @@ int bch2_mark_key_locked(struct bch_fs *c, if (!stats) stats = this_cpu_ptr(c->usage[0]); - ret = __bch2_mark_key(c, type, k, inserting, sectors, + ret = __bch2_mark_key(c, k, inserting, sectors, stats, journal_seq, flags, false); if (ret) return ret; @@ -914,7 +900,7 @@ int bch2_mark_key_locked(struct bch_fs *c, if ((flags & BCH_BUCKET_MARK_GC) || gc_visited(c, pos)) { - ret = __bch2_mark_key(c, type, k, inserting, sectors, + ret = __bch2_mark_key(c, k, inserting, sectors, this_cpu_ptr(c->usage[1]), journal_seq, flags, true); if (ret) @@ -924,8 +910,7 @@ int bch2_mark_key_locked(struct bch_fs *c, return 0; } -int bch2_mark_key(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, +int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, struct bch_fs_usage *stats, @@ -934,7 +919,7 @@ int bch2_mark_key(struct bch_fs *c, int ret; percpu_down_read(&c->usage_lock); - ret = bch2_mark_key_locked(c, type, k, inserting, sectors, + ret = bch2_mark_key_locked(c, k, inserting, sectors, pos, stats, journal_seq, flags); percpu_up_read(&c->usage_lock); @@ -952,20 +937,19 @@ void bch2_mark_update(struct btree_insert *trans, struct gc_pos pos = gc_pos_btree_node(b); struct bkey_packed *_k; - if (!bkey_type_needs_gc(iter->btree_id)) + if (!btree_node_type_needs_gc(iter->btree_id)) return; percpu_down_read(&c->usage_lock); if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) - bch2_mark_key_locked(c, btree_node_type(b), - bkey_i_to_s_c(insert->k), true, + bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, bpos_min(insert->k->k.p, b->key.k.p).offset - bkey_start_offset(&insert->k->k), pos, &stats, trans->journal_res.seq, 0); while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, - KEY_TYPE_DISCARD))) { + KEY_TYPE_discard))) { struct bkey unpacked; struct bkey_s_c k; s64 sectors = 0; @@ -994,9 +978,8 @@ void bch2_mark_update(struct btree_insert *trans, sectors = k.k->p.offset - insert->k->k.p.offset; BUG_ON(sectors <= 0); - bch2_mark_key_locked(c, btree_node_type(b), - k, true, sectors, pos, &stats, - trans->journal_res.seq, 0); + bch2_mark_key_locked(c, k, true, sectors, + pos, &stats, trans->journal_res.seq, 0); sectors = bkey_start_offset(&insert->k->k) - k.k->p.offset; @@ -1006,9 +989,8 @@ void bch2_mark_update(struct btree_insert *trans, BUG_ON(sectors >= 0); } - bch2_mark_key_locked(c, btree_node_type(b), - k, false, sectors, pos, &stats, - trans->journal_res.seq, 0); + bch2_mark_key_locked(c, k, false, sectors, + pos, &stats, trans->journal_res.seq, 0); bch2_btree_node_iter_advance(&node_iter, b); } diff --git 
a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 884041b53eb9..c584ad1b4375 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -220,10 +220,10 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, #define BCH_BUCKET_MARK_NOATOMIC (1 << 0) #define BCH_BUCKET_MARK_GC (1 << 1) -int bch2_mark_key_locked(struct bch_fs *, enum bkey_type, struct bkey_s_c, +int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, bool, s64, struct gc_pos, struct bch_fs_usage *, u64, unsigned); -int bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c, +int bch2_mark_key(struct bch_fs *, struct bkey_s_c, bool, s64, struct gc_pos, struct bch_fs_usage *, u64, unsigned); void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 0a9efe57d5a9..f15c29878a9e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -56,7 +56,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) v->btree_id = b->btree_id; bch2_btree_keys_init(v, &c->expensive_debug_checks); - if (bch2_btree_pick_ptr(c, b, NULL, &pick) <= 0) + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), + NULL, &pick) <= 0) return; ca = bch_dev_bkey_exists(c, pick.ptr.dev); @@ -223,8 +224,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, k = bch2_btree_iter_peek(&iter); while (k.k && !(err = btree_iter_err(k))) { - bch2_bkey_val_to_text(&PBUF(i->buf), i->c, - bkey_type(0, i->id), k); + bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); i->bytes = strlen(i->buf); BUG_ON(i->bytes >= PAGE_SIZE); i->buf[i->bytes] = '\n'; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index c1a611b4d9ec..80d37c568272 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -65,8 +65,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) const struct bch_hash_desc bch2_dirent_hash_desc = { .btree_id = BTREE_ID_DIRENTS, - .key_type = BCH_DIRENT, - .whiteout_type = BCH_DIRENT_WHITEOUT, + .key_type = KEY_TYPE_dirent, .hash_key = dirent_hash_key, .hash_bkey = dirent_hash_bkey, .cmp_key = dirent_cmp_key, @@ -75,58 +74,37 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_dirent d; + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); unsigned len; - switch (k.k->type) { - case BCH_DIRENT: - if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) - return "value too small"; - - d = bkey_s_c_to_dirent(k); - len = bch2_dirent_name_bytes(d); - - if (!len) - return "empty name"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) + return "value too small"; - /* - * older versions of bcachefs were buggy and creating dirent - * keys that were bigger than necessary: - */ - if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) - return "value too big"; + len = bch2_dirent_name_bytes(d); + if (!len) + return "empty name"; - if (len > BCH_NAME_MAX) - return "dirent name too big"; + /* + * older versions of bcachefs were buggy and creating dirent + * keys that were bigger than necessary: + */ + if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) + return "value too big"; - return NULL; - case BCH_DIRENT_WHITEOUT: - return bkey_val_bytes(k.k) != 0 - ? 
"value size should be zero" - : NULL; + if (len > BCH_NAME_MAX) + return "dirent name too big"; - default: - return "invalid type"; - } + return NULL; } void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_dirent d; - - switch (k.k->type) { - case BCH_DIRENT: - d = bkey_s_c_to_dirent(k); - - bch_scnmemcpy(out, d.v->d_name, - bch2_dirent_name_bytes(d)); - pr_buf(out, " -> %llu", d.v->d_inum); - break; - case BCH_DIRENT_WHITEOUT: - pr_buf(out, "whiteout"); - break; - } + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + + bch_scnmemcpy(out, d.v->d_name, + bch2_dirent_name_bytes(d)); + pr_buf(out, " -> %llu", d.v->d_inum); } static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, @@ -287,7 +265,7 @@ int bch2_dirent_rename(struct btree_trans *trans, * overwrite old_dst - just make sure to use a * whiteout when deleting src: */ - new_src->k.type = BCH_DIRENT_WHITEOUT; + new_src->k.type = KEY_TYPE_whiteout; } } else { /* Check if we need a whiteout to delete src: */ @@ -298,7 +276,7 @@ int bch2_dirent_rename(struct btree_trans *trans, return ret; if (ret) - new_src->k.type = BCH_DIRENT_WHITEOUT; + new_src->k.type = KEY_TYPE_whiteout; } } @@ -361,7 +339,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) if (k.k->p.inode > dir_inum) break; - if (k.k->type == BCH_DIRENT) { + if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; } @@ -385,7 +363,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(inode->v.i_ino, ctx->pos), 0, k) { - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 2afb0baed11a..7b47573dcc46 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -9,7 +9,7 @@ extern const struct bch_hash_desc bch2_dirent_hash_desc; const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_dirent_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_dirent (struct bkey_ops) { \ .key_invalid = bch2_dirent_invalid, \ .val_to_text = bch2_dirent_to_text, \ } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 091a1f0a0432..010b9b90f2fc 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -123,49 +123,39 @@ static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx) return csums + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; } -const char *bch2_ec_key_invalid(const struct bch_fs *c, struct bkey_s_c k) +const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) { + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + if (k.k->p.inode) return "invalid stripe key"; - switch (k.k->type) { - case BCH_STRIPE: { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - - if (bkey_val_bytes(k.k) < sizeof(*s)) - return "incorrect value size"; + if (bkey_val_bytes(k.k) < sizeof(*s)) + return "incorrect value size"; - if (bkey_val_u64s(k.k) != stripe_val_u64s(s)) - return "incorrect value size"; + if (bkey_val_u64s(k.k) != stripe_val_u64s(s)) + return "incorrect value size"; - return NULL; - } - default: - return "invalid type"; - } + return NULL; } -void bch2_ec_key_to_text(struct printbuf *out, struct bch_fs *c, +void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - switch (k.k->type) { - case BCH_STRIPE: { - const struct bch_stripe *s = 
bkey_s_c_to_stripe(k).v; - unsigned i; - - pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", - s->algorithm, - le16_to_cpu(s->sectors), - s->nr_blocks - s->nr_redundant, - s->nr_redundant, - s->csum_type, - 1U << s->csum_granularity_bits); - - for (i = 0; i < s->nr_blocks; i++) - pr_buf(out, " %u:%llu", s->ptrs[i].dev, - (u64) s->ptrs[i].offset); - } - } + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + unsigned i; + + pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", + s->algorithm, + le16_to_cpu(s->sectors), + s->nr_blocks - s->nr_redundant, + s->nr_redundant, + s->csum_type, + 1U << s->csum_granularity_bits); + + for (i = 0; i < s->nr_blocks; i++) + pr_buf(out, " %u:%llu", s->ptrs[i].dev, + (u64) s->ptrs[i].offset); } static int ptr_matches_stripe(struct bch_fs *c, @@ -454,7 +444,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) POS(0, stripe_idx), BTREE_ITER_SLOTS); k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != BCH_STRIPE) { + if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { __bcache_io_error(c, "error doing reconstruct read: stripe not found"); kfree(buf); @@ -695,7 +685,7 @@ static void ec_stripe_delete(struct bch_fs *c, size_t idx) POS(0, idx), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != BCH_STRIPE) + if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) goto out; v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index c35de8b1ef64..4a8cade37c7a 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -5,13 +5,13 @@ #include "ec_types.h" #include "keylist_types.h" -const char *bch2_ec_key_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_ec_key_to_text(struct printbuf *, struct bch_fs *, +const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_ec_ops (struct bkey_ops) { \ - .key_invalid = bch2_ec_key_invalid, \ - .val_to_text = bch2_ec_key_to_text, \ +#define bch2_bkey_ops_stripe (struct bkey_ops) { \ + .key_invalid = bch2_stripe_invalid, \ + .val_to_text = bch2_stripe_to_text, \ } struct bch_read_bio; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 582499b08f31..c9a6f6e4a165 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -27,84 +27,34 @@ #include "util.h" #include "xattr.h" -/* Common among btree and extent ptrs */ - -const struct bch_extent_ptr * -bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) - if (ptr->dev == dev) - return ptr; - - return NULL; -} - -void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev) -{ - struct bch_extent_ptr *ptr; - - bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev); -} - -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) { - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - - if (ca->mi.group && - ca->mi.group - 1 == group) - return ptr; - } - - return NULL; -} - -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) - if (bch2_dev_in_target(c, ptr->dev, target) && - (!ptr->cached || - !ptr_stale(bch_dev_bkey_exists(c, 
ptr->dev), ptr))) - return ptr; - - return NULL; -} - -unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e) +unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) { + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; unsigned nr_ptrs = 0; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) nr_ptrs++; return nr_ptrs; } -unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k) +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k) { - struct bkey_s_c_extent e; - const struct bch_extent_ptr *ptr; unsigned nr_ptrs = 0; switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_c_to_extent(k); + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: { + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) nr_ptrs += !ptr->cached; + BUG_ON(!nr_ptrs); break; - - case BCH_RESERVATION: + } + case KEY_TYPE_reservation: nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas; break; } @@ -139,25 +89,216 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c, return durability; } -unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e) +unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned durability = 0; - extent_for_each_ptr_decode(e, p, entry) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) durability += bch2_extent_ptr_durability(c, p); return durability; } +static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, + unsigned dev) +{ + struct bch_dev_io_failures *i; + + for (i = f->devs; i < f->devs + f->nr; i++) + if (i->dev == dev) + return i; + + return NULL; +} + +void bch2_mark_io_failure(struct bch_io_failures *failed, + struct extent_ptr_decoded *p) +{ + struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); + + if (!f) { + BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); + + f = &failed->devs[failed->nr++]; + f->dev = p->ptr.dev; + f->idx = p->idx; + f->nr_failed = 1; + f->nr_retries = 0; + } else if (p->idx != f->idx) { + f->idx = p->idx; + f->nr_failed = 1; + f->nr_retries = 0; + } else { + f->nr_failed++; + } +} + +/* + * returns true if p1 is better than p2: + */ +static inline bool ptr_better(struct bch_fs *c, + const struct extent_ptr_decoded p1, + const struct extent_ptr_decoded p2) +{ + if (likely(!p1.idx && !p2.idx)) { + struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); + struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); + + u64 l1 = atomic64_read(&dev1->cur_latency[READ]); + u64 l2 = atomic64_read(&dev2->cur_latency[READ]); + + /* Pick at random, biased in favor of the faster device: */ + + return bch2_rand_range(l1 + l2) > l1; + } + + if (force_reconstruct_read(c)) + return p1.idx > p2.idx; + + return p1.idx < p2.idx; +} + +/* + * This picks a non-stale pointer, preferably from a device other than @avoid. + * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to + * other devices, it will still pick a pointer from avoid. 
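+ *
+ * (Here the parameter is @failed, an optional list of devices and
+ * pointer indices that already failed; it may be NULL, in which case
+ * any non-stale pointer may be picked.)
+ *
+ * Return convention, as implemented below: > 0 if a pointer was picked,
+ * 0 if there was nothing suitable to pick (e.g. the key had only stale
+ * cached pointers), -EIO if the key has dirty pointers but none of them
+ * could be picked.
+ *
+ * Minimal caller sketch (illustrative only, not part of this patch;
+ * error handling elided):
+ *
+ *	struct extent_ptr_decoded pick;
+ *	int ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
+ *
+ *	if (ret > 0) {
+ *		struct bch_dev *ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ *
+ *		... issue the read to @ca at pick.ptr.offset ...
+ *	}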
+ */ +int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, + struct bch_io_failures *failed, + struct extent_ptr_decoded *pick) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bch_dev_io_failures *f; + struct bch_dev *ca; + int ret = 0; + + if (k.k->type == KEY_TYPE_error) + return -EIO; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + ca = bch_dev_bkey_exists(c, p.ptr.dev); + + /* + * If there are any dirty pointers it's an error if we can't + * read: + */ + if (!ret && !p.ptr.cached) + ret = -EIO; + + if (p.ptr.cached && ptr_stale(ca, &p.ptr)) + continue; + + f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; + if (f) + p.idx = f->nr_failed < f->nr_retries + ? f->idx + : f->idx + 1; + + if (!p.idx && + !bch2_dev_is_readable(ca)) + p.idx++; + + if (force_reconstruct_read(c) && + !p.idx && p.ec_nr) + p.idx++; + + if (p.idx >= p.ec_nr + 1) + continue; + + if (ret > 0 && !ptr_better(c, p, *pick)) + continue; + + *pick = p; + ret = 1; + } + + return ret; +} + +void bch2_bkey_append_ptr(struct bkey_i *k, + struct bch_extent_ptr ptr) +{ + EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev)); + + switch (k->k.type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: + EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); + + ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; + + memcpy((void *) &k->v + bkey_val_bytes(&k->k), + &ptr, + sizeof(ptr)); + k->u64s++; + break; + default: + BUG(); + } +} + +void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) +{ + struct bch_extent_ptr *ptr; + + bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); +} + +/* extent specific utility code */ + +const struct bch_extent_ptr * +bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) + if (ptr->dev == dev) + return ptr; + + return NULL; +} + +const struct bch_extent_ptr * +bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + + if (ca->mi.group && + ca->mi.group - 1 == group) + return ptr; + } + + return NULL; +} + +const struct bch_extent_ptr * +bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) + if (bch2_dev_in_target(c, ptr->dev, target) && + (!ptr->cached || + !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) + return ptr; + + return NULL; +} + unsigned bch2_extent_is_compressed(struct bkey_s_c k) { unsigned ret = 0; switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { + case KEY_TYPE_extent: { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -189,10 +330,10 @@ bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e, return false; } -static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e, +static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, union bch_extent_entry *entry) { - union bch_extent_entry *i = e.v->start; + union bch_extent_entry *i = ptrs.start; if (i == entry) return NULL; @@ -202,23 +343,24 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e, return i; } -union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e, - struct bch_extent_ptr *ptr) +union bch_extent_entry 
*bch2_bkey_drop_ptr(struct bkey_s k, + struct bch_extent_ptr *ptr) { + struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *dst, *src, *prev; bool drop_crc = true; - EBUG_ON(ptr < &e.v->start->ptr || - ptr >= &extent_entry_last(e)->ptr); + EBUG_ON(ptr < &ptrs.start->ptr || + ptr >= &ptrs.end->ptr); EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); src = extent_entry_next(to_entry(ptr)); - if (src != extent_entry_last(e) && + if (src != ptrs.end && !extent_entry_is_crc(src)) drop_crc = false; dst = to_entry(ptr); - while ((prev = extent_entry_prev(e, dst))) { + while ((prev = extent_entry_prev(ptrs, dst))) { if (extent_entry_is_ptr(prev)) break; @@ -232,8 +374,8 @@ union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e, } memmove_u64s_down(dst, src, - (u64 *) extent_entry_last(e) - (u64 *) src); - e.k->u64s -= (u64 *) src - (u64 *) dst; + (u64 *) ptrs.end - (u64 *) src); + k.k->u64s -= (u64 *) src - (u64 *) dst; return dst; } @@ -300,7 +442,7 @@ found: restart_narrow_pointers: extent_for_each_ptr_decode(extent_i_to_s(e), p, i) if (can_narrow_crc(p.crc, n)) { - bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr); + bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr); p.ptr.offset += p.crc.offset; p.crc = n; bch2_extent_ptr_decoded_append(e, &p); @@ -325,302 +467,165 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, bch2_crc_cmp(l.csum, r.csum)); } -static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) -{ - struct bch_extent_ptr *ptr; - - bch2_extent_drop_ptrs(e, ptr, - ptr->cached && - ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -} - -bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k) -{ - return bch2_extent_normalize(c, k); -} - void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) { - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - union bch_extent_entry *entry; - u64 *d = (u64 *) bkeyp_val(f, k); - unsigned i; - - for (i = 0; i < bkeyp_val_u64s(f, k); i++) - d[i] = swab64(d[i]); - - for (entry = (union bch_extent_entry *) d; - entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); - entry = extent_entry_next(entry)) { - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - break; - case BCH_EXTENT_ENTRY_crc32: - entry->crc32.csum = swab32(entry->crc32.csum); - break; - case BCH_EXTENT_ENTRY_crc64: - entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); - entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); - break; - case BCH_EXTENT_ENTRY_crc128: - entry->crc128.csum.hi = (__force __le64) - swab64((__force u64) entry->crc128.csum.hi); - entry->crc128.csum.lo = (__force __le64) - swab64((__force u64) entry->crc128.csum.lo); - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; - } - } - break; - } - } -} - -static const char *extent_ptr_invalid(const struct bch_fs *c, - struct bkey_s_c_extent e, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata) -{ - const struct bch_extent_ptr *ptr2; - struct bch_dev *ca; - - if (ptr->dev >= c->sb.nr_devices || - !c->devs[ptr->dev]) - return "pointer to invalid device"; - - ca = bch_dev_bkey_exists(c, ptr->dev); - if (!ca) - return "pointer to invalid device"; - - extent_for_each_ptr(e, ptr2) - if (ptr != ptr2 && ptr->dev == ptr2->dev) - return "multiple pointers to same device"; - - if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) - return "offset past end of device"; - - if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) - return "offset 
before first bucket"; - - if (bucket_remainder(ca, ptr->offset) + - size_ondisk > ca->mi.bucket_size) - return "spans multiple buckets"; - - return NULL; -} - -static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c_extent e) -{ - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - const struct bch_extent_stripe_ptr *ec; - struct bch_dev *ca; - bool first = true; + union bch_extent_entry *entry; + u64 *d = (u64 *) bkeyp_val(f, k); + unsigned i; - extent_for_each_entry(e, entry) { - if (!first) - pr_buf(out, " "); + for (i = 0; i < bkeyp_val_u64s(f, k); i++) + d[i] = swab64(d[i]); - switch (__extent_entry_type(entry)) { + for (entry = (union bch_extent_entry *) d; + entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); + entry = extent_entry_next(entry)) { + switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); - ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] - ? bch_dev_bkey_exists(c, ptr->dev) - : NULL; - - pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, - (u64) ptr->offset, ptr->gen, - ptr->cached ? " cached" : "", - ca && ptr_stale(ca, ptr) - ? " stale" : ""); break; case BCH_EXTENT_ENTRY_crc32: + entry->crc32.csum = swab32(entry->crc32.csum); + break; case BCH_EXTENT_ENTRY_crc64: + entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); + entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); + break; case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - - pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", - crc.compressed_size, - crc.uncompressed_size, - crc.offset, crc.nonce, - crc.csum_type, - crc.compression_type); + entry->crc128.csum.hi = (__force __le64) + swab64((__force u64) entry->crc128.csum.hi); + entry->crc128.csum.lo = (__force __le64) + swab64((__force u64) entry->crc128.csum.lo); break; case BCH_EXTENT_ENTRY_stripe_ptr: - ec = &entry->stripe_ptr; - - pr_buf(out, "ec: idx %llu block %u", - (u64) ec->idx, ec->block); break; - default: - pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); - goto out; } - - first = false; } -out: - if (bkey_extent_is_cached(e.k)) - pr_buf(out, " cached"); } -static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, - unsigned dev) -{ - struct bch_dev_io_failures *i; - - for (i = f->devs; i < f->devs + f->nr; i++) - if (i->dev == dev) - return i; - - return NULL; -} - -void bch2_mark_io_failure(struct bch_io_failures *failed, - struct extent_ptr_decoded *p) -{ - struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); - - if (!f) { - BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); - - f = &failed->devs[failed->nr++]; - f->dev = p->ptr.dev; - f->idx = p->idx; - f->nr_failed = 1; - f->nr_retries = 0; - } else if (p->idx != f->idx) { - f->idx = p->idx; - f->nr_failed = 1; - f->nr_retries = 0; - } else { - f->nr_failed++; - } -} - -/* - * returns true if p1 is better than p2: - */ -static inline bool ptr_better(struct bch_fs *c, - const struct extent_ptr_decoded p1, - const struct extent_ptr_decoded p2) +static const char *extent_ptr_invalid(const struct bch_fs *c, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { - if (likely(!p1.idx && !p2.idx)) { - struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); - struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr 
*ptr2; + struct bch_dev *ca; - u64 l1 = atomic64_read(&dev1->cur_latency[READ]); - u64 l2 = atomic64_read(&dev2->cur_latency[READ]); + if (ptr->dev >= c->sb.nr_devices || + !c->devs[ptr->dev]) + return "pointer to invalid device"; - /* Pick at random, biased in favor of the faster device: */ + ca = bch_dev_bkey_exists(c, ptr->dev); + if (!ca) + return "pointer to invalid device"; - return bch2_rand_range(l1 + l2) > l1; - } + bkey_for_each_ptr(ptrs, ptr2) + if (ptr != ptr2 && ptr->dev == ptr2->dev) + return "multiple pointers to same device"; - if (force_reconstruct_read(c)) - return p1.idx > p2.idx; + if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) + return "offset past end of device"; - return p1.idx < p2.idx; + if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) + return "offset before first bucket"; + + if (bucket_remainder(ca, ptr->offset) + + size_ondisk > ca->mi.bucket_size) + return "spans multiple buckets"; + + return NULL; } -static int extent_pick_read_device(struct bch_fs *c, - struct bkey_s_c_extent e, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) +static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bch_dev_io_failures *f; + struct bch_extent_crc_unpacked crc; + const struct bch_extent_ptr *ptr; + const struct bch_extent_stripe_ptr *ec; struct bch_dev *ca; - int ret = 0; - - extent_for_each_ptr_decode(e, p, entry) { - ca = bch_dev_bkey_exists(c, p.ptr.dev); - - if (p.ptr.cached && ptr_stale(ca, &p.ptr)) - continue; + bool first = true; - f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; - if (f) - p.idx = f->nr_failed < f->nr_retries - ? f->idx - : f->idx + 1; + bkey_extent_entry_for_each(ptrs, entry) { + if (!first) + pr_buf(out, " "); - if (!p.idx && - !bch2_dev_is_readable(ca)) - p.idx++; + switch (__extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + ptr = entry_to_ptr(entry); + ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + ? bch_dev_bkey_exists(c, ptr->dev) + : NULL; - if (force_reconstruct_read(c) && - !p.idx && p.ec_nr) - p.idx++; + pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, + (u64) ptr->offset, ptr->gen, + ptr->cached ? " cached" : "", + ca && ptr_stale(ca, ptr) + ? 
" stale" : ""); + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - if (p.idx >= p.ec_nr + 1) - continue; + pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", + crc.compressed_size, + crc.uncompressed_size, + crc.offset, crc.nonce, + crc.csum_type, + crc.compression_type); + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + ec = &entry->stripe_ptr; - if (ret && !ptr_better(c, p, *pick)) - continue; + pr_buf(out, "ec: idx %llu block %u", + (u64) ec->idx, ec->block); + break; + default: + pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); + return; + } - *pick = p; - ret = 1; + first = false; } - - return ret; } /* Btree ptrs */ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (bkey_extent_is_cached(k.k)) - return "cached"; - - if (k.k->size) - return "nonzero key size"; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + const struct bch_extent_ptr *ptr; + const char *reason; if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) return "value too big"; - switch (k.k->type) { - case BCH_EXTENT: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - const struct bch_extent_ptr *ptr; - const char *reason; - - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; - - if (!extent_entry_is_ptr(entry)) - return "has non ptr field"; - } - - extent_for_each_ptr(e, ptr) { - reason = extent_ptr_invalid(c, e, ptr, - c->opts.btree_node_size, - true); - if (reason) - return reason; - } + bkey_extent_entry_for_each(ptrs, entry) { + if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) + return "invalid extent entry type"; - return NULL; + if (!extent_entry_is_ptr(entry)) + return "has non ptr field"; } - default: - return "invalid value type"; + bkey_for_each_ptr(ptrs, ptr) { + reason = extent_ptr_invalid(c, k, ptr, + c->opts.btree_node_size, + true); + if (reason) + return reason; } + + return NULL; } void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; unsigned seq; const char *err; @@ -630,7 +635,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, unsigned replicas = 0; bool bad; - extent_for_each_ptr(e, ptr) { + bkey_for_each_ptr(ptrs, ptr) { ca = bch_dev_bkey_exists(c, ptr->dev); replicas++; @@ -656,9 +661,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, } if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, btree_node_type(b), - e.s_c, false)) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + !bch2_bkey_replicas_marked(c, k, false)) { + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "btree key bad (replicas not marked in superblock):\n%s", buf); @@ -667,7 +671,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, return; err: - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", err, buf, PTR_BUCKET_NR(ca, ptr), mark.gen, (unsigned) mark.v.counter); @@ -678,22 +682,13 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, { const char *invalid; - if 
(bkey_extent_is_data(k.k)) - extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); + bkey_ptrs_to_text(out, c, k); invalid = bch2_btree_ptr_invalid(c, k); if (invalid) pr_buf(out, " invalid: %s", invalid); } -int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key), - failed, pick); -} - /* Extents */ bool __bch2_cut_front(struct bpos where, struct bkey_s k) @@ -714,7 +709,7 @@ bool __bch2_cut_front(struct bpos where, struct bkey_s k) * cause offset to point to the next bucket: */ if (!len) - k.k->type = KEY_TYPE_DELETED; + k.k->type = KEY_TYPE_deleted; else if (bkey_extent_is_data(k.k)) { struct bkey_s_extent e = bkey_s_to_extent(k); union bch_extent_entry *entry; @@ -766,7 +761,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k) k->size = len; if (!len) - k->type = KEY_TYPE_DELETED; + k->type = KEY_TYPE_deleted; return true; } @@ -830,13 +825,13 @@ static void verify_extent_nonoverlapping(struct btree *b, struct bkey uk; iter = *_iter; - k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); BUG_ON(k && (uk = bkey_unpack_key(b, k), bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); iter = *_iter; - k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard); #if 0 BUG_ON(k && (uk = bkey_unpack_key(b, k), @@ -882,13 +877,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, verify_extent_nonoverlapping(l->b, &l->iter, insert); node_iter = l->iter; - k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard); if (k && !bkey_written(l->b, k) && bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true)) return; node_iter = l->iter; - k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_discard); if (k && !bkey_written(l->b, k) && bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false)) return; @@ -912,7 +907,7 @@ static void extent_insert_committed(struct extent_insert_state *s) bkey_copy(&split.k, insert); if (s->deleting) - split.k.k.type = KEY_TYPE_DISCARD; + split.k.k.type = KEY_TYPE_discard; bch2_cut_back(s->committed, &split.k.k); @@ -934,7 +929,7 @@ static void extent_insert_committed(struct extent_insert_state *s) if (s->update_journal) { bkey_copy(&split.k, !s->deleting ? 
insert : &s->whiteout); if (s->deleting) - split.k.k.type = KEY_TYPE_DISCARD; + split.k.k.type = KEY_TYPE_discard; bch2_cut_back(s->committed, &split.k.k); @@ -985,7 +980,7 @@ bch2_extent_can_insert(struct btree_insert *trans, *u64s += BKEY_U64s; _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, - KEY_TYPE_DISCARD); + KEY_TYPE_discard); if (!_k) return BTREE_INSERT_OK; @@ -1062,7 +1057,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, btree_account_key_drop(l->b, _k); k.k->size = 0; - k.k->type = KEY_TYPE_DELETED; + k.k->type = KEY_TYPE_deleted; if (_k >= btree_bset_last(l->b)->start) { unsigned u64s = _k->u64s; @@ -1123,7 +1118,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s) while (bkey_cmp(s->committed, insert->k.p) < 0 && (_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, - KEY_TYPE_DISCARD))) { + KEY_TYPE_discard))) { struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k); @@ -1155,7 +1150,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s) !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { if (!bkey_whiteout(k.k)) { btree_account_key_drop(l->b, _k); - _k->type = KEY_TYPE_DISCARD; + _k->type = KEY_TYPE_discard; reserve_whiteout(l->b, _k); } break; @@ -1286,88 +1281,66 @@ bch2_insert_fixup_extent(struct btree_insert *trans, const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX) - return "value too big"; - - if (!k.k->size) - return "zero key size"; + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; + struct bch_extent_crc_unpacked crc; + const struct bch_extent_ptr *ptr; + unsigned size_ondisk = e.k->size; + const char *reason; + unsigned nonce = UINT_MAX; - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - unsigned size_ondisk = e.k->size; - const char *reason; - unsigned nonce = UINT_MAX; + if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX) + return "value too big"; - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; + extent_for_each_entry(e, entry) { + if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) + return "invalid extent entry type"; - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + ptr = entry_to_ptr(entry); - reason = extent_ptr_invalid(c, e, &entry->ptr, - size_ondisk, false); - if (reason) - return reason; - break; - case BCH_EXTENT_ENTRY_crc32: - case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); + reason = extent_ptr_invalid(c, e.s_c, &entry->ptr, + size_ondisk, false); + if (reason) + return reason; + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - if (crc.offset + e.k->size > - crc.uncompressed_size) - return "checksum offset + key size > uncompressed size"; + if (crc.offset + e.k->size > + crc.uncompressed_size) + return "checksum offset + key size > uncompressed size"; - size_ondisk = crc.compressed_size; + size_ondisk = 
crc.compressed_size; - if (!bch2_checksum_type_valid(c, crc.csum_type)) - return "invalid checksum type"; + if (!bch2_checksum_type_valid(c, crc.csum_type)) + return "invalid checksum type"; - if (crc.compression_type >= BCH_COMPRESSION_NR) - return "invalid compression type"; + if (crc.compression_type >= BCH_COMPRESSION_NR) + return "invalid compression type"; - if (bch2_csum_type_is_encryption(crc.csum_type)) { - if (nonce == UINT_MAX) - nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) - return "incorrect nonce"; - } - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; + if (bch2_csum_type_is_encryption(crc.csum_type)) { + if (nonce == UINT_MAX) + nonce = crc.offset + crc.nonce; + else if (nonce != crc.offset + crc.nonce) + return "incorrect nonce"; } + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + break; } - - return NULL; - } - - case BCH_RESERVATION: { - struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); - - if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) - return "incorrect value size"; - - if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) - return "invalid nr_replicas"; - - return NULL; } - default: - return "invalid value type"; - } + return NULL; } -static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, - struct bkey_s_c_extent e) +void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, + struct bkey_s_c k) { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const struct bch_extent_ptr *ptr; struct bch_dev *ca; struct bucket_mark mark; @@ -1429,8 +1402,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (replicas > BCH_REPLICAS_MAX) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent key bad (too many replicas: %u): %s", replicas, buf); @@ -1438,10 +1410,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, btree_node_type(b), - e.s_c, false)) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + !bch2_bkey_replicas_marked(c, e.s_c, false)) { + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent key bad (replicas not marked in superblock):\n%s", buf); @@ -1451,34 +1421,18 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, return; bad_ptr: - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu " "gen %i type %u", buf, PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type); } -void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) -{ - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - bch2_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k)); - break; - case BCH_RESERVATION: - break; - default: - BUG(); - } -} - void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { const char *invalid; - if (bkey_extent_is_data(k.k)) - extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); + bkey_ptrs_to_text(out, c, k); invalid = bch2_extent_invalid(c, k); if (invalid) @@ -1593,41 +1547,17 @@ found: */ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) { - struct bkey_s_extent e; - - switch (k.k->type) { - case KEY_TYPE_ERROR: - return false; - - case KEY_TYPE_DELETED: - return true; - case KEY_TYPE_DISCARD: - 
return bversion_zero(k.k->version); - case KEY_TYPE_COOKIE: - return false; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_to_extent(k); + struct bch_extent_ptr *ptr; - bch2_extent_drop_stale(c, e); + bch2_bkey_drop_ptrs(k, ptr, + ptr->cached && + ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); - if (!bkey_val_u64s(e.k)) { - if (bkey_extent_is_cached(e.k)) { - k.k->type = KEY_TYPE_DISCARD; - if (bversion_zero(k.k->version)) - return true; - } else { - k.k->type = KEY_TYPE_ERROR; - } - } + /* will only happen if all pointers were cached: */ + if (!bkey_val_u64s(k.k)) + k.k->type = KEY_TYPE_deleted; - return false; - case BCH_RESERVATION: - return false; - default: - BUG(); - } + return false; } void bch2_extent_mark_replicas_cached(struct bch_fs *c, @@ -1637,7 +1567,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, { union bch_extent_entry *entry; struct extent_ptr_decoded p; - int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas; + int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas; if (target && extra > 0) extent_for_each_ptr_decode(e, p, entry) { @@ -1661,106 +1591,40 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, } } -/* - * This picks a non-stale pointer, preferably from a device other than @avoid. - * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to - * other devices, it will still pick a pointer from avoid. - */ -int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - int ret; - - switch (k.k->type) { - case KEY_TYPE_ERROR: - return -EIO; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - ret = extent_pick_read_device(c, bkey_s_c_to_extent(k), - failed, pick); - - if (!ret && !bkey_extent_is_cached(k.k)) - ret = -EIO; - - return ret; - - default: - return 0; - } -} - -enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b, +enum merge_result bch2_extent_merge(struct bch_fs *c, struct bkey_i *l, struct bkey_i *r) { - struct bkey_s_extent el, er; + struct bkey_s_extent el = bkey_i_to_s_extent(l); + struct bkey_s_extent er = bkey_i_to_s_extent(r); union bch_extent_entry *en_l, *en_r; - if (key_merging_disabled(c)) - return BCH_MERGE_NOMERGE; - - /* - * Generic header checks - * Assumes left and right are in order - * Left and right must be exactly aligned - */ - - if (l->k.u64s != r->k.u64s || - l->k.type != r->k.type || - bversion_cmp(l->k.version, r->k.version) || - bkey_cmp(l->k.p, bkey_start_pos(&r->k))) + if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k)) return BCH_MERGE_NOMERGE; - switch (l->k.type) { - case KEY_TYPE_DISCARD: - case KEY_TYPE_ERROR: - /* These types are mergeable, and no val to check */ - break; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - el = bkey_i_to_s_extent(l); - er = bkey_i_to_s_extent(r); - - extent_for_each_entry(el, en_l) { - struct bch_extent_ptr *lp, *rp; - struct bch_dev *ca; - - en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - - if ((extent_entry_type(en_l) != - extent_entry_type(en_r)) || - !extent_entry_is_ptr(en_l)) - return BCH_MERGE_NOMERGE; + extent_for_each_entry(el, en_l) { + struct bch_extent_ptr *lp, *rp; + struct bch_dev *ca; - lp = &en_l->ptr; - rp = &en_r->ptr; + en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - if (lp->offset + el.k->size != rp->offset || - lp->dev != rp->dev || - lp->gen != rp->gen) - return BCH_MERGE_NOMERGE; + if ((extent_entry_type(en_l) != + extent_entry_type(en_r)) || + !extent_entry_is_ptr(en_l)) + return 
BCH_MERGE_NOMERGE; - /* We don't allow extents to straddle buckets: */ - ca = bch_dev_bkey_exists(c, lp->dev); + lp = &en_l->ptr; + rp = &en_r->ptr; - if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) - return BCH_MERGE_NOMERGE; - } + if (lp->offset + el.k->size != rp->offset || + lp->dev != rp->dev || + lp->gen != rp->gen) + return BCH_MERGE_NOMERGE; - break; - case BCH_RESERVATION: { - struct bkey_i_reservation *li = bkey_i_to_reservation(l); - struct bkey_i_reservation *ri = bkey_i_to_reservation(r); + /* We don't allow extents to straddle buckets: */ + ca = bch_dev_bkey_exists(c, lp->dev); - if (li->v.generation != ri->v.generation || - li->v.nr_replicas != ri->v.nr_replicas) + if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) return BCH_MERGE_NOMERGE; - break; - } - default: - return BCH_MERGE_NOMERGE; } l->k.needs_whiteout |= r->k.needs_whiteout; @@ -1810,7 +1674,7 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, bch2_bkey_unpack(b, &li.k, l); bch2_bkey_unpack(b, &ri.k, r); - ret = bch2_extent_merge(c, b, &li.k, &ri.k); + ret = bch2_bkey_merge(c, &li.k, &ri.k); if (ret == BCH_MERGE_NOMERGE) return false; @@ -1878,3 +1742,54 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size) return ret; } + +/* KEY_TYPE_reservation: */ + +const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + + if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) + return "incorrect value size"; + + if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) + return "invalid nr_replicas"; + + return NULL; +} + +void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + + pr_buf(out, "generation %u replicas %u", + le32_to_cpu(r.v->generation), + r.v->nr_replicas); +} + +enum merge_result bch2_reservation_merge(struct bch_fs *c, + struct bkey_i *l, struct bkey_i *r) +{ + struct bkey_i_reservation *li = bkey_i_to_reservation(l); + struct bkey_i_reservation *ri = bkey_i_to_reservation(r); + + if (li->v.generation != ri->v.generation || + li->v.nr_replicas != ri->v.nr_replicas) + return BCH_MERGE_NOMERGE; + + l->k.needs_whiteout |= r->k.needs_whiteout; + + /* Keys with no pointers aren't restricted to one bucket and could + * overflow KEY_SIZE + */ + if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { + bch2_key_resize(&l->k, KEY_SIZE_MAX); + bch2_cut_front(l->k.p, r); + return BCH_MERGE_PARTIAL; + } + + bch2_key_resize(&l->k, l->k.size + r->k.size); + + return BCH_MERGE_MERGE; +} diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 389604f25630..57eb35699545 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -10,125 +10,34 @@ struct bch_fs; struct btree_insert; struct btree_insert_entry; -const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, - struct bkey_s_c); -void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); -void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); - -#define bch2_bkey_btree_ops (struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ - .key_debugcheck = bch2_btree_ptr_debugcheck, \ - .val_to_text = bch2_btree_ptr_to_text, \ - .swab = bch2_ptr_swab, \ -} - -const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct 
bkey_s_c); -void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s); -enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *, - struct bkey_i *, struct bkey_i *); - -#define bch2_bkey_extent_ops (struct bkey_ops) { \ - .key_invalid = bch2_extent_invalid, \ - .key_debugcheck = bch2_extent_debugcheck, \ - .val_to_text = bch2_extent_to_text, \ - .swab = bch2_ptr_swab, \ - .key_normalize = bch2_ptr_normalize, \ - .key_merge = bch2_extent_merge, \ - .is_extents = true, \ -} - -void bch2_mark_io_failure(struct bch_io_failures *, - struct extent_ptr_decoded *); -int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *, - struct bch_io_failures *, - struct extent_ptr_decoded *); -int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c, - struct bch_io_failures *, - struct extent_ptr_decoded *); - -void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); - -static inline bool bch2_extent_is_atomic(struct bkey *k, - struct btree_iter *iter) -{ - struct btree *b = iter->l[0].b; - - return bkey_cmp(k->p, b->key.k.p) <= 0 && - bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0; -} - -enum btree_insert_ret -bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *, - unsigned *); -enum btree_insert_ret -bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); - -bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, - unsigned, unsigned); - -const struct bch_extent_ptr * -bch2_extent_has_device(struct bkey_s_c_extent, unsigned); -void bch2_extent_drop_device(struct bkey_s_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); - -unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent); -unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c); -unsigned bch2_extent_is_compressed(struct bkey_s_c); - -unsigned bch2_extent_durability(struct bch_fs *, struct bkey_s_c_extent); - -bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, - struct bch_extent_ptr, u64); - -static inline bool bkey_extent_is_data(const struct bkey *k) -{ - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return true; - default: - return false; - } -} - -static inline bool bkey_extent_is_allocation(const struct bkey *k) -{ - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - case BCH_RESERVATION: - return true; - default: - return false; - } -} - -static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) -{ - return bkey_extent_is_allocation(k.k) && - !bch2_extent_is_compressed(k); -} +/* extent entries: */ -static inline bool bkey_extent_is_cached(const struct bkey *k) -{ - return k->type == BCH_EXTENT_CACHED; -} +#define extent_entry_last(_e) bkey_val_end(_e) -static inline void bkey_extent_set_cached(struct bkey *k, bool cached) -{ - EBUG_ON(k->type != BCH_EXTENT && - k->type != BCH_EXTENT_CACHED); +#define entry_to_ptr(_entry) \ +({ \ + EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ + \ + __builtin_choose_expr( \ + type_is_exact(_entry, const union bch_extent_entry *), \ + (const struct bch_extent_ptr *) (_entry), \ + (struct bch_extent_ptr *) (_entry)); \ +}) - k->type = cached ? 
BCH_EXTENT_CACHED : BCH_EXTENT; -} +/* downcast, preserves const */ +#define to_entry(_entry) \ +({ \ + BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ + !type_is(_entry, struct bch_extent_ptr *) && \ + !type_is(_entry, struct bch_extent_stripe_ptr *)); \ + \ + __builtin_choose_expr( \ + (type_is_exact(_entry, const union bch_extent_crc *) || \ + type_is_exact(_entry, const struct bch_extent_ptr *) ||\ + type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ + (const union bch_extent_entry *) (_entry), \ + (union bch_extent_entry *) (_entry)); \ +}) static inline unsigned __extent_entry_type(const union bch_extent_entry *e) @@ -193,21 +102,6 @@ union bch_extent_crc { struct bch_extent_crc128 crc128; }; -/* downcast, preserves const */ -#define to_entry(_entry) \ -({ \ - BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ - !type_is(_entry, struct bch_extent_ptr *) && \ - !type_is(_entry, struct bch_extent_stripe_ptr *)); \ - \ - __builtin_choose_expr( \ - (type_is_exact(_entry, const union bch_extent_crc *) || \ - type_is_exact(_entry, const struct bch_extent_ptr *) ||\ - type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ - (const union bch_extent_entry *) (_entry), \ - (union bch_extent_entry *) (_entry)); \ -}) - #define __entry_to_crc(_entry) \ __builtin_choose_expr( \ type_is_exact(_entry, const union bch_extent_entry *), \ @@ -221,18 +115,6 @@ union bch_extent_crc { __entry_to_crc(_entry); \ }) -#define entry_to_ptr(_entry) \ -({ \ - EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ - \ - __builtin_choose_expr( \ - type_is_exact(_entry, const union bch_extent_entry *), \ - (const struct bch_extent_ptr *) (_entry), \ - (struct bch_extent_ptr *) (_entry)); \ -}) - -/* checksum entries: */ - static inline struct bch_extent_crc_unpacked bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) { @@ -290,71 +172,64 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) #undef common_fields } -/* Extent entry iteration: */ - -#define extent_entry_next(_entry) \ - ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) +/* bkey_ptrs: generically over any key type that has ptrs */ -#define extent_entry_last(_e) \ - vstruct_idx((_e).v, bkey_val_u64s((_e).k)) +struct bkey_ptrs_c { + const union bch_extent_entry *start; + const union bch_extent_entry *end; +}; -/* Iterate over all entries: */ +struct bkey_ptrs { + union bch_extent_entry *start; + union bch_extent_entry *end; +}; -#define extent_for_each_entry_from(_e, _entry, _start) \ - for ((_entry) = _start; \ - (_entry) < extent_entry_last(_e); \ - (_entry) = extent_entry_next(_entry)) +/* iterate over bkey ptrs */ -#define extent_for_each_entry(_e, _entry) \ - extent_for_each_entry_from(_e, _entry, (_e).v->start) +#define extent_entry_next(_entry) \ + ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -/* Iterate over pointers only: */ +#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ + for ((_entry) = (_start); \ + (_entry) < (_end); \ + (_entry) = extent_entry_next(_entry)) -#define extent_ptr_next(_e, _ptr) \ +#define __bkey_ptr_next(_ptr, _end) \ ({ \ - typeof(&(_e).v->start[0]) _entry; \ + typeof(_end) _entry; \ \ - extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \ + __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ if (extent_entry_is_ptr(_entry)) \ break; \ \ - _entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL; \ + _entry < (_end) ? 
entry_to_ptr(_entry) : NULL; \ }) -#define extent_for_each_ptr(_e, _ptr) \ - for ((_ptr) = &(_e).v->start->ptr; \ - ((_ptr) = extent_ptr_next(_e, _ptr)); \ - (_ptr)++) +#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ + __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -/* Iterate over crcs only: */ +#define bkey_extent_entry_for_each(_p, _entry) \ + bkey_extent_entry_for_each_from(_p, _entry, _p.start) -#define extent_crc_next(_e, _crc, _iter) \ -({ \ - extent_for_each_entry_from(_e, _iter, _iter) \ - if (extent_entry_is_crc(_iter)) { \ - (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ - break; \ - } \ - \ - (_iter) < extent_entry_last(_e); \ -}) +#define __bkey_for_each_ptr(_start, _end, _ptr) \ + for ((_ptr) = (_start); \ + ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ + (_ptr)++) -#define extent_for_each_crc(_e, _crc, _iter) \ - for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ - (_iter) = (_e).v->start; \ - extent_crc_next(_e, _crc, _iter); \ - (_iter) = extent_entry_next(_iter)) +#define bkey_ptr_next(_p, _ptr) \ + __bkey_ptr_next(_ptr, (_p).end) -/* Iterate over pointers, with crcs: */ +#define bkey_for_each_ptr(_p, _ptr) \ + __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -#define __extent_ptr_next_decode(_e, _ptr, _entry) \ +#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ ({ \ __label__ out; \ \ (_ptr).idx = 0; \ (_ptr).ec_nr = 0; \ \ - extent_for_each_entry_from(_e, _entry, _entry) \ + __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ switch (extent_entry_type(_entry)) { \ case BCH_EXTENT_ENTRY_ptr: \ (_ptr).ptr = _entry->ptr; \ @@ -362,7 +237,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) case BCH_EXTENT_ENTRY_crc32: \ case BCH_EXTENT_ENTRY_crc64: \ case BCH_EXTENT_ENTRY_crc128: \ - (_ptr).crc = bch2_extent_crc_unpack((_e).k, \ + (_ptr).crc = bch2_extent_crc_unpack(_k, \ entry_to_crc(_entry)); \ break; \ case BCH_EXTENT_ENTRY_stripe_ptr: \ @@ -370,122 +245,298 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) break; \ } \ out: \ - _entry < extent_entry_last(_e); \ + _entry < (_end); \ }) -#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ - for ((_ptr).crc = bch2_extent_crc_unpack((_e).k, NULL), \ - (_entry) = (_e).v->start; \ - __extent_ptr_next_decode(_e, _ptr, _entry); \ +#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ + for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ + (_entry) = _start; \ + __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ (_entry) = extent_entry_next(_entry)) -/* Iterate over pointers backwards: */ +#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ + __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ + _ptr, _entry) -void bch2_extent_crc_append(struct bkey_i_extent *, - struct bch_extent_crc_unpacked); -void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, - struct extent_ptr_decoded *); +/* utility code common to all keys with pointers: */ -static inline void __extent_entry_push(struct bkey_i_extent *e) +static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) { - union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); - - EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > - BKEY_EXTENT_VAL_U64s_MAX); - - e->k.u64s += extent_entry_u64s(entry); + switch (k.k->type) { + case KEY_TYPE_btree_ptr: { + struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); + return (struct bkey_ptrs_c) { + to_entry(&e.v->start[0]), + to_entry(bkey_val_end(e)) + }; + } + 
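+	/*
+	 * Usage sketch for the generic bkey_ptrs interface introduced here
+	 * (illustrative only, assuming @k is a bkey_s_c obtained from a
+	 * btree iterator):
+	 *
+	 *	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	 *	const struct bch_extent_ptr *ptr;
+	 *	unsigned nr_dirty = 0;
+	 *
+	 *	bkey_for_each_ptr(ptrs, ptr)
+	 *		nr_dirty += !ptr->cached;
+	 *
+	 * Key types without pointers get { NULL, NULL } from the default
+	 * case below, so such loops simply iterate zero times.
+	 */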
case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + return (struct bkey_ptrs_c) { + e.v->start, + extent_entry_last(e) + }; + } + case KEY_TYPE_stripe: { + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + return (struct bkey_ptrs_c) { + to_entry(&s.v->ptrs[0]), + to_entry(&s.v->ptrs[s.v->nr_blocks]), + }; + } + default: + return (struct bkey_ptrs_c) { NULL, NULL }; + } } -static inline void extent_ptr_append(struct bkey_i_extent *e, - struct bch_extent_ptr ptr) +static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) { - ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; - extent_entry_last(extent_i_to_s(e))->ptr = ptr; - __extent_entry_push(e); + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); + + return (struct bkey_ptrs) { + (void *) p.start, + (void *) p.end + }; } -static inline struct bch_devs_list bch2_extent_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) if (!ptr->cached) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) if (ptr->cached) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) +static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_devs(bkey_s_c_to_extent(k)); - default: - return (struct bch_devs_list) { .nr = 0 }; - } + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + bkey_for_each_ptr(p, ptr) + if (ptr->dev == dev) + return ptr; + + return NULL; } -static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) +unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c); +unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); + +void bch2_mark_io_failure(struct bch_io_failures *, + struct extent_ptr_decoded *); +int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, + struct bch_io_failures *, + struct extent_ptr_decoded *); + +/* bch_btree_ptr: */ + +const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, + struct bkey_s_c); +void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); +void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); + +#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ + .key_invalid = bch2_btree_ptr_invalid, \ + .key_debugcheck = bch2_btree_ptr_debugcheck, \ + .val_to_text = bch2_btree_ptr_to_text, \ + .swab = bch2_ptr_swab, \ +} + +/* 
bch_extent: */ + +const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); +void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +enum merge_result bch2_extent_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); + +#define bch2_bkey_ops_extent (struct bkey_ops) { \ + .key_invalid = bch2_extent_invalid, \ + .key_debugcheck = bch2_extent_debugcheck, \ + .val_to_text = bch2_extent_to_text, \ + .swab = bch2_ptr_swab, \ + .key_normalize = bch2_extent_normalize, \ + .key_merge = bch2_extent_merge, \ +} + +/* bch_reservation: */ + +const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +enum merge_result bch2_reservation_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); + +#define bch2_bkey_ops_reservation (struct bkey_ops) { \ + .key_invalid = bch2_reservation_invalid, \ + .val_to_text = bch2_reservation_to_text, \ + .key_merge = bch2_reservation_merge, \ +} + +void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); + +static inline bool bch2_extent_is_atomic(struct bkey *k, + struct btree_iter *iter) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_dirty_devs(bkey_s_c_to_extent(k)); + struct btree *b = iter->l[0].b; + + return bkey_cmp(k->p, b->key.k.p) <= 0 && + bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0; +} + +enum btree_insert_ret +bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *, + unsigned *); +enum btree_insert_ret +bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); + +void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, + unsigned, unsigned); + +const struct bch_extent_ptr * +bch2_extent_has_device(struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); + +unsigned bch2_extent_is_compressed(struct bkey_s_c); + +bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, + struct bch_extent_ptr, u64); + +static inline bool bkey_extent_is_data(const struct bkey *k) +{ + switch (k->type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: + return true; default: - return (struct bch_devs_list) { .nr = 0 }; + return false; } } -static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) +static inline bool bkey_extent_is_allocation(const struct bkey *k) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_cached_devs(bkey_s_c_to_extent(k)); + switch (k->type) { + case KEY_TYPE_extent: + case KEY_TYPE_reservation: + return true; default: - return (struct bch_devs_list) { .nr = 0 }; + return false; } } +static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) +{ + return bkey_extent_is_allocation(k.k) && + !bch2_extent_is_compressed(k); +} + +void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); +void bch2_bkey_drop_device(struct bkey_s, unsigned); + +/* Extent entry iteration: */ + +#define extent_for_each_entry_from(_e, _entry, _start) \ + __bkey_extent_entry_for_each_from(_start, \ + extent_entry_last(_e),_entry) + +#define extent_for_each_entry(_e, _entry) \ + 
extent_for_each_entry_from(_e, _entry, (_e).v->start) + +#define extent_ptr_next(_e, _ptr) \ + __bkey_ptr_next(_ptr, extent_entry_last(_e)) + +#define extent_for_each_ptr(_e, _ptr) \ + __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) + +#define extent_crc_next(_e, _crc, _iter) \ +({ \ + extent_for_each_entry_from(_e, _iter, _iter) \ + if (extent_entry_is_crc(_iter)) { \ + (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ + break; \ + } \ + \ + (_iter) < extent_entry_last(_e); \ +}) + +#define extent_for_each_crc(_e, _crc, _iter) \ + for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ + (_iter) = (_e).v->start; \ + extent_crc_next(_e, _crc, _iter); \ + (_iter) = extent_entry_next(_iter)) + +#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ + __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ + extent_entry_last(_e), _ptr, _entry) + +void bch2_extent_crc_append(struct bkey_i_extent *, + struct bch_extent_crc_unpacked); +void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, + struct extent_ptr_decoded *); + +static inline void __extent_entry_push(struct bkey_i_extent *e) +{ + union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); + + EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > + BKEY_EXTENT_VAL_U64s_MAX); + + e->k.u64s += extent_entry_u64s(entry); +} + bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent, struct bch_extent_crc_unpacked); bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked); -union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent , - struct bch_extent_ptr *); +union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, + struct bch_extent_ptr *); -#define bch2_extent_drop_ptrs(_e, _ptr, _cond) \ +#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ do { \ - _ptr = &(_e).v->start->ptr; \ + struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ + \ + _ptr = &_ptrs.start->ptr; \ \ - while ((_ptr = extent_ptr_next(e, _ptr))) { \ + while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ if (_cond) { \ - _ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \ + _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ + _ptrs = bch2_bkey_ptrs(_k); \ continue; \ } \ \ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index e7d7c5fe6db7..ad06db069fcf 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -121,7 +121,7 @@ static void bch2_quota_reservation_put(struct bch_fs *c, BUG_ON(res->sectors > inode->ei_quota_reserved); bch2_quota_acct(c, inode->ei_qid, Q_SPC, - -((s64) res->sectors), BCH_QUOTA_PREALLOC); + -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); inode->ei_quota_reserved -= res->sectors; mutex_unlock(&inode->ei_quota_lock); @@ -138,7 +138,7 @@ static int bch2_quota_reservation_add(struct bch_fs *c, mutex_lock(&inode->ei_quota_lock); ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, - check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK); + check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); if (likely(!ret)) { inode->ei_quota_reserved += sectors; res->sectors += sectors; @@ -220,7 +220,7 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, quota_res->sectors -= sectors; inode->ei_quota_reserved -= sectors; } else { - bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN); + bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); } #endif inode->v.i_blocks += sectors; @@ -813,7 +813,7 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) struct bvec_iter iter; struct bio_vec bv; unsigned nr_ptrs = !bch2_extent_is_compressed(k) - ? bch2_extent_nr_dirty_ptrs(k) + ? bch2_bkey_nr_dirty_ptrs(k) : 0; bio_for_each_segment(bv, bio, iter) { @@ -2397,7 +2397,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode, BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(©.k.k))); ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size, - bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(©.k)), + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(©.k)), BCH_DISK_RESERVATION_NOFAIL); BUG_ON(ret); @@ -2504,7 +2504,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, goto btree_iter_err; /* already reserved */ - if (k.k->type == BCH_RESERVATION && + if (k.k->type == KEY_TYPE_reservation && bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { bch2_btree_iter_next_slot(iter); continue; @@ -2517,7 +2517,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, } bkey_reservation_init(&reservation.k_i); - reservation.k.type = BCH_RESERVATION; + reservation.k.type = KEY_TYPE_reservation; reservation.k.p = k.k->p; reservation.k.size = k.k->size; @@ -2525,7 +2525,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, bch2_cut_back(end_pos, &reservation.k); sectors = reservation.k.size; - reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k); + reservation.v.nr_replicas = bch2_bkey_nr_dirty_ptrs(k); if (!bkey_extent_is_allocation(k.k)) { ret = bch2_quota_reservation_add(c, inode, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b6fe2059fe5f..93e1f3aaacd4 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -340,7 +340,7 @@ __bch2_create(struct mnt_idmap *idmap, if (tmpfile) inode_u.bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC); + ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC); if (ret) return ERR_PTR(ret); @@ -457,7 +457,7 @@ err_trans: make_bad_inode(&inode->v); iput(&inode->v); err: - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN); + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); inode = ERR_PTR(ret); goto out; } @@ -1079,7 +1079,7 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, } return 0; - } else if (k->k.type == BCH_RESERVATION) { + } else if (k->k.type == KEY_TYPE_reservation) { return fiemap_fill_next_extent(info, bkey_start_offset(&k->k) << 9, 0, k->k.size << 9, @@ -1112,7 +1112,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(ei->v.i_ino, start >> 9), 0, k) if (bkey_extent_is_data(k.k) || - k.k->type == BCH_RESERVATION) { + k.k->type == KEY_TYPE_reservation) { if (bkey_cmp(bkey_start_pos(k.k), POS(ei->v.i_ino, (start + len) >> 9)) >= 0) break; @@ -1414,9 +1414,9 @@ static void bch2_evict_inode(struct inode *vinode) if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { bch2_quota_acct(c, inode->ei_qid, 
Q_SPC, -((s64) inode->v.i_blocks), - BCH_QUOTA_WARN); + KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, - BCH_QUOTA_WARN); + KEY_TYPE_QUOTA_WARN); bch2_inode_rm(c, inode->v.i_ino); WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 74b83201c213..57ab8f088415 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -235,7 +235,6 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, !desc.cmp_bkey(k, k2), c, "duplicate hash table keys:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, desc.btree_id), k), buf))) { ret = fsck_hash_delete_at(desc, &h->info, k_iter); if (ret) @@ -255,7 +254,7 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc, { u64 hash; - if (k.k->type != desc.whiteout_type && + if (k.k->type != KEY_TYPE_whiteout && k.k->type != desc.key_type) return true; @@ -280,7 +279,7 @@ static int hash_check_key(const struct bch_hash_desc desc, u64 hashed; int ret = 0; - if (k.k->type != desc.whiteout_type && + if (k.k->type != KEY_TYPE_whiteout && k.k->type != desc.key_type) return 0; @@ -300,7 +299,6 @@ static int hash_check_key(const struct bch_hash_desc desc, desc.btree_id, k.k->p.offset, hashed, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, desc.btree_id), k), buf))) { ret = hash_redo_key(desc, h, c, k_iter, k, hashed); if (ret) { @@ -370,7 +368,7 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c, *k = bch2_btree_iter_peek(iter); - BUG_ON(k->k->type != BCH_DIRENT); + BUG_ON(k->k->type != KEY_TYPE_dirent); } err: fsck_err: @@ -385,7 +383,6 @@ err_redo: buf, strlen(buf), BTREE_ID_DIRENTS, k->k->p.offset, hash, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, BTREE_ID_DIRENTS), *k), buf))) { ret = hash_redo_key(bch2_dirent_hash_desc, h, c, iter, *k, hash); @@ -471,7 +468,7 @@ static int check_extents(struct bch_fs *c) if (fsck_err_on(w.have_inode && !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != BCH_RESERVATION && + k.k->type != KEY_TYPE_reservation && k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { @@ -529,13 +526,11 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!w.have_inode, c, "dirent in nonexisting directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf)) || fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", mode_to_type(w.inode.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { ret = bch2_btree_delete_at(iter, 0); if (ret) @@ -557,7 +552,7 @@ static int check_dirents(struct bch_fs *c) if (ret) goto fsck_err; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; d = bkey_s_c_to_dirent(k); @@ -586,7 +581,6 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(d_inum == d.k->p.inode, c, "dirent points to own directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { ret = remove_dirent(c, iter, d); if (ret) @@ -604,7 +598,6 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!have_target, c, "dirent points to missing inode:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { ret = remove_dirent(c, iter, d); if (ret) @@ -618,7 +611,6 @@ static int 
check_dirents(struct bch_fs *c) "incorrect d_type: should be %u:\n%s", mode_to_type(target.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { struct bkey_i_dirent *n; @@ -899,7 +891,7 @@ next: e->offset = k.k->p.offset; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); @@ -942,7 +934,7 @@ up: } for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS) + if (k.k->type != KEY_TYPE_inode) continue; if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) @@ -1030,7 +1022,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { switch (k.k->type) { - case BCH_DIRENT: + case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); @@ -1310,7 +1302,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (iter.pos.inode < nlinks_pos || !link) link = &zero_links; - if (k.k && k.k->type == BCH_INODE_FS) { + if (k.k && k.k->type == KEY_TYPE_inode) { /* * Avoid potential deadlocks with iter for * truncate/rm/etc.: @@ -1392,7 +1384,7 @@ static int check_inodes_fast(struct bch_fs *c) int ret = 0; for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS) + if (k.k->type != KEY_TYPE_inode) continue; inode = bkey_s_c_to_inode(k); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 0a350c6d0932..30f93fbe280d 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -178,76 +178,69 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (k.k->p.offset) - return "nonzero offset"; - - switch (k.k->type) { - case BCH_INODE_FS: { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) - return "incorrect value size"; - - if (k.k->p.inode < BLOCKDEV_INODE_MAX) - return "fs inode in blockdev range"; + if (k.k->p.offset) + return "nonzero offset"; - if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) - return "invalid str hash type"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) + return "incorrect value size"; - if (bch2_inode_unpack(inode, &unpacked)) - return "invalid variable length fields"; + if (k.k->p.inode < BLOCKDEV_INODE_MAX) + return "fs inode in blockdev range"; - if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) - return "invalid data checksum type"; + if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) + return "invalid str hash type"; - if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) - return "invalid data checksum type"; + if (bch2_inode_unpack(inode, &unpacked)) + return "invalid variable length fields"; - if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && - unpacked.bi_nlink != 0) - return "flagged as unlinked but bi_nlink != 0"; + if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) + return "invalid data checksum type"; - return NULL; - } - case BCH_INODE_BLOCKDEV: - if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev)) - return "incorrect value size"; + if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) + return "invalid compression type"; - if (k.k->p.inode >= BLOCKDEV_INODE_MAX) - return "blockdev inode in fs range"; + if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && + unpacked.bi_nlink != 0) + return "flagged as unlinked but bi_nlink != 0"; - return NULL; - case BCH_INODE_GENERATION: - if 
(bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) - return "incorrect value size"; - - return NULL; - default: - return "invalid type"; - } + return NULL; } void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_inode inode; + struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - switch (k.k->type) { - case BCH_INODE_FS: - inode = bkey_s_c_to_inode(k); - if (bch2_inode_unpack(inode, &unpacked)) { - pr_buf(out, "(unpack error)"); - break; - } + if (bch2_inode_unpack(inode, &unpacked)) { + pr_buf(out, "(unpack error)"); + return; + } #define BCH_INODE_FIELD(_name, _bits) \ - pr_buf(out, #_name ": %llu ", (u64) unpacked._name); - BCH_INODE_FIELDS() + pr_buf(out, #_name ": %llu ", (u64) unpacked._name); + BCH_INODE_FIELDS() #undef BCH_INODE_FIELD - break; - } +} + +const char *bch2_inode_generation_invalid(const struct bch_fs *c, + struct bkey_s_c k) +{ + if (k.k->p.offset) + return "nonzero offset"; + + if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) + return "incorrect value size"; + + return NULL; +} + +void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ } void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, @@ -281,10 +274,9 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, static inline u32 bkey_generation(struct bkey_s_c k) { switch (k.k->type) { - case BCH_INODE_BLOCKDEV: - case BCH_INODE_FS: + case KEY_TYPE_inode: BUG(); - case BCH_INODE_GENERATION: + case KEY_TYPE_inode_generation: return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); default: return 0; @@ -330,8 +322,7 @@ again: return ret; switch (k.k->type) { - case BCH_INODE_BLOCKDEV: - case BCH_INODE_FS: + case KEY_TYPE_inode: /* slot used */ if (iter->pos.inode >= max) goto out; @@ -405,19 +396,19 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) return ret; } - bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c, + bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, "inode %llu not found when deleting", inode_nr); switch (k.k->type) { - case BCH_INODE_FS: { + case KEY_TYPE_inode: { struct bch_inode_unpacked inode_u; if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) bi_generation = inode_u.bi_generation + 1; break; } - case BCH_INODE_GENERATION: { + case KEY_TYPE_inode_generation: { struct bkey_s_c_inode_generation g = bkey_s_c_to_inode_generation(k); bi_generation = le32_to_cpu(g.v->bi_generation); @@ -455,7 +446,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, POS(inode_nr, 0), BTREE_ITER_SLOTS, k) { switch (k.k->type) { - case BCH_INODE_FS: + case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); break; default: @@ -464,7 +455,6 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, } break; - } return bch2_btree_iter_unlock(&iter) ?: ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 897ff65d01cb..0bc852e69355 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,11 +9,21 @@ const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_inode_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_inode (struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ } +const char *bch2_inode_generation_invalid(const struct bch_fs *, + struct bkey_s_c); +void bch2_inode_generation_to_text(struct 
printbuf *, struct bch_fs *, + struct bkey_s_c); + +#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \ + .key_invalid = bch2_inode_generation_invalid, \ + .val_to_text = bch2_inode_generation_to_text, \ +} + struct bch_inode_unpacked { u64 bi_inum; __le64 bi_hash_seed; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 2fee2f2efd38..3e990709fedb 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -213,20 +213,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, enum bch_data_type type, const struct bkey_i *k) { - struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); const struct bch_extent_ptr *ptr; struct bch_write_bio *n; struct bch_dev *ca; BUG_ON(c->opts.nochanges); - extent_for_each_ptr(e, ptr) { + bkey_for_each_ptr(ptrs, ptr) { BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || !c->devs[ptr->dev]); ca = bch_dev_bkey_exists(c, ptr->dev); - if (ptr + 1 < &extent_entry_last(e)->ptr) { + if (to_entry(ptr + 1) < ptrs.end) { n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOIO, &ca->replica_set)); @@ -317,7 +317,6 @@ static void __bch2_write_index(struct bch_write_op *op) { struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; - struct bkey_s_extent e; struct bch_extent_ptr *ptr; struct bkey_i *src, *dst = keys->keys, *n, *k; unsigned dev; @@ -327,12 +326,10 @@ static void __bch2_write_index(struct bch_write_op *op) n = bkey_next(src); bkey_copy(dst, src); - e = bkey_i_to_s_extent(dst); - - bch2_extent_drop_ptrs(e, ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr, test_bit(ptr->dev, op->failed.d)); - if (!bch2_extent_nr_ptrs(e.c)) { + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) { ret = -EIO; goto err; } @@ -433,14 +430,13 @@ static void init_append_extent(struct bch_write_op *op, e->k.p = op->pos; e->k.size = crc.uncompressed_size; e->k.version = version; - bkey_extent_set_cached(&e->k, op->flags & BCH_WRITE_CACHED); if (crc.csum_type || crc.compression_type || crc.nonce) bch2_extent_crc_append(e, crc); - bch2_alloc_sectors_append_ptrs(op->c, wp, e, crc.compressed_size); + bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i, crc.compressed_size); bch2_keylist_push(&op->insert_keys); } @@ -1608,7 +1604,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, struct bpos pos = bkey_start_pos(k.k); int pick_ret; - pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick); + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); /* hole or reservation - just zero fill: */ if (!pick_ret) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index a74566764630..f3bb28f32c6e 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -141,11 +141,12 @@ static void journal_entry_null_range(void *start, void *end) static int journal_validate_key(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, - struct bkey_i *k, enum bkey_type key_type, + struct bkey_i *k, enum btree_node_type key_type, const char *type, int write) { void *next = vstruct_next(entry); const char *invalid; + unsigned version = le32_to_cpu(jset->version); int ret = 0; if (journal_entry_err_on(!k->k.u64s, c, @@ -174,14 +175,17 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, } if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) - bch2_bkey_swab(key_type, NULL, bkey_to_packed(k)); + bch2_bkey_swab(NULL, bkey_to_packed(k)); - invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k)); + if (!write && + version < bcachefs_metadata_version_bkey_renumber) + 
bch2_bkey_renumber(key_type, bkey_to_packed(k), write); + + invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, key_type, - bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", type, invalid, buf); @@ -190,6 +194,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, journal_entry_null_range(vstruct_next(entry), next); return 0; } + + if (write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(key_type, bkey_to_packed(k), write); fsck_err: return ret; } @@ -203,8 +211,8 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c, vstruct_for_each(entry, k) { int ret = journal_validate_key(c, jset, entry, k, - bkey_type(entry->level, - entry->btree_id), + __btree_node_type(entry->level, + entry->btree_id), "key", write); if (ret) return ret; @@ -351,14 +359,17 @@ static int jset_validate(struct bch_fs *c, { size_t bytes = vstruct_bytes(jset); struct bch_csum csum; + unsigned version; int ret = 0; if (le64_to_cpu(jset->magic) != jset_magic(c)) return JOURNAL_ENTRY_NONE; - if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) { - bch_err(c, "unknown journal entry version %u", - le32_to_cpu(jset->version)); + version = le32_to_cpu(jset->version); + if ((version != BCH_JSET_VERSION_OLD && + version < bcachefs_metadata_version_min) || + version >= bcachefs_metadata_version_max) { + bch_err(c, "unknown journal entry version %u", version); return BCH_FSCK_UNKNOWN_VERSION; } @@ -929,7 +940,6 @@ static void __journal_write_alloc(struct journal *j, unsigned replicas_want) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bkey_i_extent *e = bkey_i_to_extent(&w->key); struct journal_device *ja; struct bch_dev *ca; unsigned i; @@ -951,13 +961,14 @@ static void __journal_write_alloc(struct journal *j, if (!ca->mi.durability || ca->mi.state != BCH_MEMBER_STATE_RW || !ja->nr || - bch2_extent_has_device(extent_i_to_s_c(e), ca->dev_idx) || + bch2_bkey_has_device(bkey_i_to_s_c(&w->key), + ca->dev_idx) || sectors > ja->sectors_free) continue; bch2_dev_stripe_increment(c, ca, &j->wp.stripe); - extent_ptr_append(e, + bch2_bkey_append_ptr(&w->key, (struct bch_extent_ptr) { .offset = bucket_to_sector(ca, ja->buckets[ja->cur_idx]) + @@ -1096,7 +1107,7 @@ static void journal_write_done(struct closure *cl) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *w = journal_prev_buf(j); struct bch_devs_list devs = - bch2_extent_devs(bkey_i_to_s_c_extent(&w->key)); + bch2_bkey_devs(bkey_i_to_s_c(&w->key)); u64 seq = le64_to_cpu(w->data->seq); u64 last_seq = le64_to_cpu(w->data->last_seq); @@ -1158,7 +1169,7 @@ static void journal_write_endio(struct bio *bio) unsigned long flags; spin_lock_irqsave(&j->err_lock, flags); - bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx); + bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx); spin_unlock_irqrestore(&j->err_lock, flags); } @@ -1175,6 +1186,7 @@ void bch2_journal_write(struct closure *cl) struct jset *jset; struct bio *bio; struct bch_extent_ptr *ptr; + bool validate_before_checksum = false; unsigned i, sectors, bytes; journal_buf_realloc(j, w); @@ -1196,12 +1208,22 @@ void bch2_journal_write(struct closure *cl) jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); jset->magic = cpu_to_le64(jset_magic(c)); - jset->version = 
cpu_to_le32(BCACHE_JSET_VERSION); + + jset->version = c->sb.version < bcachefs_metadata_version_new_versioning + ? cpu_to_le32(BCH_JSET_VERSION_OLD) + : cpu_to_le32(c->sb.version); SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); - if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) + validate_before_checksum = true; + + if (le32_to_cpu(jset->version) < + bcachefs_metadata_version_bkey_renumber) + validate_before_checksum = true; + + if (validate_before_checksum && jset_validate_entries(c, jset, WRITE)) goto err; @@ -1212,7 +1234,7 @@ void bch2_journal_write(struct closure *cl) jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); - if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + if (!validate_before_checksum && jset_validate_entries(c, jset, WRITE)) goto err; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 46878590327d..63fe8cbb0564 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -15,7 +15,7 @@ #include "replicas.h" #include "super-io.h" -static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, +static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, unsigned dev_idx, int flags, bool metadata) { unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; @@ -23,9 +23,9 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; unsigned nr_good; - bch2_extent_drop_device(e, dev_idx); + bch2_bkey_drop_device(k, dev_idx); - nr_good = bch2_extent_durability(c, e.c); + nr_good = bch2_bkey_durability(c, k.s_c); if ((!nr_good && !(flags & lost)) || (nr_good < replicas && !(flags & degraded))) return -EINVAL; @@ -36,7 +36,6 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct bkey_s_c k; - struct bkey_s_extent e; BKEY_PADDED(key) tmp; struct btree_iter iter; int ret = 0; @@ -51,7 +50,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) !(ret = btree_iter_err(k))) { if (!bkey_extent_is_data(k.k) || !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k); + ret = bch2_mark_bkey_replicas(c, k); if (ret) break; bch2_btree_iter_next(&iter); @@ -59,18 +58,18 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) } bkey_reassemble(&tmp.key, k); - e = bkey_i_to_s_extent(&tmp.key); - ret = drop_dev_ptrs(c, e, dev_idx, flags, false); + ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key), + dev_idx, flags, false); if (ret) break; /* * If the new extent no longer has any pointers, bch2_extent_normalize() * will do the appropriate thing with it (turning it into a - * KEY_TYPE_ERROR key, or just a discard if it was a cached extent) + * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize(c, e.s); + bch2_extent_normalize(c, bkey_i_to_s(&tmp.key)); iter.pos = bkey_start_pos(&tmp.key.k); @@ -118,10 +117,10 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct bkey_i_extent *new_key; + struct bkey_i_btree_ptr *new_key; retry: - if 
(!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key), - dev_idx)) { + if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), + dev_idx)) { /* * we might have found a btree node key we * needed to update, and then tried to update it @@ -130,15 +129,14 @@ retry: */ bch2_btree_iter_downgrade(&iter); - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) goto err; } else { bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_extent(&tmp.k); + new_key = bkey_i_to_btree_ptr(&tmp.k); - ret = drop_dev_ptrs(c, extent_i_to_s(new_key), + ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i), dev_idx, flags, true); if (ret) goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7de3c6c475be..aff611c908ef 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -100,8 +100,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_cut_back(insert->k.p, &new->k); if (m->data_cmd == DATA_REWRITE) - bch2_extent_drop_device(extent_i_to_s(insert), - m->data_opts.rewrite_dev); + bch2_bkey_drop_device(extent_i_to_s(insert).s, + m->data_opts.rewrite_dev); extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) { @@ -132,8 +132,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) * has fewer replicas than when we last looked at it - meaning * we need to get a disk reservation here: */ - nr = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - - (bch2_extent_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); + nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - + (bch2_bkey_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); if (nr > 0) { /* * can't call bch2_disk_reservation_add() with btree @@ -243,7 +243,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, switch (data_cmd) { case DATA_ADD_REPLICAS: { int nr = (int) io_opts.data_replicas - - bch2_extent_nr_dirty_ptrs(k); + bch2_bkey_nr_dirty_ptrs(k); if (nr > 0) { m->op.nr_replicas = m->nr_ptrs_reserved = nr; @@ -477,7 +477,6 @@ int bch2_move_data(struct bch_fs *c, struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); BKEY_PADDED(k) tmp; struct bkey_s_c k; - struct bkey_s_c_extent e; struct data_opts data_opts; enum data_cmd data_cmd; u64 delay, cur_inum = U64_MAX; @@ -530,8 +529,6 @@ peek: if (!bkey_extent_is_data(k.k)) goto next_nondata; - e = bkey_s_c_to_extent(k); - if (cur_inum != k.k->p.inode) { struct bch_inode_unpacked inode; @@ -545,8 +542,7 @@ peek: goto peek; } - switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e, - &io_opts, &data_opts))) { + switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -581,7 +577,7 @@ peek: if (rate) bch2_ratelimit_increment(rate, k.k->size); next: - atomic64_add(k.k->size * bch2_extent_nr_dirty_ptrs(k), + atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k), &stats->sectors_seen); next_nondata: bch2_btree_iter_next(&stats->iter); @@ -613,7 +609,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c) for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k); + ret = bch2_mark_bkey_replicas(c, k); if (ret) break; } @@ -637,8 +633,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c) for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + 
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); bch2_btree_iter_cond_resched(&iter); } @@ -668,10 +663,9 @@ static int bch2_move_btree(struct bch_fs *c, for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE, - bkey_i_to_s_c_extent(&b->key), - &io_opts, - &data_opts))) { + switch ((cmd = pred(c, arg, + bkey_i_to_s_c(&b->key), + &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -697,8 +691,7 @@ next: #if 0 static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { @@ -707,33 +700,38 @@ static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, #endif static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { - unsigned nr_good = bch2_extent_durability(c, e); - unsigned replicas = type == BKEY_TYPE_BTREE - ? c->opts.metadata_replicas - : io_opts->data_replicas; + unsigned nr_good = bch2_bkey_durability(c, k); + unsigned replicas = 0; + + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + replicas = c->opts.metadata_replicas; + break; + case KEY_TYPE_extent: + replicas = io_opts->data_replicas; + break; + } if (!nr_good || nr_good >= replicas) return DATA_SKIP; data_opts->target = 0; - data_opts->btree_insert_flags = 0; + data_opts->btree_insert_flags = 0; return DATA_ADD_REPLICAS; } static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { struct bch_ioctl_data *op = arg; - if (!bch2_extent_has_device(e, op->migrate.dev)) + if (!bch2_bkey_has_device(k, op->migrate.dev)) return DATA_SKIP; data_opts->target = 0; diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 3f7e31cc8f6e..71b3d2b2ddb6 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -47,7 +47,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, struct bkey_s_c); typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, - enum bkey_type, struct bkey_s_c_extent, + struct bkey_s_c, struct bch_io_opts *, struct data_opts *); int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 80577661e008..4bf4cc33dbb1 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -66,36 +66,42 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) } static bool __copygc_pred(struct bch_dev *ca, - struct bkey_s_c_extent e) + struct bkey_s_c k) { copygc_heap *h = &ca->copygc_heap; - const struct bch_extent_ptr *ptr = - bch2_extent_has_device(e, ca->dev_idx); - if (ptr) { - struct copygc_heap_entry search = { .offset = ptr->offset }; + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const struct bch_extent_ptr *ptr = + bch2_extent_has_device(e, ca->dev_idx); - ssize_t i = eytzinger0_find_le(h->data, h->used, - sizeof(h->data[0]), - bucket_offset_cmp, &search); + if (ptr) { + struct copygc_heap_entry search = { .offset = ptr->offset }; - return (i >= 0 && - ptr->offset < h->data[i].offset + ca->mi.bucket_size && - ptr->gen == h->data[i].gen); + ssize_t i = eytzinger0_find_le(h->data, h->used, + sizeof(h->data[0]), + 
bucket_offset_cmp, &search); + + return (i >= 0 && + ptr->offset < h->data[i].offset + ca->mi.bucket_size && + ptr->gen == h->data[i].gen); + } + break; + } } return false; } static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { struct bch_dev *ca = arg; - if (!__copygc_pred(ca, e)) + if (!__copygc_pred(ca, k)) return DATA_SKIP; data_opts->target = dev_to_target(ca->dev_idx); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 80869e34e3b6..acdc952c48be 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -184,6 +184,9 @@ enum opt_type { OPT_BOOL(), \ NO_SB_OPT, false) \ BCH_OPT(no_data_io, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(version_upgrade, u8, OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index cc20742d542b..7c38daac1cac 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -22,23 +22,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_quota dq; - if (k.k->p.inode >= QTYP_NR) return "invalid quota type"; - switch (k.k->type) { - case BCH_QUOTA: { - dq = bkey_s_c_to_quota(k); + if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) + return "incorrect value size"; - if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) - return "incorrect value size"; - - return NULL; - } - default: - return "invalid type"; - } + return NULL; } static const char * const bch2_quota_counters[] = { @@ -49,20 +39,14 @@ static const char * const bch2_quota_counters[] = { void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_quota dq; + struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); unsigned i; - switch (k.k->type) { - case BCH_QUOTA: - dq = bkey_s_c_to_quota(k); - - for (i = 0; i < Q_COUNTERS; i++) - pr_buf(out, "%s hardlimit %llu softlimit %llu", - bch2_quota_counters[i], - le64_to_cpu(dq.v->c[i].hardlimit), - le64_to_cpu(dq.v->c[i].softlimit)); - break; - } + for (i = 0; i < Q_COUNTERS; i++) + pr_buf(out, "%s hardlimit %llu softlimit %llu", + bch2_quota_counters[i], + le64_to_cpu(dq.v->c[i].hardlimit), + le64_to_cpu(dq.v->c[i].softlimit)); } #ifdef CONFIG_BCACHEFS_QUOTA @@ -178,7 +162,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, BUG_ON((s64) n < 0); - if (mode == BCH_QUOTA_NOCHECK) + if (mode == KEY_TYPE_QUOTA_NOCHECK) return 0; if (v <= 0) { @@ -201,7 +185,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, if (qc->hardlimit && qc->hardlimit < n && !ignore_hardlimit(q)) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, HARDWARN); @@ -212,7 +196,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, qc->timer && ktime_get_real_seconds() >= qc->timer && !ignore_hardlimit(q)) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN); @@ -221,7 +205,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, if (qc->softlimit && qc->softlimit < n && qc->timer == 0) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, SOFTWARN); @@ -312,13 +296,13 @@ int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, ret = bch2_quota_check_limit(c, i, dst_q[i], 
&msgs, Q_SPC, dst_q[i]->c[Q_SPC].v + space, - BCH_QUOTA_PREALLOC); + KEY_TYPE_QUOTA_PREALLOC); if (ret) goto err; ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, dst_q[i]->c[Q_INO].v + 1, - BCH_QUOTA_PREALLOC); + KEY_TYPE_QUOTA_PREALLOC); if (ret) goto err; } @@ -347,7 +331,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) BUG_ON(k.k->p.inode >= QTYP_NR); switch (k.k->type) { - case BCH_QUOTA: + case KEY_TYPE_quota: dq = bkey_s_c_to_quota(k); q = &c->quotas[k.k->p.inode]; @@ -447,15 +431,15 @@ int bch2_fs_quota_read(struct bch_fs *c) for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, BTREE_ITER_PREFETCH, k) { switch (k.k->type) { - case BCH_INODE_FS: + case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); if (ret) return ret; bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, - BCH_QUOTA_NOCHECK); + KEY_TYPE_QUOTA_NOCHECK); bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, - BCH_QUOTA_NOCHECK); + KEY_TYPE_QUOTA_NOCHECK); } } return bch2_btree_iter_unlock(&iter) ?: ret; @@ -743,7 +727,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, return ret; switch (k.k->type) { - case BCH_QUOTA: + case KEY_TYPE_quota: new_quota.v = *bkey_s_c_to_quota(k).v; break; } diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 9c06eb07bccb..294a04db84bf 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -10,15 +10,15 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_quota; const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_quota_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_quota (struct bkey_ops) { \ .key_invalid = bch2_quota_invalid, \ .val_to_text = bch2_quota_to_text, \ } enum quota_acct_mode { - BCH_QUOTA_PREALLOC, - BCH_QUOTA_WARN, - BCH_QUOTA_NOCHECK, + KEY_TYPE_QUOTA_PREALLOC, + KEY_TYPE_QUOTA_WARN, + KEY_TYPE_QUOTA_NOCHECK, }; static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 5d246c5b8186..eec74d4a5712 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -70,28 +70,34 @@ void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) } static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; - /* Make sure we have room to add a new pointer: */ - if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX > - BKEY_EXTENT_VAL_U64s_MAX) - return DATA_SKIP; + /* Make sure we have room to add a new pointer: */ + if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX > + BKEY_EXTENT_VAL_U64s_MAX) + return DATA_SKIP; - extent_for_each_ptr_decode(e, p, entry) - if (rebalance_ptr_pred(c, p, io_opts)) - goto found; + extent_for_each_ptr_decode(e, p, entry) + if (rebalance_ptr_pred(c, p, io_opts)) + goto found; - return DATA_SKIP; + return DATA_SKIP; found: - data_opts->target = io_opts->background_target; - data_opts->btree_insert_flags = 0; - return DATA_ADD_REPLICAS; + data_opts->target = io_opts->background_target; + data_opts->btree_insert_flags = 0; + return DATA_ADD_REPLICAS; + } + default: + return DATA_SKIP; + } } struct rebalance_work { 
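The conversions above in move.c, movinggc.c, and rebalance.c all follow one pattern: predicates that used to take a struct bkey_s_c_extent plus an enum bkey_type discriminator now take a bare struct bkey_s_c and switch on k.k->type themselves, with pointer iteration going through the generic interface added in the extents.h hunk. A minimal sketch of a caller written against that interface (count_ptrs_on_dev is an illustrative name, not a helper this patch adds):

/*
 * Sketch only: walk the pointers of any key type via the generic
 * bkey_ptrs interface. bch2_bkey_ptrs_c() returns an empty
 * { NULL, NULL } range for key types that carry no pointers, so the
 * loop body never runs for them and no per-type switch is needed here.
 */
static unsigned count_ptrs_on_dev(struct bkey_s_c k, unsigned dev)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *ptr;
	unsigned nr = 0;

	bkey_for_each_ptr(ptrs, ptr)
		if (ptr->dev == dev)
			nr++;

	return nr;
}

This is the same shape as bch2_bkey_devs() and bch2_bkey_has_device() in the extents.h changes, and it is what lets the journal, migrate, and move paths share one helper for extents and btree pointers alike.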
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1cb0c9940ec1..172770606294 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -147,6 +147,10 @@ int bch2_fs_recovery(struct bch_fs *c) mutex_unlock(&c->sb_lock); goto err; } + + if (le16_to_cpu(c->disk_sb.sb->version) < + bcachefs_metadata_version_bkey_renumber) + bch2_sb_clean_renumber(clean, READ); } mutex_unlock(&c->sb_lock); @@ -265,12 +269,18 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; - if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; - mutex_unlock(&c->sb_lock); + mutex_lock(&c->sb_lock); + if (c->opts.version_upgrade) { + if (c->sb.version < bcachefs_metadata_version_new_versioning) + c->disk_sb.sb->version_min = + cpu_to_le16(bcachefs_metadata_version_min); + c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); } + if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; + mutex_unlock(&c->sb_lock); + if (enabled_qtypes(c)) { bch_verbose(c, "reading quotas:"); ret = bch2_fs_quota_read(c); @@ -379,9 +389,12 @@ int bch2_fs_initialize(struct bch_fs *c) goto err; mutex_lock(&c->sb_lock); + c->disk_sb.sb->version = c->disk_sb.sb->version_min = + cpu_to_le16(bcachefs_metadata_version_current); + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; + SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; bch2_write_super(c); mutex_unlock(&c->sb_lock); diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 0296931b6b8c..77d175f34b2b 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -73,64 +73,57 @@ void bch2_cpu_replicas_to_text(struct printbuf *out, static void extent_to_replicas(struct bkey_s_c k, struct bch_replicas_entry *r) { - if (bkey_extent_is_data(k.k)) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; - r->nr_required = 1; + r->nr_required = 1; - extent_for_each_ptr_decode(e, p, entry) { - if (p.ptr.cached) - continue; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.ptr.cached) + continue; - if (p.ec_nr) { - r->nr_devs = 0; - break; - } - - r->devs[r->nr_devs++] = p.ptr.dev; + if (p.ec_nr) { + r->nr_devs = 0; + break; } + + r->devs[r->nr_devs++] = p.ptr.dev; } } static void stripe_to_replicas(struct bkey_s_c k, struct bch_replicas_entry *r) { - if (k.k->type == BCH_STRIPE) { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - const struct bch_extent_ptr *ptr; + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + const struct bch_extent_ptr *ptr; - r->nr_required = s.v->nr_blocks - s.v->nr_redundant; + r->nr_required = s.v->nr_blocks - s.v->nr_redundant; - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) - r->devs[r->nr_devs++] = ptr->dev; - } + for (ptr = s.v->ptrs; + ptr < s.v->ptrs + s.v->nr_blocks; + ptr++) + r->devs[r->nr_devs++] = ptr->dev; } -static void bkey_to_replicas(enum bkey_type type, - struct bkey_s_c k, +static void bkey_to_replicas(struct bkey_s_c k, struct bch_replicas_entry *e) { e->nr_devs = 0; - switch (type) { - case BKEY_TYPE_BTREE: + switch (k.k->type) { + case KEY_TYPE_btree_ptr: e->data_type = BCH_DATA_BTREE; 
extent_to_replicas(k, e); break; - case BKEY_TYPE_EXTENTS: + case KEY_TYPE_extent: e->data_type = BCH_DATA_USER; extent_to_replicas(k, e); break; - case BKEY_TYPE_EC: + case KEY_TYPE_stripe: e->data_type = BCH_DATA_USER; stripe_to_replicas(k, e); break; - default: - break; } replicas_entry_sort(e); @@ -296,26 +289,21 @@ int bch2_mark_replicas(struct bch_fs *c, return __bch2_mark_replicas(c, &search.e); } -int bch2_mark_bkey_replicas(struct bch_fs *c, - enum bkey_type type, - struct bkey_s_c k) +int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) { struct bch_replicas_entry_padded search; + struct bch_devs_list cached = bch2_bkey_cached_devs(k); + unsigned i; int ret; memset(&search, 0, sizeof(search)); - if (type == BKEY_TYPE_EXTENTS) { - struct bch_devs_list cached = bch2_bkey_cached_devs(k); - unsigned i; + for (i = 0; i < cached.nr; i++) + if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED, + bch2_dev_list_single(cached.devs[i])))) + return ret; - for (i = 0; i < cached.nr; i++) - if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED, - bch2_dev_list_single(cached.devs[i])))) - return ret; - } - - bkey_to_replicas(type, k, &search.e); + bkey_to_replicas(k, &search.e); return search.e.nr_devs ? __bch2_mark_replicas(c, &search.e) @@ -719,26 +707,22 @@ bool bch2_replicas_marked(struct bch_fs *c, } bool bch2_bkey_replicas_marked(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, bool check_gc_replicas) { struct bch_replicas_entry_padded search; + struct bch_devs_list cached = bch2_bkey_cached_devs(k); + unsigned i; memset(&search, 0, sizeof(search)); - if (type == BKEY_TYPE_EXTENTS) { - struct bch_devs_list cached = bch2_bkey_cached_devs(k); - unsigned i; - - for (i = 0; i < cached.nr; i++) - if (!bch2_replicas_marked(c, BCH_DATA_CACHED, - bch2_dev_list_single(cached.devs[i]), - check_gc_replicas)) - return false; - } + for (i = 0; i < cached.nr; i++) + if (!bch2_replicas_marked(c, BCH_DATA_CACHED, + bch2_dev_list_single(cached.devs[i]), + check_gc_replicas)) + return false; - bkey_to_replicas(type, k, &search.e); + bkey_to_replicas(k, &search.e); return search.e.nr_devs ? 
replicas_has_entry(c, &search.e, check_gc_replicas) diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index e22d2d7cd08a..03aaafdc7c17 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -6,12 +6,11 @@ bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type, struct bch_devs_list, bool); -bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type, +bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c, bool); int bch2_mark_replicas(struct bch_fs *, enum bch_data_type, struct bch_devs_list); -int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type, - struct bkey_s_c); +int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index fbd6c3372677..6f30fbe44eb8 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -118,7 +118,6 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, struct bch_hash_desc { enum btree_id btree_id; u8 key_type; - u8 whiteout_type; u64 (*hash_key)(const struct bch_hash_info *, const void *); u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); @@ -149,7 +148,7 @@ bch2_hash_lookup(struct btree_trans *trans, if (k.k->type == desc.key_type) { if (!desc.cmp_key(k, key)) return iter; - } else if (k.k->type == desc.whiteout_type) { + } else if (k.k->type == KEY_TYPE_whiteout) { ; } else { /* hole, not found */ @@ -202,7 +201,7 @@ static inline int bch2_hash_needs_whiteout(struct btree_trans *trans, for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { if (k.k->type != desc.key_type && - k.k->type != desc.whiteout_type) + k.k->type != KEY_TYPE_whiteout) return false; if (k.k->type == desc.key_type && @@ -245,7 +244,7 @@ static inline int __bch2_hash_set(struct btree_trans *trans, return PTR_ERR(slot); } - if (k.k->type != desc.whiteout_type) + if (k.k->type != KEY_TYPE_whiteout) goto not_found; } @@ -295,7 +294,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans, bkey_init(&delete->k); delete->k.p = iter->pos; - delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED; + delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted; bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete)); return 0; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 3dbcb6d7d261..dafdc45b442c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -240,21 +240,25 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) struct bch_sb_field *f; struct bch_sb_field_members *mi; const char *err; + u32 version, version_min; u16 block_size; - if (le16_to_cpu(sb->version) < BCH_SB_VERSION_MIN || - le16_to_cpu(sb->version) > BCH_SB_VERSION_MAX) + version = le16_to_cpu(sb->version); + version_min = version >= bcachefs_metadata_version_new_versioning + ? 
le16_to_cpu(sb->version_min) + : version; + + if (version >= bcachefs_metadata_version_max || + version_min < bcachefs_metadata_version_min) return "Unsupported superblock version"; + if (version_min > version) + return "Bad minimum version"; + if (sb->features[1] || (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) return "Filesystem has incompatible features"; - if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) { - SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7); - SET_BCH_SB_POSIX_ACL(sb, 1); - } - block_size = le16_to_cpu(sb->block_size); if (!is_power_of_2(block_size) || @@ -341,13 +345,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) return err; } - if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_NONCE_V1 && - bch2_sb_get_crypt(sb) && - BCH_SB_INITIALIZED(sb)) - return "Incompatible extent nonces"; - - sb->version = cpu_to_le16(BCH_SB_VERSION_MAX); - return NULL; } @@ -364,6 +361,7 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.uuid = src->uuid; c->sb.user_uuid = src->user_uuid; + c->sb.version = le16_to_cpu(src->version); c->sb.nr_devices = src->nr_devices; c->sb.clean = BCH_SB_CLEAN(src); c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); @@ -385,6 +383,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) unsigned i; dst->version = src->version; + dst->version_min = src->version_min; dst->seq = src->seq; dst->uuid = src->uuid; dst->user_uuid = src->user_uuid; @@ -483,8 +482,8 @@ reread: !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) return "Not a bcachefs superblock"; - if (le16_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN || - le16_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX) + if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || + le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) return "Unsupported superblock version"; bytes = vstruct_bytes(sb->sb); @@ -846,12 +845,6 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb, return "bucket size smaller than btree node size"; } - if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) - for (m = mi->members; - m < mi->members + sb->nr_devices; - m++) - SET_BCH_MEMBER_DATA_ALLOWED(m, ~0); - return NULL; } @@ -881,6 +874,16 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { /* BCH_SB_FIELD_clean: */ +void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) +{ + struct jset_entry *entry; + + for (entry = clean->start; + entry < (struct jset_entry *) vstruct_end(&clean->field); + entry = vstruct_next(entry)) + bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); +} + void bch2_fs_mark_clean(struct bch_fs *c, bool clean) { struct bch_sb_field_clean *sb_clean; @@ -935,6 +938,10 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean) BUG_ON(entry != vstruct_end(&sb_clean->field)); + if (le16_to_cpu(c->disk_sb.sb->version) < + bcachefs_metadata_version_bkey_renumber) + bch2_sb_clean_renumber(sb_clean, WRITE); + mutex_unlock(&c->btree_root_lock); write_super: bch2_write_super(c); diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index aa618fe9cd22..ac3b704f0540 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -135,6 +135,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) /* BCH_SB_FIELD_clean: */ +void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); + void bch2_fs_mark_clean(struct bch_fs *, bool); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c 
index cadbc5481bcb..7405b5cdd1bf 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1799,7 +1799,7 @@ err: BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM -unsigned bch2_metadata_version = BCH_SB_VERSION_MAX; +unsigned bch2_metadata_version = bcachefs_metadata_version_current; module_param_named(version, bch2_metadata_version, uint, 0400); module_exit(bcachefs_exit); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 8eacc0d2550b..7e46b254da38 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -277,7 +277,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) return -EPERM; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) - if (k.k->type == BCH_EXTENT) { + if (k.k->type == KEY_TYPE_extent) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 9730540f7375..1aa6ac05d50e 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -165,7 +165,7 @@ TRACE_EVENT(btree_write, TP_ARGS(b, bytes, sectors), TP_STRUCT__entry( - __field(enum bkey_type, type) + __field(enum btree_node_type, type) __field(unsigned, bytes ) __field(unsigned, sectors ) ), diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index ab358c434753..ff2d59ee1658 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -62,8 +62,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) const struct bch_hash_desc bch2_xattr_hash_desc = { .btree_id = BTREE_ID_XATTRS, - .key_type = BCH_XATTR, - .whiteout_type = BCH_XATTR_WHITEOUT, + .key_type = KEY_TYPE_xattr, .hash_key = xattr_hash_key, .hash_bkey = xattr_hash_bkey, .cmp_key = xattr_cmp_key, @@ -73,71 +72,50 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) { const struct xattr_handler *handler; - struct bkey_s_c_xattr xattr; - - switch (k.k->type) { - case BCH_XATTR: - if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) - return "value too small"; - - xattr = bkey_s_c_to_xattr(k); + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); - if (bkey_val_u64s(k.k) < - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))) - return "value too small"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) + return "value too small"; - if (bkey_val_u64s(k.k) > - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)) - return "value too big"; + if (bkey_val_u64s(k.k) < + xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len))) + return "value too small"; - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (!handler) - return "invalid type"; + if (bkey_val_u64s(k.k) > + xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len) + 4)) + return "value too big"; - if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) - return "xattr name has invalid characters"; + handler = bch2_xattr_type_to_handler(xattr.v->x_type); + if (!handler) + return "invalid type"; - return NULL; - case BCH_XATTR_WHITEOUT: - return bkey_val_bytes(k.k) != 0 - ? 
"value size should be zero" - : NULL; + if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) + return "xattr name has invalid characters"; - default: - return "invalid type"; - } + return NULL; } void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { const struct xattr_handler *handler; - struct bkey_s_c_xattr xattr; + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); - switch (k.k->type) { - case BCH_XATTR: - xattr = bkey_s_c_to_xattr(k); + handler = bch2_xattr_type_to_handler(xattr.v->x_type); + if (handler && handler->prefix) + pr_buf(out, "%s", handler->prefix); + else if (handler) + pr_buf(out, "(type %u)", xattr.v->x_type); + else + pr_buf(out, "(unknown type %u)", xattr.v->x_type); - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (handler && handler->prefix) - pr_buf(out, "%s", handler->prefix); - else if (handler) - pr_buf(out, "(type %u)", xattr.v->x_type); - else - pr_buf(out, "(unknown type %u)", xattr.v->x_type); - - bch_scnmemcpy(out, xattr.v->x_name, - xattr.v->x_name_len); - pr_buf(out, ":"); - bch_scnmemcpy(out, xattr_val(xattr.v), - le16_to_cpu(xattr.v->x_val_len)); - break; - case BCH_XATTR_WHITEOUT: - pr_buf(out, "whiteout"); - break; - } + bch_scnmemcpy(out, xattr.v->x_name, + xattr.v->x_name_len); + pr_buf(out, ":"); + bch_scnmemcpy(out, xattr_val(xattr.v), + le16_to_cpu(xattr.v->x_val_len)); } int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, @@ -261,7 +239,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (k.k->p.inode > inum) break; - if (k.k->type != BCH_XATTR) + if (k.k->type != KEY_TYPE_xattr) continue; xattr = bkey_s_c_to_xattr(k).v; @@ -315,7 +293,7 @@ static const struct xattr_handler bch_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .get = bch2_xattr_get_handler, .set = bch2_xattr_set_handler, - .flags = BCH_XATTR_INDEX_USER, + .flags = KEY_TYPE_XATTR_INDEX_USER, }; static bool bch2_xattr_trusted_list(struct dentry *dentry) @@ -328,14 +306,14 @@ static const struct xattr_handler bch_xattr_trusted_handler = { .list = bch2_xattr_trusted_list, .get = bch2_xattr_get_handler, .set = bch2_xattr_set_handler, - .flags = BCH_XATTR_INDEX_TRUSTED, + .flags = KEY_TYPE_XATTR_INDEX_TRUSTED, }; static const struct xattr_handler bch_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = bch2_xattr_get_handler, .set = bch2_xattr_set_handler, - .flags = BCH_XATTR_INDEX_SECURITY, + .flags = KEY_TYPE_XATTR_INDEX_SECURITY, }; #ifndef NO_BCACHEFS_FS @@ -474,13 +452,13 @@ const struct xattr_handler *bch2_xattr_handlers[] = { }; static const struct xattr_handler *bch_xattr_handler_map[] = { - [BCH_XATTR_INDEX_USER] = &bch_xattr_user_handler, - [BCH_XATTR_INDEX_POSIX_ACL_ACCESS] = + [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler, + [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, - [BCH_XATTR_INDEX_POSIX_ACL_DEFAULT] = + [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, - [BCH_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, - [BCH_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, + [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, + [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, }; static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type) diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 63be44b02a2b..4151065ab853 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -9,7 +9,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; const char 
*bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_xattr_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_xattr (struct bkey_ops) { \ .key_invalid = bch2_xattr_invalid, \ .val_to_text = bch2_xattr_to_text, \ } -- cgit v1.2.3 From f0cfb963ec0370b021bb21c899b5fdcd020014cf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 29 Nov 2018 02:14:31 -0500 Subject: bcachefs: Track nr_inodes with the key marking machinery Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 3 --- fs/bcachefs/btree_gc.c | 1 + fs/bcachefs/btree_types.h | 1 + fs/bcachefs/buckets.c | 6 ++++++ fs/bcachefs/buckets_types.h | 2 ++ fs/bcachefs/fs.c | 7 +------ fs/bcachefs/fsck.c | 8 -------- fs/bcachefs/recovery.c | 2 -- 8 files changed, 11 insertions(+), 19 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index cd2fff851bbe..d774ddf6cbb3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -721,9 +721,6 @@ struct bch_fs { struct mutex fsck_error_lock; bool fsck_alloc_err; - /* FILESYSTEM */ - atomic_long_t nr_inodes; - /* QUOTAS */ struct bch_memquota_type quotas[QTYP_NR]; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 85fc181e76a8..65cf64f22522 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -685,6 +685,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) for (b = 0; b < BCH_DATA_NR; b++) copy_fs_field(buckets[b], "buckets[%s]", bch2_data_types[b]); + copy_fs_field(nr_inodes, "nr_inodes"); for_each_possible_cpu(cpu) { p = per_cpu_ptr(c->usage[0], cpu); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index b4a826369a57..f34f340ff034 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -439,6 +439,7 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type) switch (type) { case BKEY_TYPE_BTREE: case BKEY_TYPE_EXTENTS: + case BKEY_TYPE_INODES: case BKEY_TYPE_EC: return true; default: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index d08e95020cef..8cbc1c5c8af5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -862,6 +862,12 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, ret = bch2_mark_stripe(c, k, inserting, stats, journal_seq, flags, gc); break; + case KEY_TYPE_inode: + if (inserting) + stats->nr_inodes++; + else + stats->nr_inodes--; + break; case KEY_TYPE_reservation: { unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 0187f465d23f..9f7812c69bbc 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -73,6 +73,8 @@ struct bch_fs_usage { u64 buckets[BCH_DATA_NR]; + u64 nr_inodes; + /* fields starting here aren't touched by gc: */ u64 online_reserved; u64 available_cache; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 93e1f3aaacd4..db3c5962ad31 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -400,8 +400,6 @@ retry: if (unlikely(ret)) goto err_trans; - atomic_long_inc(&c->nr_inodes); - if (!tmpfile) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); @@ -1418,9 +1416,6 @@ static void bch2_evict_inode(struct inode *vinode) bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); bch2_inode_rm(c, inode->v.i_ino); - - WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0, - "nr_inodes < 0"); } } @@ -1439,7 +1434,7 @@ static int
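The effect of the change above is that nr_inodes stops being a separately maintained atomic and becomes a side effect of marking inode keys, so gc can recompute and cross-check it like any other usage counter. A toy model of the idea, with key types reduced to a bare enum:

	#include <assert.h>

	/* Toy model: fs usage updated as a side effect of inserting/deleting keys */
	struct fs_usage { long nr_inodes; };

	enum key_type { KEY_TYPE_inode, KEY_TYPE_other };

	static void mark_key(struct fs_usage *u, enum key_type t, int inserting)
	{
		if (t == KEY_TYPE_inode)
			u->nr_inodes += inserting ? 1 : -1;
	}

	int main(void)
	{
		struct fs_usage u = { 0 };

		mark_key(&u, KEY_TYPE_inode, 1);	/* create */
		mark_key(&u, KEY_TYPE_other, 1);	/* unrelated key */
		mark_key(&u, KEY_TYPE_inode, 0);	/* unlink + evict */
		assert(u.nr_inodes == 0);
		return 0;
	}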
bch2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = (c->capacity - hidden_metadata) >> shift; buf->f_bfree = (c->capacity - bch2_fs_sectors_used(c, usage)) >> shift; buf->f_bavail = buf->f_bfree; - buf->f_files = atomic_long_read(&c->nr_inodes); + buf->f_files = usage.nr_inodes; buf->f_ffree = U64_MAX; fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 57ab8f088415..810e1c3f4c49 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1314,9 +1314,6 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); BUG_ON(ret == -EINTR); if (ret) break; - - if (link->count) - atomic_long_inc(&c->nr_inodes); } else { /* Should have been caught by dirents pass: */ need_fsck_err_on(link->count, c, @@ -1380,7 +1377,6 @@ static int check_inodes_fast(struct bch_fs *c) struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_inode inode; - unsigned long nr_inodes = 0; int ret = 0; for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { @@ -1389,9 +1385,6 @@ static int check_inodes_fast(struct bch_fs *c) inode = bkey_s_c_to_inode(k); - if (!(inode.v->bi_flags & BCH_INODE_UNLINKED)) - nr_inodes++; - if (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| BCH_INODE_I_SECTORS_DIRTY| @@ -1405,7 +1398,6 @@ static int check_inodes_fast(struct bch_fs *c) break; } } - atomic_long_set(&c->nr_inodes, nr_inodes); fsck_err: return bch2_btree_iter_unlock(&iter) ?: ret; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 172770606294..2d0736caa5ef 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -375,8 +375,6 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - atomic_long_set(&c->nr_inodes, 2); - if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); if (ret) -- cgit v1.2.3 From 5663a4152138fdf23a300934128d77a1bf784237 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 27 Nov 2018 08:23:22 -0500 Subject: bcachefs: refactor bch_fs_usage Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/bcachefs.h | 10 +++- fs/bcachefs/buckets.c | 131 ++++++++++++++++++++++------------------- fs/bcachefs/buckets.h | 42 +++++++------ fs/bcachefs/buckets_types.h | 14 +++-- fs/bcachefs/chardev.c | 2 +- fs/bcachefs/fs.c | 8 +-- fs/bcachefs/super.c | 2 + 8 files changed, 119 insertions(+), 92 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 3e77af4305a5..36aa7a5f2806 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -721,7 +721,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head, static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor) { u64 stranded = c->write_points_nr * c->bucket_size_max; - u64 free = bch2_fs_sectors_free(c, bch2_fs_usage_read(c)); + u64 free = bch2_fs_sectors_free(c); return stranded * factor > free; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 512498c275a5..92a0ecd8fbc3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -503,6 +503,10 @@ enum bch_fs_state { BCH_FS_RW, }; +struct bch_fs_pcpu { + u64 sectors_available; +}; + struct bch_fs { struct closure cl; @@ -615,9 +619,11 @@ struct bch_fs { atomic64_t sectors_available; - struct bch_fs_usage __percpu *usage[2]; + struct bch_fs_pcpu __percpu *pcpu; + + struct bch_fs_usage __percpu *usage[2]; - struct percpu_rw_semaphore mark_lock; + struct percpu_rw_semaphore mark_lock; /* * When we invalidate buckets, 
we use both the priority and the amount diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ab68c5138ade..c53d7a030832 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -272,16 +272,31 @@ static u64 avail_factor(u64 r) return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1); } -static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) +static inline u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage) { - struct fs_usage_sum sum = __fs_usage_sum(stats); + struct fs_usage_sum sum = __fs_usage_sum(fs_usage); return sum.hidden + sum.data + reserve_factor(sum.reserved); } -u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats) +u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage) { - return min(c->capacity, __bch2_fs_sectors_used(c, stats)); + return min(c->capacity, __bch2_fs_sectors_used(c, fs_usage)); +} + +struct bch_fs_usage_short +bch2_fs_usage_read_short(struct bch_fs *c) +{ + struct bch_fs_usage usage = bch2_fs_usage_read(c); + struct fs_usage_sum sum = __fs_usage_sum(usage); + struct bch_fs_usage_short ret; + + ret.capacity = READ_ONCE(c->capacity) - sum.hidden; + ret.used = min(ret.capacity, sum.data + + reserve_factor(sum.reserved)); + ret.nr_inodes = usage.nr_inodes; + + return ret; } static inline int is_unavailable_bucket(struct bucket_mark m) @@ -315,11 +330,11 @@ static bool bucket_became_unavailable(struct bucket_mark old, } void bch2_fs_usage_apply(struct bch_fs *c, - struct bch_fs_usage *stats, + struct bch_fs_usage *fs_usage, struct disk_reservation *disk_res, struct gc_pos gc_pos) { - struct fs_usage_sum sum = __fs_usage_sum(*stats); + struct fs_usage_sum sum = __fs_usage_sum(*fs_usage); s64 added = sum.data + sum.reserved; s64 should_not_have_added; @@ -337,24 +352,20 @@ void bch2_fs_usage_apply(struct bch_fs *c, } if (added > 0) { - disk_res->sectors -= added; - stats->online_reserved -= added; + disk_res->sectors -= added; + fs_usage->online_reserved -= added; } preempt_disable(); - /* online_reserved not subject to gc: */ - this_cpu_add(c->usage[0]->online_reserved, stats->online_reserved); - stats->online_reserved = 0; - - bch2_usage_add(this_cpu_ptr(c->usage[0]), stats); + bch2_usage_add(this_cpu_ptr(c->usage[0]), fs_usage); if (gc_visited(c, gc_pos)) - bch2_usage_add(this_cpu_ptr(c->usage[1]), stats); + bch2_usage_add(this_cpu_ptr(c->usage[1]), fs_usage); bch2_fs_stats_verify(c); preempt_enable(); - memset(stats, 0, sizeof(*stats)); + memset(fs_usage, 0, sizeof(*fs_usage)); } static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, @@ -435,11 +446,11 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, struct bucket_mark *old, bool gc) { - struct bch_fs_usage *stats = this_cpu_ptr(c->usage[gc]); + struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]); struct bucket *g = __bucket(ca, b, gc); struct bucket_mark new; - *old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ + *old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ BUG_ON(!is_available_bucket(new)); new.owned_by_allocator = 1; @@ -449,7 +460,7 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, new.gen++; })); - stats->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors; + fs_usage->replicas[0].data[BCH_DATA_CACHED] -= old->cached_sectors; } void bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca, @@ -468,11 +479,11 @@ static void __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, size_t 
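avail_factor(), visible as hunk context above, deliberately understates free space, by 64/65 assuming RESERVE_FACTOR is 6 as in buckets.c of this era, so that the reservation machinery keeps slack for the inflation applied to reserved sectors when they are counted as used. A compilable sketch:

	#include <stdio.h>
	#include <stdint.h>

	#define RESERVE_FACTOR	6	/* assumed; check buckets.c for the real value */

	/* As in the context above: scale free sectors down by 64/65 */
	static uint64_t avail_factor(uint64_t r)
	{
		return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
	}

	int main(void)
	{
		uint64_t free_sectors = 1ULL << 20;	/* 512MB worth of 512-byte sectors */

		printf("free %llu -> seeded as available %llu\n",
		       (unsigned long long) free_sectors,
		       (unsigned long long) avail_factor(free_sectors));
		return 0;
	}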
b, bool owned_by_allocator, bool gc) { - struct bch_fs_usage *stats = this_cpu_ptr(c->usage[gc]); + struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]); struct bucket *g = __bucket(ca, b, gc); struct bucket_mark old, new; - old = bucket_data_cmpxchg(c, ca, stats, g, new, ({ + old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ new.owned_by_allocator = owned_by_allocator; })); @@ -588,7 +599,7 @@ static void bch2_mark_pointer(struct bch_fs *c, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type, struct bch_fs_usage *fs_usage, - u64 journal_seq, unsigned flags, + unsigned journal_seq, unsigned flags, bool gc) { struct bucket_mark old, new; @@ -693,8 +704,8 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, s64 sectors, enum bch_data_type data_type, - struct bch_fs_usage *stats, - u64 journal_seq, unsigned flags, + struct bch_fs_usage *fs_usage, + unsigned journal_seq, unsigned flags, bool gc) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -717,7 +728,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, s64 adjusted_disk_sectors = disk_sectors; bch2_mark_pointer(c, p, disk_sectors, data_type, - stats, journal_seq, flags, gc); + fs_usage, journal_seq, flags, gc); if (!p.ptr.cached) for (i = 0; i < p.ec_nr; i++) { @@ -740,13 +751,13 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, } replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); + 1, ARRAY_SIZE(fs_usage->replicas)); ec_redundancy = clamp_t(unsigned, ec_redundancy, - 1, ARRAY_SIZE(stats->replicas)); + 1, ARRAY_SIZE(fs_usage->replicas)); - stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; - stats->replicas[replicas - 1].data[data_type] += dirty_sectors; - stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; + fs_usage->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; + fs_usage->replicas[replicas - 1].data[data_type] += dirty_sectors; + fs_usage->replicas[ec_redundancy - 1].ec_data += ec_sectors; return 0; } @@ -831,8 +842,8 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, - struct bch_fs_usage *stats, - u64 journal_seq, unsigned flags, + struct bch_fs_usage *fs_usage, + unsigned journal_seq, unsigned flags, bool gc) { int ret = 0; @@ -843,30 +854,30 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, ? 
c->opts.btree_node_size : -c->opts.btree_node_size, BCH_DATA_BTREE, - stats, journal_seq, flags, gc); + fs_usage, journal_seq, flags, gc); break; case KEY_TYPE_extent: ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER, - stats, journal_seq, flags, gc); + fs_usage, journal_seq, flags, gc); break; case KEY_TYPE_stripe: ret = bch2_mark_stripe(c, k, inserting, - stats, journal_seq, flags, gc); + fs_usage, journal_seq, flags, gc); break; case KEY_TYPE_inode: if (inserting) - stats->nr_inodes++; + fs_usage->nr_inodes++; else - stats->nr_inodes--; + fs_usage->nr_inodes--; break; case KEY_TYPE_reservation: { unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; sectors *= replicas; replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); + 1, ARRAY_SIZE(fs_usage->replicas)); - stats->replicas[replicas - 1].persistent_reserved += sectors; + fs_usage->replicas[replicas - 1].persistent_reserved += sectors; break; } default: @@ -880,17 +891,15 @@ int bch2_mark_key_locked(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, - struct bch_fs_usage *stats, + struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { int ret; if (!(flags & BCH_BUCKET_MARK_GC)) { - if (!stats) - stats = this_cpu_ptr(c->usage[0]); - ret = __bch2_mark_key(c, k, inserting, sectors, - stats, journal_seq, flags, false); + fs_usage ?: this_cpu_ptr(c->usage[0]), + journal_seq, flags, false); if (ret) return ret; } @@ -910,14 +919,14 @@ int bch2_mark_key_locked(struct bch_fs *c, int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, - struct bch_fs_usage *stats, + struct bch_fs_usage *fs_usage, u64 journal_seq, unsigned flags) { int ret; percpu_down_read(&c->mark_lock); ret = bch2_mark_key_locked(c, k, inserting, sectors, - pos, stats, journal_seq, flags); + pos, fs_usage, journal_seq, flags); percpu_up_read(&c->mark_lock); return ret; @@ -930,7 +939,7 @@ void bch2_mark_update(struct btree_insert *trans, struct btree_iter *iter = insert->iter; struct btree *b = iter->l[0].b; struct btree_node_iter node_iter = iter->l[0].iter; - struct bch_fs_usage stats = { 0 }; + struct bch_fs_usage fs_usage = { 0 }; struct gc_pos pos = gc_pos_btree_node(b); struct bkey_packed *_k; @@ -943,7 +952,7 @@ void bch2_mark_update(struct btree_insert *trans, bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, bpos_min(insert->k->k.p, b->key.k.p).offset - bkey_start_offset(&insert->k->k), - pos, &stats, trans->journal_res.seq, 0); + pos, &fs_usage, trans->journal_res.seq, 0); while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, KEY_TYPE_discard))) { @@ -976,7 +985,7 @@ void bch2_mark_update(struct btree_insert *trans, BUG_ON(sectors <= 0); bch2_mark_key_locked(c, k, true, sectors, - pos, &stats, trans->journal_res.seq, 0); + pos, &fs_usage, trans->journal_res.seq, 0); sectors = bkey_start_offset(&insert->k->k) - k.k->p.offset; @@ -987,12 +996,12 @@ void bch2_mark_update(struct btree_insert *trans, } bch2_mark_key_locked(c, k, false, sectors, - pos, &stats, trans->journal_res.seq, 0); + pos, &fs_usage, trans->journal_res.seq, 0); bch2_btree_node_iter_advance(&node_iter, b); } - bch2_fs_usage_apply(c, &stats, trans->disk_res, pos); + bch2_fs_usage_apply(c, &fs_usage, trans->disk_res, pos); percpu_up_read(&c->mark_lock); } @@ -1004,9 +1013,9 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c) int cpu; for_each_possible_cpu(cpu) - per_cpu_ptr(c->usage[0], cpu)->available_cache = 0; + per_cpu_ptr(c->pcpu,
cpu)->sectors_available = 0; - return avail_factor(bch2_fs_sectors_free(c, bch2_fs_usage_read(c))); + return avail_factor(bch2_fs_sectors_free(c)); } void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) @@ -1026,16 +1035,16 @@ void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res) int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, unsigned sectors, int flags) { - struct bch_fs_usage *stats; + struct bch_fs_pcpu *pcpu; u64 old, v, get; s64 sectors_available; int ret; percpu_down_read(&c->mark_lock); preempt_disable(); - stats = this_cpu_ptr(c->usage[0]); + pcpu = this_cpu_ptr(c->pcpu); - if (sectors <= stats->available_cache) + if (sectors <= pcpu->sectors_available) goto out; v = atomic64_read(&c->sectors_available); @@ -1051,12 +1060,12 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, } while ((v = atomic64_cmpxchg(&c->sectors_available, old, old - get)) != old); - stats->available_cache += get; + pcpu->sectors_available += get; out: - stats->available_cache -= sectors; - stats->online_reserved += sectors; - res->sectors += sectors; + pcpu->sectors_available -= sectors; + this_cpu_add(c->usage[0]->online_reserved, sectors); + res->sectors += sectors; bch2_disk_reservations_verify(c, flags); bch2_fs_stats_verify(c); @@ -1089,8 +1098,8 @@ recalculate: (flags & BCH_DISK_RESERVATION_NOFAIL)) { atomic64_set(&c->sectors_available, max_t(s64, 0, sectors_available - sectors)); - stats->online_reserved += sectors; - res->sectors += sectors; + this_cpu_add(c->usage[0]->online_reserved, sectors); + res->sectors += sectors; ret = 0; bch2_disk_reservations_verify(c, flags); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index d76e65316245..3db0e3b8a180 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -164,6 +164,20 @@ static inline bool bucket_unused(struct bucket_mark mark) !bucket_sectors_used(mark); } +static inline bool is_available_bucket(struct bucket_mark mark) +{ + return (!mark.owned_by_allocator && + !mark.dirty_sectors && + !mark.stripe); +} + +static inline bool bucket_needs_journal_commit(struct bucket_mark m, + u16 last_seq_ondisk) +{ + return m.journal_seq_valid && + ((s16) m.journal_seq - (s16) last_seq_ondisk > 0); +} + /* Device usage: */ struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *, bool); @@ -207,31 +221,21 @@ static inline u64 dev_buckets_free(struct bch_fs *c, struct bch_dev *ca) struct bch_fs_usage __bch2_fs_usage_read(struct bch_fs *, bool); struct bch_fs_usage bch2_fs_usage_read(struct bch_fs *); -void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, - struct disk_reservation *, struct gc_pos); u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage); -static inline u64 bch2_fs_sectors_free(struct bch_fs *c, - struct bch_fs_usage stats) -{ - return c->capacity - bch2_fs_sectors_used(c, stats); -} +struct bch_fs_usage_short +bch2_fs_usage_read_short(struct bch_fs *); -static inline bool is_available_bucket(struct bucket_mark mark) +static inline u64 bch2_fs_sectors_free(struct bch_fs *c) { - return (!mark.owned_by_allocator && - !mark.dirty_sectors && - !mark.stripe); -} + struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); -static inline bool bucket_needs_journal_commit(struct bucket_mark m, - u16 last_seq_ondisk) -{ - return m.journal_seq_valid && - ((s16) m.journal_seq - (s16) last_seq_ondisk > 0); + return usage.capacity - usage.used; } +/* key/bucket marking: */ + void 
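The sectors_available handling above is a batched-refill fast path: reservations are satisfied from a per-cpu cache, which is refilled from the shared atomic64 pool a chunk at a time so the shared counter is touched rarely. A userspace sketch using C11 atomics, with a thread-local standing in for the per-cpu counter and SECTORS_CACHE assumed to be the kernel's refill batch size:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SECTORS_CACHE	1024	/* refill batch; name borrowed from buckets.c */

	static _Atomic int64_t sectors_available = 1 << 20;	/* shared pool */
	static _Thread_local int64_t cache;	/* stands in for the per-cpu counter */

	/*
	 * Sketch of the fast path: small reservations come out of the local
	 * cache; on a miss, grab min(sectors + SECTORS_CACHE, pool) from the
	 * shared pool in one cmpxchg.  The kernel's slow path (recalculating
	 * from per-device usage) is omitted here.
	 */
	static bool reserve(int64_t sectors)
	{
		int64_t old, get;

		if (sectors <= cache)
			goto out;

		old = atomic_load(&sectors_available);
		do {
			get = old < sectors + SECTORS_CACHE ? old : sectors + SECTORS_CACHE;
			if (get < sectors)
				return false;	/* kernel would fall back to the slow path */
		} while (!atomic_compare_exchange_weak(&sectors_available, &old, old - get));

		cache += get;
	out:
		cache -= sectors;
		return true;
	}

	int main(void)
	{
		printf("reserved: %d, cached: %lld\n", reserve(8), (long long) cache);
		return 0;
	}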
bch2_bucket_seq_cleanup(struct bch_fs *); void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *, @@ -252,6 +256,10 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c, bool, s64, struct gc_pos, struct bch_fs_usage *, u64, unsigned); void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *); +void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, + struct disk_reservation *, struct gc_pos); + +/* disk reservations: */ void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 35d5cf48003b..f451a96f432c 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -73,18 +73,22 @@ struct bch_fs_usage { u64 nr_inodes; - /* fields starting here aren't touched by gc: */ u64 online_reserved; - u64 available_cache; +}; + +struct bch_fs_usage_short { + u64 capacity; + u64 used; + u64 nr_inodes; }; /* * A reservation for space on disk: */ struct disk_reservation { - u64 sectors; - u32 gen; - unsigned nr_replicas; + u64 sectors; + u32 gen; + unsigned nr_replicas; }; struct copygc_heap_entry { diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index d24cff52ba96..c11f8f4d24cf 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -306,7 +306,7 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, .p.btree_id = ctx->stats.iter.btree_id, .p.pos = ctx->stats.iter.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), - .p.sectors_total = bch2_fs_sectors_used(c, bch2_fs_usage_read(c)), + .p.sectors_total = bch2_fs_usage_read_short(c).used, }; if (len < sizeof(e)) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index db3c5962ad31..8f0b049aa1ec 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1423,16 +1423,14 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct bch_fs *c = sb->s_fs_info; - struct bch_fs_usage usage = bch2_fs_usage_read(c); - u64 hidden_metadata = usage.buckets[BCH_DATA_SB] + - usage.buckets[BCH_DATA_JOURNAL]; + struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); unsigned shift = sb->s_blocksize_bits - 9; u64 fsid; buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = (c->capacity - hidden_metadata) >> shift; - buf->f_bfree = (c->capacity - bch2_fs_sectors_used(c, usage)) >> shift; + buf->f_blocks = usage.capacity >> shift; + buf->f_bfree = (usage.capacity - usage.used) >> shift; buf->f_bavail = buf->f_bfree; buf->f_files = usage.nr_inodes; buf->f_ffree = U64_MAX; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 3887e63c0756..b2113c5426ca 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -376,6 +376,7 @@ static void bch2_fs_free(struct bch_fs *c) bch2_fs_compress_exit(c); percpu_free_rwsem(&c->mark_lock); free_percpu(c->usage[0]); + free_percpu(c->pcpu); mempool_exit(&c->btree_iters_pool); mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); @@ -612,6 +613,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) offsetof(struct btree_write_bio, wbio.bio)), BIOSET_NEED_BVECS) || !(c->usage[0] = alloc_percpu(struct bch_fs_usage)) || + !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, -- cgit v1.2.3 From 0f5254aa98befa5187cc4d02584ab0f19d18ff68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 
Dec 2018 05:43:00 -0500 Subject: bcachefs: bch2_fs_quota_transfer improve quota transfer locking & make ei_qid usage more consistent Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 15 +++-------- fs/bcachefs/fs.c | 67 ++++++++++++++++++++++++++++++++--------------- fs/bcachefs/fs.h | 6 +++++ fs/bcachefs/quota.c | 7 ++--- fs/bcachefs/quota.h | 11 +++----- fs/bcachefs/quota_types.h | 6 +++++ 6 files changed, 68 insertions(+), 44 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index a89786f295cf..701882ce6024 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -108,21 +108,12 @@ static int bch2_set_projid(struct bch_fs *c, u32 projid) { struct bch_qid qid = inode->ei_qid; - int ret; - - if (projid == inode->ei_qid.q[QTYP_PRJ]) - return 0; qid.q[QTYP_PRJ] = projid; - return bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid, - inode->v.i_blocks + - inode->ei_quota_reserved); - if (ret) - return ret; - - inode->ei_qid.q[QTYP_PRJ] = projid; - return 0; + return bch2_fs_quota_transfer(c, inode, qid, + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_PREALLOC); } static int fssetxattr_inode_update_fn(struct bch_inode_info *inode, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 8f0b049aa1ec..d22b9e7e2082 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -170,7 +170,6 @@ void bch2_inode_update_after_write(struct bch_fs *c, inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime)); inode->ei_inode = *bi; - inode->ei_qid = bch_qid(bi); bch2_inode_flags_to_vfs(inode); } @@ -248,6 +247,41 @@ retry: return ret < 0 ? ret : 0; } +int bch2_fs_quota_transfer(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch_qid new_qid, + unsigned qtypes, + enum quota_acct_mode mode) +{ + unsigned i; + int ret; + + qtypes &= enabled_qtypes(c); + + for (i = 0; i < QTYP_NR; i++) + if (new_qid.q[i] == inode->ei_qid.q[i]) + qtypes &= ~(1U << i); + + if (!qtypes) + return 0; + + mutex_lock(&inode->ei_quota_lock); + + ret = bch2_quota_transfer(c, qtypes, new_qid, + inode->ei_qid, + inode->v.i_blocks + + inode->ei_quota_reserved, + mode); + if (!ret) + for (i = 0; i < QTYP_NR; i++) + if (qtypes & (1 << i)) + inode->ei_qid.q[i] = new_qid.q[i]; + + mutex_unlock(&inode->ei_quota_lock); + + return ret; +} + static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) { struct bch_inode_unpacked inode_u; @@ -913,37 +947,27 @@ static int bch2_setattr_nonsize(struct mnt_idmap *idmap, struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_qid qid = inode->ei_qid; + struct bch_qid qid; struct btree_trans trans; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; struct inode_write_setattr s = { iattr, idmap }; - unsigned qtypes = 0; int ret; mutex_lock(&inode->ei_update_lock); - if (c->opts.usrquota && - (iattr->ia_valid & ATTR_UID) && - !uid_eq(iattr->ia_uid, inode->v.i_uid)) { - qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), iattr->ia_uid), - qtypes |= 1 << QTYP_USR; - } + qid = inode->ei_qid; + + if (iattr->ia_valid & ATTR_UID) + qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), iattr->ia_uid); - if (c->opts.grpquota && - (iattr->ia_valid & ATTR_GID) && - !gid_eq(iattr->ia_gid, inode->v.i_gid)) { + if (iattr->ia_valid & ATTR_GID) qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), iattr->ia_gid); - qtypes |= 1 << QTYP_GRP; - } - if (qtypes) { - ret = bch2_quota_transfer(c, qtypes, qid, inode->ei_qid, - inode->v.i_blocks + - inode->ei_quota_reserved); - if (ret) - goto err; - } 
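bch2_fs_quota_transfer() above packages a recurring pattern: mask off quota types whose id is not actually changing, then perform the transfer and the cached-qid update under one lock so the two can never be observed torn. A toy sketch of that shape, with pthreads standing in for kernel mutexes and a stub transfer that always succeeds:

	#include <pthread.h>
	#include <stdio.h>

	enum { QTYP_USR, QTYP_GRP, QTYP_PRJ, QTYP_NR };

	struct inode_info {
		pthread_mutex_t	quota_lock;
		unsigned	qid[QTYP_NR];	/* cached owning qids, one per quota type */
	};

	/* Stand-in for bch2_quota_transfer(): always succeeds here */
	static int quota_transfer(unsigned qtypes, const unsigned *dst, const unsigned *src)
	{
		(void) qtypes; (void) dst; (void) src;
		return 0;
	}

	static int fs_quota_transfer(struct inode_info *inode, const unsigned *new_qid,
				     unsigned qtypes)
	{
		unsigned i;
		int ret;

		/* drop quota types that are no-ops up front */
		for (i = 0; i < QTYP_NR; i++)
			if (new_qid[i] == inode->qid[i])
				qtypes &= ~(1U << i);

		if (!qtypes)
			return 0;

		pthread_mutex_lock(&inode->quota_lock);
		ret = quota_transfer(qtypes, new_qid, inode->qid);
		if (!ret)
			for (i = 0; i < QTYP_NR; i++)
				if (qtypes & (1U << i))
					inode->qid[i] = new_qid[i];
		pthread_mutex_unlock(&inode->quota_lock);
		return ret;
	}

	int main(void)
	{
		struct inode_info ino = { PTHREAD_MUTEX_INITIALIZER, { 1000, 1000, 0 } };
		unsigned new_qid[QTYP_NR] = { 1000, 1000, 5 };	/* only project changes */

		printf("%d -> prj %u\n", fs_quota_transfer(&ino, new_qid, ~0U),
		       ino.qid[QTYP_PRJ]);
		return 0;
	}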
+ ret = bch2_fs_quota_transfer(c, inode, qid, ~0, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err; bch2_trans_init(&trans, c); retry: @@ -1312,6 +1336,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->ei_journal_seq = 0; inode->ei_quota_reserved = 0; inode->ei_str_hash = bch2_hash_info_init(c, bi); + inode->ei_qid = bch_qid(bi); inode->v.i_mapping->a_ops = &bch_address_space_operations; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 4fdc11762cd7..fbb31976bc55 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -70,6 +70,12 @@ struct bch_inode_unpacked; #ifndef NO_BCACHEFS_FS +int bch2_fs_quota_transfer(struct bch_fs *, + struct bch_inode_info *, + struct bch_qid, + unsigned, + enum quota_acct_mode); + /* returns 0 if we want to do the update, or error is passed up */ typedef int (*inode_set_fn)(struct bch_inode_info *, struct bch_inode_unpacked *, void *); diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 7c38daac1cac..113a2ca88ffc 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -270,7 +270,8 @@ static void __bch2_quota_transfer(struct bch_memquota *src_q, int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, struct bch_qid dst, - struct bch_qid src, u64 space) + struct bch_qid src, u64 space, + enum quota_acct_mode mode) { struct bch_memquota_type *q; struct bch_memquota *src_q[3], *dst_q[3]; @@ -296,13 +297,13 @@ int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC, dst_q[i]->c[Q_SPC].v + space, - KEY_TYPE_QUOTA_PREALLOC); + mode); if (ret) goto err; ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, dst_q[i]->c[Q_INO].v + 1, - KEY_TYPE_QUOTA_PREALLOC); + mode); if (ret) goto err; } diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 294a04db84bf..72b5ea0d77c5 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -15,12 +15,6 @@ void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .val_to_text = bch2_quota_to_text, \ } -enum quota_acct_mode { - KEY_TYPE_QUOTA_PREALLOC, - KEY_TYPE_QUOTA_WARN, - KEY_TYPE_QUOTA_NOCHECK, -}; - static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) { return (struct bch_qid) { @@ -43,7 +37,7 @@ int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters, s64, enum quota_acct_mode); int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid, - struct bch_qid, u64); + struct bch_qid, u64, enum quota_acct_mode); void bch2_fs_quota_exit(struct bch_fs *); void bch2_fs_quota_init(struct bch_fs *); @@ -62,7 +56,8 @@ static inline int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid, static inline int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, struct bch_qid dst, - struct bch_qid src, u64 space) + struct bch_qid src, u64 space, + enum quota_acct_mode mode) { return 0; } diff --git a/fs/bcachefs/quota_types.h b/fs/bcachefs/quota_types.h index 9eda6c363736..6a136083d389 100644 --- a/fs/bcachefs/quota_types.h +++ b/fs/bcachefs/quota_types.h @@ -8,6 +8,12 @@ struct bch_qid { u32 q[QTYP_NR]; }; +enum quota_acct_mode { + KEY_TYPE_QUOTA_PREALLOC, + KEY_TYPE_QUOTA_WARN, + KEY_TYPE_QUOTA_NOCHECK, +}; + struct memquota_counter { u64 v; u64 hardlimit; -- cgit v1.2.3 From 96012e143e699db1a7644e4c5903b63bdde33772 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Dec 2018 05:31:49 -0500 Subject: bcachefs: rename keeps inheritable inode opts consistent Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 84 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/fs.h | 25 +++++++++++++++++ 2 files changed, 109 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index d22b9e7e2082..033582a87852 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -282,6 +282,32 @@ int bch2_fs_quota_transfer(struct bch_fs *c, return ret; } +int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_inode_info *dir = p; + u64 src, dst; + unsigned id; + int ret = 1; + + for (id = 0; id < Inode_opt_nr; id++) { + if (bi->bi_fields_set & (1 << id)) + continue; + + src = bch2_inode_opt_get(&dir->ei_inode, id); + dst = bch2_inode_opt_get(bi, id); + + if (src == dst) + continue; + + bch2_inode_opt_set(bi, id, src); + ret = 0; + } + + return ret; +} + static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) { struct bch_inode_unpacked inode_u; @@ -765,6 +791,7 @@ static int inode_update_for_rename_fn(struct bch_inode_info *inode, void *p) { struct rename_info *info = p; + int ret; if (inode == info->src_dir) { bi->bi_nlink -= S_ISDIR(info->src_inode->v.i_mode); @@ -779,6 +806,19 @@ static int inode_update_for_rename_fn(struct bch_inode_info *inode, S_ISDIR(info->dst_inode->v.i_mode); } + if (inode == info->src_inode) { + ret = bch2_reinherit_attrs_fn(inode, bi, info->dst_dir); + + BUG_ON(!ret && S_ISDIR(info->src_inode->v.i_mode)); + } + + if (inode == info->dst_inode && + info->mode == BCH_RENAME_EXCHANGE) { + ret = bch2_reinherit_attrs_fn(inode, bi, info->src_dir); + + BUG_ON(!ret && S_ISDIR(info->dst_inode->v.i_mode)); + } + if (inode == info->dst_inode && info->mode == BCH_RENAME_OVERWRITE) { BUG_ON(bi->bi_nlink && @@ -844,6 +884,39 @@ static int bch2_rename2(struct mnt_idmap *idmap, i.dst_inode); bch2_trans_init(&trans, c); + + if (S_ISDIR(i.src_inode->v.i_mode) && + inode_attrs_changing(i.dst_dir, i.src_inode)) { + ret = -EXDEV; + goto err; + } + + if (i.mode == BCH_RENAME_EXCHANGE && + S_ISDIR(i.dst_inode->v.i_mode) && + inode_attrs_changing(i.src_dir, i.dst_inode)) { + ret = -EXDEV; + goto err; + } + + if (inode_attr_changing(i.dst_dir, i.src_inode, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, i.src_inode, + i.dst_dir->ei_qid, + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err; + } + + if (i.mode == BCH_RENAME_EXCHANGE && + inode_attr_changing(i.src_dir, i.dst_inode, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, i.dst_inode, + i.src_dir->ei_qid, + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err; + } + retry: bch2_trans_begin(&trans); i.now = bch2_current_time(c); @@ -894,6 +967,17 @@ retry: ATTR_CTIME); err: bch2_trans_exit(&trans); + + bch2_fs_quota_transfer(c, i.src_inode, + bch_qid(&i.src_inode->ei_inode), + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_NOCHECK); + if (i.dst_inode) + bch2_fs_quota_transfer(c, i.dst_inode, + bch_qid(&i.dst_inode->ei_inode), + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_NOCHECK); + bch2_unlock_inodes(i.src_dir, i.dst_dir, i.src_inode, diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index fbb31976bc55..18e41609c89d 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -66,6 +66,27 @@ static inline unsigned nlink_bias(umode_t mode) return S_ISDIR(mode) ? 
2 : 1; } +static inline bool inode_attr_changing(struct bch_inode_info *dir, + struct bch_inode_info *inode, + enum inode_opt_id id) +{ + return !(inode->ei_inode.bi_fields_set & (1 << id)) && + bch2_inode_opt_get(&dir->ei_inode, id) != + bch2_inode_opt_get(&inode->ei_inode, id); +} + +static inline bool inode_attrs_changing(struct bch_inode_info *dir, + struct bch_inode_info *inode) +{ + unsigned id; + + for (id = 0; id < Inode_opt_nr; id++) + if (inode_attr_changing(dir, inode, id)) + return true; + + return false; +} + struct bch_inode_unpacked; #ifndef NO_BCACHEFS_FS @@ -91,6 +112,10 @@ int __must_check bch2_write_inode_trans(struct btree_trans *, int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *, unsigned); +int bch2_reinherit_attrs_fn(struct bch_inode_info *, + struct bch_inode_unpacked *, + void *); + void bch2_vfs_exit(void); int bch2_vfs_init(void); -- cgit v1.2.3 From 8095708fce72a911e20799078639e95c1a008176 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 17 Dec 2018 06:11:14 -0500 Subject: bcachefs: bch2_ioc_reinherit_attrs() Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 2 ++ fs/bcachefs/fs-ioctl.c | 77 +++++++++++++++++++++++++++++++++++++++++++- fs/bcachefs/fs.c | 31 ++---------------- fs/bcachefs/fs.h | 26 +++++++++++++++ fs/bcachefs/inode.c | 3 +- 5 files changed, 109 insertions(+), 30 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index c65104ed454a..2dca4bb0362b 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -307,4 +307,6 @@ struct bch_ioctl_disk_resize { __u64 nbuckets; }; +#define BCHFS_IOC_REINHERIT_ATTRS _IOR(0xbc, 14, const char __user *) + #endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index d6563370bec4..92939befe507 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "chardev.h" +#include "dirent.h" #include "fs.h" #include "fs-ioctl.h" #include "quota.h" @@ -177,6 +178,75 @@ err: return ret; } +static int bch2_ioc_reinherit_attrs(struct bch_fs *c, + struct file *file, + struct bch_inode_info *src, + const char __user *name) +{ + struct bch_inode_info *dst; + struct inode *vinode = NULL; + char *kname = NULL; + struct qstr qstr; + int ret = 0; + u64 inum; + + kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); + if (!kname) + return -ENOMEM; + + ret = strncpy_from_user(kname, name, BCH_NAME_MAX); + if (unlikely(ret < 0)) + goto err1; + + qstr.hash_len = ret; + qstr.name = kname; + + ret = -ENOENT; + inum = bch2_dirent_lookup(c, src->v.i_ino, + &src->ei_str_hash, + &qstr); + if (!inum) + goto err1; + + vinode = bch2_vfs_inode_get(c, inum); + ret = PTR_ERR_OR_ZERO(vinode); + if (ret) + goto err1; + + dst = to_bch_ei(vinode); + + ret = mnt_want_write_file(file); + if (ret) + goto err2; + + bch2_lock_inodes(src, dst); + + if (inode_attr_changing(src, dst, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, dst, + src->ei_qid, + 1 << QTYP_PRJ, + KEY_TYPE_QUOTA_PREALLOC); + if (ret) + goto err3; + } + + ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); +err3: + bch2_unlock_inodes(src, dst); + + /* return true if we did work */ + if (ret >= 0) + ret = !ret; + + mnt_drop_write_file(file); +err2: + iput(vinode); +err1: + kfree(kname); + + return ret; +} + long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct bch_inode_info *inode = 
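bch2_reinherit_attrs_fn() above copies each inheritable option from the new parent unless the inode has set it explicitly, returning nonzero when nothing needed to change (the ioctl then returns !ret, so userspace sees 1 when work was done). A minimal model of the loop:

	#include <stdio.h>
	#include <stdbool.h>

	enum { Inode_opt_compression, Inode_opt_background_target, Inode_opt_nr };

	struct opts {
		unsigned fields_set;		/* bit set => value was set explicitly */
		int	 val[Inode_opt_nr];
	};

	/* Copy inheritable options from dir; returns true if nothing changed */
	static bool reinherit(struct opts *child, const struct opts *dir)
	{
		bool unchanged = true;

		for (int id = 0; id < Inode_opt_nr; id++) {
			if (child->fields_set & (1U << id))
				continue;	/* explicitly set: never inherited */
			if (child->val[id] == dir->val[id])
				continue;
			child->val[id] = dir->val[id];
			unchanged = false;
		}
		return unchanged;
	}

	int main(void)
	{
		struct opts dir   = { 0, { 2, 1 } };
		struct opts child = { 1U << Inode_opt_compression, { 0, 0 } };

		printf("%d\n", reinherit(&child, &dir));	/* 0: target copied */
		printf("%d %d\n", child.val[0], child.val[1]);	/* 0 1 */
		return 0;
	}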
file_bch_inode(file); @@ -193,7 +263,12 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) case FS_IOC_FSGETXATTR: return bch2_ioc_fsgetxattr(inode, (void __user *) arg); case FS_IOC_FSSETXATTR: - return bch2_ioc_fssetxattr(c, file, inode, (void __user *) arg); + return bch2_ioc_fssetxattr(c, file, inode, + (void __user *) arg); + + case BCHFS_IOC_REINHERIT_ATTRS: + return bch2_ioc_reinherit_attrs(c, file, inode, + (void __user *) arg); case FS_IOC_GETVERSION: return -ENOTTY; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 033582a87852..d23a82d94c5e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -51,30 +51,6 @@ static void journal_seq_copy(struct bch_inode_info *dst, } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); } -static inline int ptrcmp(void *l, void *r) -{ - return (l > r) - (l < r); -} - -#define __bch2_lock_inodes(_lock, ...) \ -do { \ - struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ - unsigned i; \ - \ - bubble_sort(&a[1], ARRAY_SIZE(a) - 1 , ptrcmp); \ - \ - for (i = ARRAY_SIZE(a) - 1; a[i]; --i) \ - if (a[i] != a[i - 1]) { \ - if (_lock) \ - mutex_lock_nested(&a[i]->ei_update_lock, i);\ - else \ - mutex_unlock(&a[i]->ei_update_lock); \ - } \ -} while (0) - -#define bch2_lock_inodes(...) __bch2_lock_inodes(true, __VA_ARGS__) -#define bch2_unlock_inodes(...) __bch2_lock_inodes(false, __VA_ARGS__) - static void __pagecache_lock_put(struct pagecache_lock *lock, long i) { BUG_ON(atomic_long_read(&lock->v) == 0); @@ -308,7 +284,7 @@ int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, return ret; } -static struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) +struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) { struct bch_inode_unpacked inode_u; struct bch_inode_info *inode; @@ -393,14 +369,13 @@ __bch2_create(struct mnt_idmap *idmap, bch2_inode_init(c, &inode_u, 0, 0, 0, rdev, &dir->ei_inode); bch2_inode_init_owner(&inode_u, &dir->v, mode); - inode_u.bi_project = dir->ei_qid.q[QTYP_PRJ]; - hash_info = bch2_hash_info_init(c, &inode_u); if (tmpfile) inode_u.bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC); + ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, + KEY_TYPE_QUOTA_PREALLOC); if (ret) return ERR_PTR(ret); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 18e41609c89d..4c584d3a27c3 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -51,6 +51,30 @@ struct bch_inode_info { #define to_bch_ei(_inode) \ container_of_or_null(_inode, struct bch_inode_info, v) +static inline int ptrcmp(void *l, void *r) +{ + return (l > r) - (l < r); +} + +#define __bch2_lock_inodes(_lock, ...) \ +do { \ + struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ + unsigned i; \ + \ + bubble_sort(&a[1], ARRAY_SIZE(a) - 1 , ptrcmp); \ + \ + for (i = ARRAY_SIZE(a) - 1; a[i]; --i) \ + if (a[i] != a[i - 1]) { \ + if (_lock) \ + mutex_lock_nested(&a[i]->ei_update_lock, i);\ + else \ + mutex_unlock(&a[i]->ei_update_lock); \ + } \ +} while (0) + +#define bch2_lock_inodes(...) __bch2_lock_inodes(true, __VA_ARGS__) +#define bch2_unlock_inodes(...) 
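bch2_lock_inodes(), moved into fs.h above, encodes the standard deadlock-avoidance rule for taking several locks of one class: sort the objects by address so every caller acquires the locks in a single global order. A two-object userspace sketch of the same rule:

	#include <pthread.h>
	#include <stdio.h>

	struct obj { pthread_mutex_t lock; };

	/* Always lock in address order; take a lock only once if a == b */
	static void lock_two(struct obj *a, struct obj *b)
	{
		if (a > b) {
			struct obj *t = a;
			a = b;
			b = t;
		}
		pthread_mutex_lock(&a->lock);
		if (b != a)
			pthread_mutex_lock(&b->lock);
	}

	static void unlock_two(struct obj *a, struct obj *b)
	{
		pthread_mutex_unlock(&a->lock);
		if (b != a)
			pthread_mutex_unlock(&b->lock);
	}

	int main(void)
	{
		struct obj x = { PTHREAD_MUTEX_INITIALIZER };
		struct obj y = { PTHREAD_MUTEX_INITIALIZER };

		lock_two(&x, &y);	/* same order regardless of argument order */
		unlock_two(&x, &y);
		lock_two(&y, &x);
		unlock_two(&y, &x);
		printf("ok\n");
		return 0;
	}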
__bch2_lock_inodes(false, __VA_ARGS__) + static inline struct bch_inode_info *file_bch_inode(struct file *file) { return to_bch_ei(file_inode(file)); @@ -97,6 +121,8 @@ int bch2_fs_quota_transfer(struct bch_fs *, unsigned, enum quota_acct_mode); +struct inode *bch2_vfs_inode_get(struct bch_fs *, u64); + /* returns 0 if we want to do the update, or error is passed up */ typedef int (*inode_set_fn)(struct bch_inode_info *, struct bch_inode_unpacked *, void *); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 23d3668b4567..6acb487312a8 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -258,7 +258,8 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, /* ick */ inode_u->bi_flags |= c->opts.str_hash << INODE_STR_HASH_OFFSET; - get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed)); + get_random_bytes(&inode_u->bi_hash_seed, + sizeof(inode_u->bi_hash_seed)); inode_u->bi_mode = mode; inode_u->bi_uid = uid; -- cgit v1.2.3 From 0b847a19d96b66baeb651317d5e22f8bd4368975 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Dec 2018 12:58:56 -0500 Subject: bcachefs: Lots of option handling improvements Add helptext to option definitions - so we can unify the option handling with the format command Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/opts.c | 47 ++++-- fs/bcachefs/opts.h | 386 +++++++++++++++++++++++++++++++----------------- fs/bcachefs/rebalance.c | 10 +- fs/bcachefs/sysfs.c | 9 +- fs/bcachefs/tests.c | 4 +- fs/bcachefs/util.c | 27 ++-- fs/bcachefs/util.h | 3 +- 9 files changed, 310 insertions(+), 182 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 92a0ecd8fbc3..244b808688b3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -230,13 +230,13 @@ #define bch_verbose(c, fmt, ...) \ do { \ - if ((c)->opts.verbose_recovery) \ + if ((c)->opts.verbose) \ bch_info(c, fmt, ##__VA_ARGS__); \ } while (0) #define pr_verbose_init(opts, fmt, ...) \ do { \ - if (opt_get(opts, verbose_init)) \ + if (opt_get(opts, verbose)) \ pr_info(fmt, ##__VA_ARGS__); \ } while (0) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index d23a82d94c5e..02c7543e40c8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1690,7 +1690,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); - if (opt->mode < OPT_MOUNT) + if (!(opt->mode & OPT_MOUNT)) continue; if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 17245e0b4a73..13a9a2fcd575 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -75,22 +75,22 @@ const char * const bch2_dev_state[] = { void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) { -#define BCH_OPT(_name, ...) \ +#define x(_name, ...) \ if (opt_defined(src, _name)) \ opt_set(*dst, _name, src._name); BCH_OPTS() -#undef BCH_OPT +#undef x } bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id) { switch (id) { -#define BCH_OPT(_name, ...) \ +#define x(_name, ...) \ case Opt_##_name: \ return opt_defined(*opts, _name); BCH_OPTS() -#undef BCH_OPT +#undef x default: BUG(); } @@ -99,11 +99,11 @@ bool bch2_opt_defined_by_id(const struct bch_opts *opts, enum bch_opt_id id) u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) { switch (id) { -#define BCH_OPT(_name, ...) 
\ +#define x(_name, ...) \ case Opt_##_name: \ return opts->_name; BCH_OPTS() -#undef BCH_OPT +#undef x default: BUG(); } @@ -112,12 +112,12 @@ u64 bch2_opt_get_by_id(const struct bch_opts *opts, enum bch_opt_id id) void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) { switch (id) { -#define BCH_OPT(_name, ...) \ +#define x(_name, ...) \ case Opt_##_name: \ opt_set(*opts, _name, v); \ break; BCH_OPTS() -#undef BCH_OPT +#undef x default: BUG(); } @@ -131,11 +131,11 @@ struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) { struct bch_opts opts = bch2_opts_empty(); -#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ +#define x(_name, _bits, _mode, _type, _sb_opt, ...) \ if (_sb_opt != NO_SB_OPT) \ opt_set(opts, _name, _sb_opt(sb)); BCH_OPTS() -#undef BCH_OPT +#undef x return opts; } @@ -143,24 +143,27 @@ struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) const struct bch_option bch2_opt_table[] = { #define OPT_BOOL() .type = BCH_OPT_BOOL #define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max +#define OPT_SECTORS(_min, _max) .type = BCH_OPT_SECTORS, .min = _min, .max = _max #define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices #define OPT_FN(_fn) .type = BCH_OPT_FN, \ .parse = _fn##_parse, \ .to_text = _fn##_to_text -#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ +#define x(_name, _bits, _mode, _type, _sb_opt, _default, _hint, _help) \ [Opt_##_name] = { \ .attr = { \ .name = #_name, \ - .mode = _mode == OPT_RUNTIME ? 0644 : 0444, \ + .mode = (_mode) & OPT_RUNTIME ? 0644 : 0444, \ }, \ .mode = _mode, \ + .hint = _hint, \ + .help = _help, \ .set_sb = SET_##_sb_opt, \ _type \ }, BCH_OPTS() -#undef BCH_OPT +#undef x }; int bch2_opt_lookup(const char *name) @@ -216,6 +219,19 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, if (ret < 0) return ret; + if (*res < opt->min || *res >= opt->max) + return -ERANGE; + break; + case BCH_OPT_SECTORS: + ret = bch2_strtou64_h(val, res); + if (ret < 0) + return ret; + + if (*res & 511) + return -EINVAL; + + *res >>= 9; + if (*res < opt->min || *res >= opt->max) return -ERANGE; break; @@ -256,6 +272,9 @@ void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, case BCH_OPT_UINT: pr_buf(out, "%lli", v); break; + case BCH_OPT_SECTORS: + bch2_hprint(out, v); + break; case BCH_OPT_STR: if (flags & OPT_SHOW_FULL_LIST) bch2_string_opt_to_text(out, opt->choices, v); @@ -345,7 +364,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options) goto no_val; } - if (bch2_opt_table[id].mode < OPT_MOUNT) + if (!(bch2_opt_table[id].mode & OPT_MOUNT)) goto bad_opt; if (id == Opt_acl && diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index faa2a72c8c3b..f4cb0625c3cc 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -32,22 +32,25 @@ extern const char * const bch2_dev_state[]; /* dummy option, for options that aren't stored in the superblock */ LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); +/* When can be set: */ enum opt_mode { - OPT_INTERNAL, - OPT_FORMAT, - OPT_MOUNT, - OPT_RUNTIME, + OPT_FORMAT = (1 << 0), + OPT_MOUNT = (1 << 1), + OPT_RUNTIME = (1 << 2), + OPT_INODE = (1 << 3), + OPT_DEVICE = (1 << 4), }; enum opt_type { BCH_OPT_BOOL, BCH_OPT_UINT, + BCH_OPT_SECTORS, BCH_OPT_STR, BCH_OPT_FN, }; /** - * BCH_OPT(name, type, in mem type, mode, sb_opt) + * x(name, shortopt, type, in mem type, mode, sb_opt) * * @name - name of mount option, sysfs attribute, and struct bch_opts * member @@ -66,150 +69,252 @@ enum opt_type { */ #define 
BCH_OPTS() \ - BCH_OPT(block_size, u16, OPT_FORMAT, \ - OPT_UINT(1, 128), \ - BCH_SB_BLOCK_SIZE, 8) \ - BCH_OPT(btree_node_size, u16, OPT_FORMAT, \ - OPT_UINT(1, 128), \ - BCH_SB_BTREE_NODE_SIZE, 512) \ - BCH_OPT(errors, u8, OPT_RUNTIME, \ - OPT_STR(bch2_error_actions), \ - BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO) \ - BCH_OPT(metadata_replicas, u8, OPT_RUNTIME, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ - BCH_SB_META_REPLICAS_WANT, 1) \ - BCH_OPT(data_replicas, u8, OPT_RUNTIME, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ - BCH_SB_DATA_REPLICAS_WANT, 1) \ - BCH_OPT(metadata_replicas_required, u8, OPT_MOUNT, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ - BCH_SB_META_REPLICAS_REQ, 1) \ - BCH_OPT(data_replicas_required, u8, OPT_MOUNT, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ - BCH_SB_DATA_REPLICAS_REQ, 1) \ - BCH_OPT(metadata_checksum, u8, OPT_RUNTIME, \ - OPT_STR(bch2_csum_types), \ - BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_CRC32C) \ - BCH_OPT(data_checksum, u8, OPT_RUNTIME, \ - OPT_STR(bch2_csum_types), \ - BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_CRC32C) \ - BCH_OPT(compression, u8, OPT_RUNTIME, \ - OPT_STR(bch2_compression_types), \ - BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_NONE)\ - BCH_OPT(background_compression, u8, OPT_RUNTIME, \ - OPT_STR(bch2_compression_types), \ - BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_NONE)\ - BCH_OPT(str_hash, u8, OPT_RUNTIME, \ - OPT_STR(bch2_str_hash_types), \ - BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_SIPHASH) \ - BCH_OPT(foreground_target, u16, OPT_RUNTIME, \ - OPT_FN(bch2_opt_target), \ - BCH_SB_FOREGROUND_TARGET, 0) \ - BCH_OPT(background_target, u16, OPT_RUNTIME, \ - OPT_FN(bch2_opt_target), \ - BCH_SB_BACKGROUND_TARGET, 0) \ - BCH_OPT(promote_target, u16, OPT_RUNTIME, \ - OPT_FN(bch2_opt_target), \ - BCH_SB_PROMOTE_TARGET, 0) \ - BCH_OPT(erasure_code, u16, OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_ERASURE_CODE, false) \ - BCH_OPT(inodes_32bit, u8, OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_INODE_32BIT, false) \ - BCH_OPT(gc_reserve_percent, u8, OPT_RUNTIME, \ - OPT_UINT(5, 21), \ - BCH_SB_GC_RESERVE, 8) \ - BCH_OPT(gc_reserve_bytes, u64, OPT_RUNTIME, \ - OPT_UINT(0, U64_MAX), \ - BCH_SB_GC_RESERVE_BYTES, 0) \ - BCH_OPT(root_reserve_percent, u8, OPT_MOUNT, \ - OPT_UINT(0, 100), \ - BCH_SB_ROOT_RESERVE, 0) \ - BCH_OPT(wide_macs, u8, OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_128_BIT_MACS, false) \ - BCH_OPT(acl, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - BCH_SB_POSIX_ACL, true) \ - BCH_OPT(usrquota, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - BCH_SB_USRQUOTA, false) \ - BCH_OPT(grpquota, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - BCH_SB_GRPQUOTA, false) \ - BCH_OPT(prjquota, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - BCH_SB_PRJQUOTA, false) \ - BCH_OPT(degraded, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(discard, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(verbose_recovery, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(verbose_init, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(journal_flush_disabled, u8, OPT_RUNTIME, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(fsck, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, true) \ - BCH_OPT(fix_errors, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(nochanges, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(noreplay, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(norecovery, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(noexcl, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(sb, u64, OPT_MOUNT, \ - 
OPT_UINT(0, S64_MAX), \ - NO_SB_OPT, BCH_SB_SECTOR) \ - BCH_OPT(read_only, u8, OPT_INTERNAL, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(nostart, u8, OPT_INTERNAL, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(no_data_io, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(version_upgrade, u8, OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ - BCH_OPT(project, u8, OPT_INTERNAL, \ - OPT_BOOL(), \ - NO_SB_OPT, false) \ + x(block_size, u16, \ + OPT_FORMAT, \ + OPT_SECTORS(1, 128), \ + BCH_SB_BLOCK_SIZE, 8, \ + "size", NULL) \ + x(btree_node_size, u16, \ + OPT_FORMAT, \ + OPT_SECTORS(1, 128), \ + BCH_SB_BTREE_NODE_SIZE, 512, \ + "size", "Btree node size, default 256k") \ + x(errors, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_STR(bch2_error_actions), \ + BCH_SB_ERROR_ACTION, BCH_ON_ERROR_RO, \ + NULL, "Action to take on filesystem error") \ + x(metadata_replicas, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_META_REPLICAS_WANT, 1, \ + "#", "Number of metadata replicas") \ + x(data_replicas, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_DATA_REPLICAS_WANT, 1, \ + "#", "Number of data replicas") \ + x(metadata_replicas_required, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_META_REPLICAS_REQ, 1, \ + "#", NULL) \ + x(data_replicas_required, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_UINT(1, BCH_REPLICAS_MAX), \ + BCH_SB_DATA_REPLICAS_REQ, 1, \ + "#", NULL) \ + x(metadata_checksum, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_STR(bch2_csum_types), \ + BCH_SB_META_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ + NULL, NULL) \ + x(data_checksum, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_STR(bch2_csum_types), \ + BCH_SB_DATA_CSUM_TYPE, BCH_CSUM_OPT_CRC32C, \ + NULL, NULL) \ + x(compression, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_STR(bch2_compression_types), \ + BCH_SB_COMPRESSION_TYPE, BCH_COMPRESSION_OPT_NONE, \ + NULL, NULL) \ + x(background_compression, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_STR(bch2_compression_types), \ + BCH_SB_BACKGROUND_COMPRESSION_TYPE,BCH_COMPRESSION_OPT_NONE, \ + NULL, NULL) \ + x(str_hash, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_STR(bch2_str_hash_types), \ + BCH_SB_STR_HASH_TYPE, BCH_STR_HASH_SIPHASH, \ + NULL, "Hash function for directory entries and xattrs")\ + x(foreground_target, u16, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FN(bch2_opt_target), \ + BCH_SB_FOREGROUND_TARGET, 0, \ + "(target)", "Device or disk group for foreground writes") \ + x(background_target, u16, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FN(bch2_opt_target), \ + BCH_SB_BACKGROUND_TARGET, 0, \ + "(target)", "Device or disk group to move data to in the background")\ + x(promote_target, u16, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_FN(bch2_opt_target), \ + BCH_SB_PROMOTE_TARGET, 0, \ + "(target)", "Device or disk group to promote data to on read")\ + x(erasure_code, u16, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME|OPT_INODE, \ + OPT_BOOL(), \ + BCH_SB_ERASURE_CODE, false, \ + NULL, "Enable erasure coding (DO NOT USE YET)") \ + x(inodes_32bit, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_INODE_32BIT, false, \ + NULL, "Constrain inode numbers to 32 bits") \ + x(gc_reserve_percent, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(5, 21), \ + BCH_SB_GC_RESERVE, 8, \ + "%", "Percentage of disk space to reserve for copygc")\ + 
x(gc_reserve_bytes, u64, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_SECTORS(0, U64_MAX), \ + BCH_SB_GC_RESERVE_BYTES, 0, \ + "%", "Amount of disk space to reserve for copygc\n" \ + "Takes precedence over gc_reserve_percent if set")\ + x(root_reserve_percent, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_UINT(0, 100), \ + BCH_SB_ROOT_RESERVE, 0, \ + "%", "Percentage of disk space to reserve for superuser")\ + x(wide_macs, u8, \ + OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_128_BIT_MACS, false, \ + NULL, "Store full 128 bits of cryptographic MACs, instead of 80")\ + x(acl, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH_SB_POSIX_ACL, true, \ + NULL, "Enable POSIX acls") \ + x(usrquota, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH_SB_USRQUOTA, false, \ + NULL, "Enable user quotas") \ + x(grpquota, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH_SB_GRPQUOTA, false, \ + NULL, "Enable group quotas") \ + x(prjquota, u8, \ + OPT_FORMAT|OPT_MOUNT, \ + OPT_BOOL(), \ + BCH_SB_PRJQUOTA, false, \ + NULL, "Enable project quotas") \ + x(degraded, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Allow mounting in degraded mode") \ + x(discard, u8, \ + OPT_MOUNT|OPT_DEVICE, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Enable discard/TRIM support") \ + x(verbose, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Extra debugging information during mount/recovery")\ + x(journal_flush_disabled, u8, \ + OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Disable journal flush on sync/fsync\n" \ + "If enabled, writes can be lost, but only since the\n"\ + "last journal write (default 1 second)") \ + x(fsck, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, true, \ + NULL, "Run fsck on mount") \ + x(fix_errors, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Fix errors during fsck without asking") \ + x(nochanges, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Super read only mode - no writes at all will be issued,\n"\ + "even if we have to replay the journal") \ + x(noreplay, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Don't replay the journal (only for internal tools)")\ + x(norecovery, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, NULL) \ + x(noexcl, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Don't open device in exclusive mode") \ + x(sb, u64, \ + OPT_MOUNT, \ + OPT_UINT(0, S64_MAX), \ + NO_SB_OPT, BCH_SB_SECTOR, \ + "offset", "Sector offset of superblock") \ + x(read_only, u8, \ + 0, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, NULL) \ + x(nostart, u8, \ + 0, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Don\'t start filesystem, only open devices") \ + x(version_upgrade, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Set superblock to latest version,\n" \ + "allowing any new features to be used") \ + x(project, u8, \ + OPT_INODE, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, NULL) \ + x(no_data_io, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Skip submit_bio() for data reads and writes, " \ + "for performance testing purposes") \ + x(fs_size, u64, \ + OPT_DEVICE, \ + OPT_SECTORS(0, S64_MAX), \ + NO_SB_OPT, 0, \ + "size", "Size of filesystem on device") \ + x(bucket, u32, \ + OPT_DEVICE, \ + OPT_SECTORS(0, S64_MAX), \ + NO_SB_OPT, 0, \ + "size", "Size of filesystem on device") \ + x(durability, u8, \ + OPT_DEVICE, \ + OPT_UINT(0, BCH_REPLICAS_MAX), \ + 
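
The BCH_OPTS() list being rewritten here is an x-macro: a single list of entries, expanded several times with different definitions of x() to generate the option-id enum, the value struct, and the defaults table, as the hunks further down show. A minimal userspace sketch of the pattern (names here are illustrative, not from the patch):

#include <stdio.h>

#define MY_OPTS()				\
	x(verbose,	int,	0)		\
	x(level,	int,	3)

enum opt_id {					/* expansion 1: option ids */
#define x(_name, _bits, _default)	Opt_##_name,
	MY_OPTS()
#undef x
	opts_nr
};

struct opts {					/* expansion 2: value storage */
#define x(_name, _bits, _default)	_bits _name;
	MY_OPTS()
#undef x
};

static const struct opts opts_default = {	/* expansion 3: defaults */
#define x(_name, _bits, _default)	._name = _default,
	MY_OPTS()
#undef x
};

int main(void)
{
	struct opts o = opts_default;

	printf("verbose=%d level=%d (%d options)\n",
	       o.verbose, o.level, (int) opts_nr);
	return 0;
}
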
NO_SB_OPT, 1, \ + "n", "Data written to this device will be considered\n"\ + "to have already been replicated n times") + struct bch_opts { -#define BCH_OPT(_name, _bits, ...) unsigned _name##_defined:1; +#define x(_name, _bits, ...) unsigned _name##_defined:1; BCH_OPTS() -#undef BCH_OPT +#undef x -#define BCH_OPT(_name, _bits, ...) _bits _name; +#define x(_name, _bits, ...) _bits _name; BCH_OPTS() -#undef BCH_OPT +#undef x }; static const struct bch_opts bch2_opts_default = { -#define BCH_OPT(_name, _bits, _mode, _type, _sb_opt, _default) \ +#define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \ ._name##_defined = true, \ ._name = _default, \ BCH_OPTS() -#undef BCH_OPT +#undef x }; #define opt_defined(_opts, _name) ((_opts)._name##_defined) @@ -231,9 +336,9 @@ static inline struct bch_opts bch2_opts_empty(void) void bch2_opts_apply(struct bch_opts *, struct bch_opts); enum bch_opt_id { -#define BCH_OPT(_name, ...) Opt_##_name, +#define x(_name, ...) Opt_##_name, BCH_OPTS() -#undef BCH_OPT +#undef x bch2_opts_nr }; @@ -259,6 +364,9 @@ struct bch_option { }; }; + const char *hint; + const char *help; + }; extern const struct bch_option bch2_opt_table[]; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index eec74d4a5712..cc1a7deb90bc 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -263,13 +263,13 @@ ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) struct rebalance_work w = rebalance_work(c); char h1[21], h2[21]; - bch2_hprint(h1, w.dev_most_full_work << 9); - bch2_hprint(h2, w.dev_most_full_capacity << 9); + bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); + bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); pr_buf(&out, "fullest_dev (%i):\t%s/%s\n", w.dev_most_full_idx, h1, h2); - bch2_hprint(h1, w.total_work << 9); - bch2_hprint(h2, c->capacity << 9); + bch2_hprint(&PBUF(h1), w.total_work << 9); + bch2_hprint(&PBUF(h2), c->capacity << 9); pr_buf(&out, "total work:\t\t%s/%s\n", h1, h2); pr_buf(&out, "rate:\t\t\t%u\n", r->pd.rate.rate); @@ -279,7 +279,7 @@ ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) pr_buf(&out, "waiting\n"); break; case REBALANCE_THROTTLED: - bch2_hprint(h1, + bch2_hprint(&PBUF(h1), (r->throttled_until_iotime - atomic_long_read(&c->io_clock[WRITE].now)) << 9); pr_buf(&out, "throttled for %lu sec or %s io\n", diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index a423159b6ed5..b59b7a5a4cbb 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -73,9 +73,10 @@ do { \ #define sysfs_hprint(file, val) \ do { \ if (attr == &sysfs_ ## file) { \ - ssize_t ret = bch2_hprint(buf, val); \ - strcat(buf, "\n"); \ - return ret + 1; \ + struct printbuf out = _PBUF(buf, PAGE_SIZE); \ + bch2_hprint(&out, val); \ + pr_buf(&out, "\n"); \ + return out.pos - buf; \ } \ } while (0) @@ -658,7 +659,7 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj) for (i = bch2_opt_table; i < bch2_opt_table + bch2_opts_nr; i++) { - if (i->mode == OPT_INTERNAL) + if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME))) continue; ret = sysfs_create_file(kobj, &i->attr); diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index dc8abce94ff0..bcbe782260f0 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -619,8 +619,8 @@ void bch2_btree_perf_test(struct bch_fs *c, const char *testname, time = j.finish - j.start; scnprintf(name_buf, sizeof(name_buf), "%s:", testname); - bch2_hprint(nr_buf, nr); - bch2_hprint(per_sec_buf, nr * NSEC_PER_SEC / time); + bch2_hprint(&PBUF(nr_buf), nr); + 
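
For reference, the arithmetic behind the new printbuf-based bch2_hprint() (the util.c hunk below), rebuilt as a self-contained userspace program; only the shift/remainder logic is taken from the patch, the buffer handling is simplified and negative values are not exercised. t holds the low ten bits shifted out on each iteration, and t / 103 maps [0, 1023] onto a single decimal digit in [0, 9]:

#include <stdio.h>
#include <stdint.h>

static const char si_units[] = "?kMGTPEZY";

static void hprint(char *buf, size_t len, int64_t v)
{
	int u, t = 0;
	int n;

	for (u = 0; v >= 1024 || v <= -1024; u++) {
		t = v & 1023;	/* remainder shifted out this step */
		v >>= 10;
	}

	n = snprintf(buf, len, "%lld", (long long) v);
	if (u && v < 100 && v > -100)
		n += snprintf(buf + n, len - n, ".%i", t / 103);
	if (u)
		snprintf(buf + n, len - n, "%c", si_units[u]);
}

int main(void)
{
	char buf[21];

	hprint(buf, sizeof(buf), 1536);
	puts(buf);	/* "1.4k": 1536 = 1k + 512, and 512 / 103 == 4 */
	return 0;
}
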
bch2_hprint(&PBUF(per_sec_buf), nr * NSEC_PER_SEC / time); printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", name_buf, nr_buf, nr_threads, time / NSEC_PER_SEC, diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index bb6b4383d33f..8931aa6a1e2a 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -100,10 +100,10 @@ STRTO_H(strtoint, int) STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) +STRTO_H(strtou64, u64) -ssize_t bch2_hprint(char *buf, s64 v) +void bch2_hprint(struct printbuf *buf, s64 v) { - char dec[4] = ""; int u, t = 0; for (u = 0; v >= 1024 || v <= -1024; u++) { @@ -111,17 +111,16 @@ ssize_t bch2_hprint(char *buf, s64 v) v >>= 10; } - if (!u) - return sprintf(buf, "%lli", v); + pr_buf(buf, "%lli", v); /* * 103 is magic: t is in the range [-1023, 1023] and we want * to turn it into [-9, 9] */ - if (v < 100 && v > -100) - scnprintf(dec, sizeof(dec), ".%i", t / 103); - - return sprintf(buf, "%lli%s%c", v, dec, si_units[u]); + if (u && v < 100 && v > -100) + pr_buf(buf, ".%i", t / 103); + if (u) + pr_buf(buf, "%c", si_units[u]); } void bch2_string_opt_to_text(struct printbuf *out, @@ -483,12 +482,12 @@ size_t bch2_pd_controller_print_debug(struct bch_pd_controller *pd, char *buf) char change[21]; s64 next_io; - bch2_hprint(rate, pd->rate.rate); - bch2_hprint(actual, pd->last_actual); - bch2_hprint(target, pd->last_target); - bch2_hprint(proportional, pd->last_proportional); - bch2_hprint(derivative, pd->last_derivative); - bch2_hprint(change, pd->last_change); + bch2_hprint(&PBUF(rate), pd->rate.rate); + bch2_hprint(&PBUF(actual), pd->last_actual); + bch2_hprint(&PBUF(target), pd->last_target); + bch2_hprint(&PBUF(proportional), pd->last_proportional); + bch2_hprint(&PBUF(derivative), pd->last_derivative); + bch2_hprint(&PBUF(change), pd->last_change); next_io = div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 47afd3955c7a..7d1e6cc6afda 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -265,6 +265,7 @@ int bch2_strtoint_h(const char *, int *); int bch2_strtouint_h(const char *, unsigned int *); int bch2_strtoll_h(const char *, long long *); int bch2_strtoull_h(const char *, unsigned long long *); +int bch2_strtou64_h(const char *, u64 *); static inline int bch2_strtol_h(const char *cp, long *res) { @@ -333,7 +334,7 @@ static inline int bch2_strtoul_h(const char *cp, long *res) : type_is(var, char *) ? 
"%s\n" \ : "%i\n", var) -ssize_t bch2_hprint(char *buf, s64 v); +void bch2_hprint(struct printbuf *, s64); bool bch2_is_zero(const void *, size_t); -- cgit v1.2.3 From 5154704b29e58a5fd9acd601b831d99298a76a6c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Jul 2018 22:27:07 -0400 Subject: bcachefs: Use deferred btree updates for inode updates Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 1 + fs/bcachefs/btree_locking.h | 2 -- fs/bcachefs/fs-io.c | 35 +++++++++++++++++++++-------------- fs/bcachefs/fs.c | 23 +++++++++++++++++++++-- fs/bcachefs/fs.h | 1 + 5 files changed, 44 insertions(+), 18 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 52e0e003153b..a64ed6d32175 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -104,6 +104,7 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, unsigned, unsigned); int bch2_btree_iter_unlock(struct btree_iter *); +bool bch2_btree_iter_relock(struct btree_iter *); bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); bool __bch2_btree_iter_upgrade_nounlock(struct btree_iter *, unsigned); diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 48b50e066186..c036cd0458a4 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -203,8 +203,6 @@ static inline bool bch2_btree_node_relock(struct btree_iter *iter, __bch2_btree_node_relock(iter, level); } -bool bch2_btree_iter_relock(struct btree_iter *); - void bch2_btree_node_unlock_write(struct btree *, struct btree_iter *); void __bch2_btree_node_lock_write(struct btree *, struct btree_iter *); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 7681cfbc6bed..f8657baf0521 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -287,11 +287,11 @@ static int bch2_extent_update(struct btree_trans *trans, bool direct, s64 *total_delta) { - struct btree_iter *inode_iter = NULL; struct bch_inode_unpacked inode_u; struct bkey_inode_buf inode_p; bool allocating = false; bool extended = false; + bool inode_locked = false; s64 i_sectors_delta; int ret; @@ -314,16 +314,20 @@ static int bch2_extent_update(struct btree_trans *trans, /* XXX: inode->i_size locking */ if (i_sectors_delta || new_i_size > inode->ei_inode.bi_size) { - inode_iter = bch2_trans_get_iter(trans, - BTREE_ID_INODES, - POS(k->k.p.inode, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(inode_iter)) - return PTR_ERR(inode_iter); + bch2_btree_iter_unlock(extent_iter); + mutex_lock(&inode->ei_update_lock); - ret = bch2_btree_iter_traverse(inode_iter); - if (ret) - goto err; + if (!bch2_btree_iter_relock(extent_iter)) { + mutex_unlock(&inode->ei_update_lock); + return -EINTR; + } + + inode_locked = true; + + if (!inode->ei_inode_update) + inode->ei_inode_update = + bch2_deferred_update_alloc(trans->c, + BTREE_ID_INODES, 64); inode_u = inode->ei_inode; inode_u.bi_sectors += i_sectors_delta; @@ -337,7 +341,8 @@ static int bch2_extent_update(struct btree_trans *trans, bch2_inode_pack(&inode_p, &inode_u); bch2_trans_update(trans, - BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i)); + BTREE_INSERT_DEFERRED(inode->ei_inode_update, + &inode_p.inode.k_i)); } ret = bch2_trans_commit(trans, disk_res, @@ -371,13 +376,15 @@ static int bch2_extent_update(struct btree_trans *trans, if (total_delta) *total_delta += i_sectors_delta; err: - if (!IS_ERR_OR_NULL(inode_iter)) - bch2_trans_iter_put(trans, inode_iter); + if (inode_locked) + 
mutex_unlock(&inode->ei_update_lock); + return ret; } static int bchfs_write_index_update(struct bch_write_op *wop) { + struct bch_fs *c = wop->c; struct bchfs_write_op *op = container_of(wop, struct bchfs_write_op, op); struct quota_res *quota_res = op->is_dio @@ -392,7 +399,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop) BUG_ON(k->k.p.inode != inode->v.i_ino); - bch2_trans_init(&trans, wop->c); + bch2_trans_init(&trans, c); bch2_trans_preload_iters(&trans); iter = bch2_trans_get_iter(&trans, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 02c7543e40c8..5f93ea76785f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -156,12 +156,18 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, inode_set_fn set, void *p) { + struct bch_fs *c = trans->c; struct btree_iter *iter; struct bkey_inode_buf *inode_p; int ret; lockdep_assert_held(&inode->ei_update_lock); + /* XXX: Don't do this with btree locks held */ + if (!inode->ei_inode_update) + inode->ei_inode_update = + bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64); +#if 0 iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inode->v.i_ino, 0), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -172,7 +178,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, ret = bch2_btree_iter_traverse(iter); if (ret) return ret; - +#endif *inode_u = inode->ei_inode; if (set) { @@ -186,7 +192,15 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode_u); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); + + if (!inode->ei_inode_update) + bch2_trans_update(trans, + BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); + else + bch2_trans_update(trans, + BTREE_INSERT_DEFERRED(inode->ei_inode_update, + &inode_p->inode.k_i)); + return 0; } @@ -1431,6 +1445,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) mutex_init(&inode->ei_update_lock); pagecache_lock_init(&inode->ei_pagecache_lock); mutex_init(&inode->ei_quota_lock); + inode->ei_inode_update = NULL; inode->ei_journal_seq = 0; return &inode->v; @@ -1494,6 +1509,10 @@ static void bch2_evict_inode(struct inode *vinode) BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved); + if (inode->ei_inode_update) + bch2_deferred_update_free(c, inode->ei_inode_update); + inode->ei_inode_update = NULL; + if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), KEY_TYPE_QUOTA_WARN); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index f949cd0d2a68..b9a8a9bc3e90 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -34,6 +34,7 @@ struct bch_inode_info { struct inode v; struct mutex ei_update_lock; + struct deferred_update *ei_inode_update; u64 ei_journal_seq; u64 ei_quota_reserved; unsigned long ei_last_dirtied; -- cgit v1.2.3 From 61f321fc8bcb844ff0b2520ba71753cb5a511a9a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 13 Mar 2019 13:31:02 -0400 Subject: bcachefs: Make deferred inode updates a mount option Journal reclaim may still need performance tuning Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 8 ++++- fs/bcachefs/fs-io.c | 79 +++++++++++++++++++++++++++++------------- fs/bcachefs/fs.c | 26 +++++++------- fs/bcachefs/opts.h | 7 +++- 4 files changed, 82 insertions(+), 38 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index bb067e4f627e..0ea4bebdd0af 100644 --- 
a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -953,6 +953,13 @@ retry: a->k.p = iter->pos; bch2_alloc_pack(a, u); + /* + * XXX: + * when using deferred btree updates, we have journal reclaim doing + * btree updates and thus requiring the allocator to make forward + * progress, and here the allocator is requiring space in the journal - + * so we need a journal pre-reservation: + */ ret = bch2_btree_insert_at(c, NULL, invalidating_cached_data ? journal_seq : NULL, BTREE_INSERT_ATOMIC| @@ -960,7 +967,6 @@ retry: BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_ALLOC_RESERVE| - BTREE_INSERT_JOURNAL_RESERVED| flags, BTREE_INSERT_ENTRY(iter, &a->k_i)); if (ret == -EINTR) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index eda6d71646e1..251c811abeda 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -287,6 +287,8 @@ static int bch2_extent_update(struct btree_trans *trans, bool direct, s64 *total_delta) { + struct bch_fs *c = trans->c; + struct btree_iter *inode_iter = NULL; struct bch_inode_unpacked inode_u; struct bkey_inode_buf inode_p; bool allocating = false; @@ -319,35 +321,62 @@ static int bch2_extent_update(struct btree_trans *trans, /* XXX: inode->i_size locking */ if (i_sectors_delta || new_i_size > inode->ei_inode.bi_size) { - bch2_btree_iter_unlock(extent_iter); - mutex_lock(&inode->ei_update_lock); + if (c->opts.new_inode_updates) { + bch2_btree_iter_unlock(extent_iter); + mutex_lock(&inode->ei_update_lock); - if (!bch2_btree_iter_relock(extent_iter)) { - mutex_unlock(&inode->ei_update_lock); - return -EINTR; - } + if (!bch2_btree_iter_relock(extent_iter)) { + mutex_unlock(&inode->ei_update_lock); + return -EINTR; + } - inode_locked = true; + inode_locked = true; - if (!inode->ei_inode_update) - inode->ei_inode_update = - bch2_deferred_update_alloc(trans->c, - BTREE_ID_INODES, 64); + if (!inode->ei_inode_update) + inode->ei_inode_update = + bch2_deferred_update_alloc(c, + BTREE_ID_INODES, 64); - inode_u = inode->ei_inode; - inode_u.bi_sectors += i_sectors_delta; + inode_u = inode->ei_inode; + inode_u.bi_sectors += i_sectors_delta; - /* XXX: this is slightly suspect */ - if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - new_i_size > inode_u.bi_size) { - inode_u.bi_size = new_i_size; - extended = true; - } + /* XXX: this is slightly suspect */ + if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > inode_u.bi_size) { + inode_u.bi_size = new_i_size; + extended = true; + } - bch2_inode_pack(&inode_p, &inode_u); - bch2_trans_update(trans, - BTREE_INSERT_DEFERRED(inode->ei_inode_update, - &inode_p.inode.k_i)); + bch2_inode_pack(&inode_p, &inode_u); + bch2_trans_update(trans, + BTREE_INSERT_DEFERRED(inode->ei_inode_update, + &inode_p.inode.k_i)); + } else { + inode_iter = bch2_trans_get_iter(trans, + BTREE_ID_INODES, + POS(k->k.p.inode, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(inode_iter)) + return PTR_ERR(inode_iter); + + ret = bch2_btree_iter_traverse(inode_iter); + if (ret) + goto err; + + inode_u = inode->ei_inode; + inode_u.bi_sectors += i_sectors_delta; + + /* XXX: this is slightly suspect */ + if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > inode_u.bi_size) { + inode_u.bi_size = new_i_size; + extended = true; + } + + bch2_inode_pack(&inode_p, &inode_u); + bch2_trans_update(trans, + BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i)); + } } ret = bch2_trans_commit(trans, disk_res, @@ -376,11 +405,13 @@ static int bch2_extent_update(struct btree_trans *trans, } if (direct) - 
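
The unlock/relock sequence in the bch2_extent_update() hunk here follows a fixed discipline: btree node locks may not be held while taking ei_update_lock, so the code drops them, takes the mutex, then tries to relock, failing with -EINTR (a transaction restart) if it cannot. A stand-alone sketch of that shape, with stand-in types and a pthread mutex in place of the kernel mutex:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

struct iter { bool locked; };		/* stand-in for struct btree_iter */

static void iter_unlock(struct iter *it) { it->locked = false; }

static bool iter_relock(struct iter *it)
{
	/* in the real code this fails if the node changed meanwhile */
	it->locked = true;
	return true;
}

static int update_with_inode_lock(struct iter *extent_iter,
				  pthread_mutex_t *ei_update_lock)
{
	iter_unlock(extent_iter);		/* 1: drop btree locks */
	pthread_mutex_lock(ei_update_lock);	/* 2: take the mutex */

	if (!iter_relock(extent_iter)) {	/* 3: relock, or restart */
		pthread_mutex_unlock(ei_update_lock);
		return -EINTR;
	}

	/* ... update the inode under both locks ... */
	pthread_mutex_unlock(ei_update_lock);
	return 0;
}
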
i_sectors_acct(trans->c, inode, quota_res, i_sectors_delta); + i_sectors_acct(c, inode, quota_res, i_sectors_delta); if (total_delta) *total_delta += i_sectors_delta; err: + if (!IS_ERR_OR_NULL(inode_iter)) + bch2_trans_iter_put(trans, inode_iter); if (inode_locked) mutex_unlock(&inode->ei_update_lock); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5f93ea76785f..dc55d36ecfd5 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -163,22 +163,24 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, lockdep_assert_held(&inode->ei_update_lock); + if (c->opts.new_inode_updates) { /* XXX: Don't do this with btree locks held */ if (!inode->ei_inode_update) inode->ei_inode_update = bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64); -#if 0 - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, - POS(inode->v.i_ino, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter); - - /* The btree node lock is our lock on the inode: */ - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; -#endif + } else { + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, + POS(inode->v.i_ino, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + /* The btree node lock is our lock on the inode: */ + ret = bch2_btree_iter_traverse(iter); + if (ret) + return ret; + } + *inode_u = inode->ei_inode; if (set) { diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index f4cb0625c3cc..53bf06e70cd5 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -295,7 +295,12 @@ enum opt_type { OPT_UINT(0, BCH_REPLICAS_MAX), \ NO_SB_OPT, 1, \ "n", "Data written to this device will be considered\n"\ - "to have already been replicated n times") + "to have already been replicated n times") \ + x(new_inode_updates, u8, \ + OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false, \ + NULL, "Enable new btree write-cache for inode updates") struct bch_opts { -- cgit v1.2.3 From 134915f3d38d830374603b84a9fe2e280f4814ed Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Mar 2019 22:19:57 -0400 Subject: bcachefs: Go rw lazily Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 32 ++--------- fs/bcachefs/bcachefs.h | 14 +---- fs/bcachefs/btree_update.h | 2 + fs/bcachefs/btree_update_leaf.c | 19 +++++- fs/bcachefs/fs.c | 10 ++-- fs/bcachefs/fsck.c | 16 ++++-- fs/bcachefs/journal.c | 2 - fs/bcachefs/journal_io.c | 2 + fs/bcachefs/recovery.c | 25 +++----- fs/bcachefs/super-io.c | 11 ++-- fs/bcachefs/super-io.h | 3 +- fs/bcachefs/super.c | 124 +++++++++++++++++++++++++++++----------- fs/bcachefs/super.h | 4 +- fs/bcachefs/sysfs.c | 4 +- 14 files changed, 153 insertions(+), 115 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c11136506352..da25a1ed5206 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -345,6 +345,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_NOMARK); err: @@ -1626,7 +1627,7 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c) return ret; } -static int __bch2_fs_allocator_start(struct bch_fs *c) +int bch2_fs_allocator_start(struct bch_fs *c) { struct bch_dev *ca; unsigned dev_iter; @@ -1635,6 +1636,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) long bu; int ret = 0; + if (!test_alloc_startup(c) && + bch2_fs_allocator_start_fast(c)) 
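
The XXX comment in the alloc_background.c hunk above describes a cycle: journal reclaim performs btree updates, btree updates need the allocator, and the allocator in turn needs journal space. The fix it points toward is a pre-reservation, taken while blocking is still safe. A hypothetical sketch of that idea; journal_preres_get()/journal_preres_put() and the types here are illustrative names, not this patch's API:

#include <errno.h>

struct journal { int space; };
struct fs { struct journal journal; };
struct journal_res { unsigned u64s; };

#define JOURNAL_RES_MIN 32

/* succeed now, so the later journal write cannot block on reclaim */
static int journal_preres_get(struct journal *j, struct journal_res *res,
			      unsigned u64s)
{
	if (j->space < (int) u64s)
		return -ENOSPC;
	j->space -= u64s;
	res->u64s = u64s;
	return 0;
}

static void journal_preres_put(struct journal *j, struct journal_res *res)
{
	j->space += res->u64s;
	res->u64s = 0;
}

static int invalidate_bucket(struct fs *c)
{
	struct journal_res res;
	int ret;

	/* reserve journal space up front, before entering the cycle */
	ret = journal_preres_get(&c->journal, &res, JOURNAL_RES_MIN);
	if (ret)
		return ret;

	/* ... allocator work that journal reclaim may be waiting on ... */

	journal_preres_put(&c->journal, &res);
	return 0;
}
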
+ return 0; + pr_debug("not enough empty buckets; scanning for reclaimable buckets"); /* @@ -1709,31 +1714,6 @@ err: return ret; } -int bch2_fs_allocator_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned i; - int ret; - - ret = bch2_fs_allocator_start_fast(c) ? 0 : - __bch2_fs_allocator_start(c); - if (ret) - return ret; - - set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); - - for_each_rw_member(ca, c, i) { - ret = bch2_dev_allocator_start(ca); - if (ret) { - percpu_ref_put(&ca->io_ref); - return ret; - } - } - - set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); - return 0; -} - void bch2_fs_allocator_background_init(struct bch_fs *c) { spin_lock_init(&c->freelist_lock); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5a9b776558f6..5eae18e92bd5 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -486,6 +486,7 @@ enum { BCH_FS_INITIAL_GC_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, + BCH_FS_RW, /* shutdown: */ BCH_FS_EMERGENCY_RO, @@ -510,13 +511,6 @@ struct btree_debug { struct dentry *failed; }; -enum bch_fs_state { - BCH_FS_STARTING = 0, - BCH_FS_STOPPING, - BCH_FS_RO, - BCH_FS_RW, -}; - struct bch_fs_pcpu { u64 sectors_available; }; @@ -538,7 +532,6 @@ struct bch_fs { /* ro/rw, add/remove devices: */ struct mutex state_lock; - enum bch_fs_state state; /* Counts outstanding writes, for clean transition to read-only */ struct percpu_ref writes; @@ -800,11 +793,6 @@ static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) #endif } -static inline bool bch2_fs_running(struct bch_fs *c) -{ - return c->state == BCH_FS_RO || c->state == BCH_FS_RW; -} - static inline unsigned bucket_bytes(const struct bch_dev *ca) { return ca->mi.bucket_size << 9; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 4d7cef75a017..879e7ae39586 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -38,6 +38,7 @@ enum { __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOCHECK_RW, + __BTREE_INSERT_LAZY_RW, __BTREE_INSERT_USE_RESERVE, __BTREE_INSERT_USE_ALLOC_RESERVE, __BTREE_INSERT_JOURNAL_REPLAY, @@ -64,6 +65,7 @@ enum { #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) #define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW) +#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW) /* for copygc, or when merging btree nodes */ #define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a05fd7104a72..9c1ca9ad3ead 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -849,8 +849,23 @@ int bch2_trans_commit(struct btree_trans *trans, btree_insert_entry_checks(trans, i); if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && - !percpu_ref_tryget(&c->writes))) - return -EROFS; + !percpu_ref_tryget(&c->writes))) { + if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) + return -EROFS; + + btree_trans_unlock(trans); + + ret = bch2_fs_read_write_early(c); + if (ret) + return ret; + + percpu_ref_get(&c->writes); + + if (!btree_trans_relock(trans)) { + ret = -EINTR; + goto err; + } + } retry: ret = bch2_trans_journal_preres_get(trans); if (ret) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index dc55d36ecfd5..2f01d97470b1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1616,7 +1616,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons mutex_lock(&c->state_lock); - if (!bch2_fs_running(c)) { + if (!test_bit(BCH_FS_STARTED, &c->flags)) 
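
The bch2_trans_commit() hunk above is the heart of going rw lazily: a commit against a read-only filesystem no longer fails outright if the caller passed BTREE_INSERT_LAZY_RW; instead the transaction drops its locks, transitions the filesystem to read-write, retakes the writes ref, and relocks or restarts. Condensed to its control flow (the real hunk routes the -EINTR case through its err label):

	if (!percpu_ref_tryget(&c->writes)) {		/* fs is read-only */
		if (!(trans->flags & BTREE_INSERT_LAZY_RW))
			return -EROFS;			/* old behaviour */

		btree_trans_unlock(trans);		/* can't block with locks held */

		ret = bch2_fs_read_write_early(c);	/* go rw now */
		if (ret)
			return ret;

		percpu_ref_get(&c->writes);

		if (!btree_trans_relock(trans))
			return -EINTR;			/* restart the transaction */
	}
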
{ mutex_unlock(&c->state_lock); closure_put(&c->cl); pr_err("err mounting %s: incomplete filesystem", dev_name); @@ -1672,8 +1672,6 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) return ret; if (opts.read_only != c->opts.read_only) { - const char *err = NULL; - mutex_lock(&c->state_lock); if (opts.read_only) { @@ -1681,9 +1679,9 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) sb->s_flags |= SB_RDONLY; } else { - err = bch2_fs_read_write(c); - if (err) { - bch_err(c, "error going rw: %s", err); + ret = bch2_fs_read_write(c); + if (ret) { + bch_err(c, "error going rw: %i", ret); return -EINVAL; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 2561773cd6dc..439f758d8178 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -174,7 +174,8 @@ static int hash_redo_key(const struct bch_hash_desc desc, bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, tmp, BCH_HASH_SET_MUST_CREATE); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); err: kfree(tmp); return ret; @@ -204,7 +205,8 @@ retry: ret = bch2_hash_delete_at(&trans, desc, info, iter) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); err: if (ret == -EINTR) goto retry; @@ -365,7 +367,9 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, buf, strlen(buf), d->v.d_name, len)) { bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i)); - ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); + ret = bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); if (ret) goto err; @@ -630,7 +634,8 @@ static int check_dirents(struct bch_fs *c) BTREE_INSERT_ENTRY(iter, &n->k_i)); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); kfree(n); if (ret) goto err; @@ -1268,7 +1273,8 @@ static int check_inode(struct btree_trans *trans, bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i)); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); if (ret && ret != -EINTR) bch_err(c, "error in fs gc: error %i " "updating inode", ret); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 64f9c5740ec8..c0dcc0ff65ce 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1027,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j) * only have to go down with the next journal entry we write: */ bch2_journal_seq_blacklist_write(j); - - queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); } /* init/exit: */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index d20672a37fd3..1bb627c05188 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -861,6 +861,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) ret = bch2_trans_commit(&trans, &disk_res, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY); } while ((!ret || ret == -EINTR) && bkey_cmp(k->k.p, iter->pos)); @@ -906,6 +907,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) ret = bch2_btree_insert(c, entry->btree_id, k, NULL, NULL, BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_NOMARK); break; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d7be535f3cc1..f7e3060428cf 100644 --- 
a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -119,8 +119,13 @@ static int verify_superblock_clean(struct bch_fs *c, if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", le64_to_cpu(clean->journal_seq), - le64_to_cpu(j->seq))) - bch2_fs_mark_clean(c, false); + le64_to_cpu(j->seq))) { + ret = bch2_fs_mark_dirty(c); + if (ret) { + bch_err(c, "error going rw"); + return ret; + } + } mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, "superblock read clock doesn't match journal after clean shutdown"); @@ -331,13 +336,6 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.noreplay) goto out; - /* - * Mark dirty before journal replay, fsck: - * XXX: after a clean shutdown, this could be done lazily only when fsck - * finds an error - */ - bch2_fs_mark_clean(c, false); - /* * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish() * will give spurious errors about oldest_gen > bucket_gen - @@ -345,11 +343,6 @@ int bch2_fs_recovery(struct bch_fs *c) */ bch2_fs_journal_start(&c->journal); - err = "error starting allocator"; - ret = bch2_fs_allocator_start(c); - if (ret) - goto err; - bch_verbose(c, "starting journal replay:"); err = "journal replay failed"; ret = bch2_journal_replay(c, &journal); @@ -436,8 +429,8 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_fs_journal_start(&c->journal); bch2_journal_set_replay_done(&c->journal); - err = "error starting allocator"; - ret = bch2_fs_allocator_start(c); + err = "error going read write"; + ret = bch2_fs_read_write_early(c); if (ret) goto err; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index c89fe5d630e4..dec6a737f44f 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -886,7 +886,7 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); } -static void bch2_fs_mark_dirty(struct bch_fs *c) +int bch2_fs_mark_dirty(struct bch_fs *c) { mutex_lock(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb) || @@ -896,6 +896,8 @@ static void bch2_fs_mark_dirty(struct bch_fs *c) bch2_write_super(c); } mutex_unlock(&c->sb_lock); + + return 0; } struct jset_entry * @@ -997,17 +999,12 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, return entry; } -void bch2_fs_mark_clean(struct bch_fs *c, bool clean) +void bch2_fs_mark_clean(struct bch_fs *c) { struct bch_sb_field_clean *sb_clean; struct jset_entry *entry; unsigned u64s; - if (!clean) { - bch2_fs_mark_dirty(c); - return; - } - mutex_lock(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb)) goto out; diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 498a9e887d4e..afc92d14c254 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -141,7 +141,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *, void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); -void bch2_fs_mark_clean(struct bch_fs *, bool); +int bch2_fs_mark_dirty(struct bch_fs *); +void bch2_fs_mark_clean(struct bch_fs *); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e8242bb70b93..5364b95cfec9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -258,8 +258,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes) void bch2_fs_read_only(struct bch_fs *c) { - if (c->state == BCH_FS_RO) + if (!test_bit(BCH_FS_RW, &c->flags)) { + 
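
This commit also deletes enum bch_fs_state in favour of independent bits in c->flags (BCH_FS_STARTED, BCH_FS_RW, and so on). The point is that started/stopping and ro/rw are orthogonal states, and atomic bit operations need no lock around a single state variable. A userspace approximation of the test_bit()/set_bit()/clear_bit() usage, with C11 atomics standing in for the kernel bitops:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { FS_STARTED, FS_RW, FS_EMERGENCY_RO };	/* independent, not exclusive */

static atomic_ulong flags;

static void set_flag(int f)   { atomic_fetch_or(&flags, 1UL << f); }
static void clear_flag(int f) { atomic_fetch_and(&flags, ~(1UL << f)); }
static bool test_flag(int f)  { return atomic_load(&flags) & (1UL << f); }

int main(void)
{
	set_flag(FS_STARTED);
	set_flag(FS_RW);
	clear_flag(FS_RW);			/* going read-only */
	printf("started=%d rw=%d\n", test_flag(FS_STARTED), test_flag(FS_RW));
	return 0;
}
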
cancel_delayed_work_sync(&c->journal.reclaim_work); return; + } BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); @@ -301,10 +303,9 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && test_bit(BCH_FS_STARTED, &c->flags)) - bch2_fs_mark_clean(c, true); + bch2_fs_mark_clean(c); - if (c->state != BCH_FS_STOPPING) - c->state = BCH_FS_RO; + clear_bit(BCH_FS_RW, &c->flags); } static void bch2_fs_read_only_work(struct work_struct *work) @@ -333,55 +334,106 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) return ret; } -const char *bch2_fs_read_write(struct bch_fs *c) +static int bch2_fs_read_write_late(struct bch_fs *c) { struct bch_dev *ca; - const char *err = NULL; unsigned i; + int ret; - if (c->state == BCH_FS_RW) - return NULL; + ret = bch2_gc_thread_start(c); + if (ret) { + bch_err(c, "error starting gc thread"); + return ret; + } + + for_each_rw_member(ca, c, i) { + ret = bch2_copygc_start(c, ca); + if (ret) { + bch_err(c, "error starting copygc threads"); + percpu_ref_put(&ca->io_ref); + return ret; + } + } + + ret = bch2_rebalance_start(c); + if (ret) { + bch_err(c, "error starting rebalance thread"); + return ret; + } + + schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); + + return 0; +} + +static int __bch2_fs_read_write(struct bch_fs *c, bool early) +{ + struct bch_dev *ca; + unsigned i; + int ret; + + if (test_bit(BCH_FS_RW, &c->flags)) + return 0; - bch2_fs_mark_clean(c, false); + ret = bch2_fs_mark_dirty(c); + if (ret) + goto err; for_each_rw_member(ca, c, i) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - err = "error starting allocator thread"; - for_each_rw_member(ca, c, i) - if (bch2_dev_allocator_start(ca)) { - percpu_ref_put(&ca->io_ref); + if (!test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) { + ret = bch2_fs_allocator_start(c); + if (ret) { + bch_err(c, "error initializing allocator"); goto err; } - set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); - - err = "error starting btree GC thread"; - if (bch2_gc_thread_start(c)) - goto err; + set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); + } - err = "error starting copygc thread"; - for_each_rw_member(ca, c, i) - if (bch2_copygc_start(c, ca)) { + for_each_rw_member(ca, c, i) { + ret = bch2_dev_allocator_start(ca); + if (ret) { + bch_err(c, "error starting allocator threads"); percpu_ref_put(&ca->io_ref); goto err; } + } - err = "error starting rebalance thread"; - if (bch2_rebalance_start(c)) - goto err; + set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); - schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); + if (!early) { + ret = bch2_fs_read_write_late(c); + if (ret) + goto err; + } - if (c->state != BCH_FS_STARTING) - percpu_ref_reinit(&c->writes); + percpu_ref_reinit(&c->writes); + set_bit(BCH_FS_RW, &c->flags); - c->state = BCH_FS_RW; - return NULL; + queue_delayed_work(c->journal_reclaim_wq, + &c->journal.reclaim_work, 0); + return 0; err: __bch2_fs_read_only(c); - return err; + return ret; +} + +int bch2_fs_read_write(struct bch_fs *c) +{ + return __bch2_fs_read_write(c, false); +} + +int bch2_fs_read_write_early(struct bch_fs *c) +{ + lockdep_assert_held(&c->state_lock); + + if (c->opts.read_only) + return -EROFS; + + return __bch2_fs_read_write(c, true); } /* Filesystem startup/shutdown: */ @@ -638,7 +690,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", 
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || - percpu_ref_init(&c->writes, bch2_writes_disabled, 0, GFP_KERNEL) || + percpu_ref_init(&c->writes, bch2_writes_disabled, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1, sizeof(struct btree_reserve)) || mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, @@ -712,7 +765,7 @@ const char *bch2_fs_start(struct bch_fs *c) mutex_lock(&c->state_lock); - BUG_ON(c->state != BCH_FS_STARTING); + BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); mutex_lock(&c->sb_lock); @@ -746,9 +799,12 @@ const char *bch2_fs_start(struct bch_fs *c) if (c->opts.read_only) { bch2_fs_read_only(c); } else { - err = bch2_fs_read_write(c); - if (err) + if (!test_bit(BCH_FS_RW, &c->flags) + ? bch2_fs_read_write(c) + : bch2_fs_read_write_late(c)) { + err = "error going read write"; goto err; + } } set_bit(BCH_FS_STARTED, &c->flags); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 3f730164ca69..91df0d729322 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -217,7 +217,9 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); -const char *bch2_fs_read_write(struct bch_fs *); + +int bch2_fs_read_write(struct bch_fs *); +int bch2_fs_read_write_early(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 361f7b7addcf..f1e269671374 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -289,7 +289,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) compressed_sectors_compressed = 0, compressed_sectors_uncompressed = 0; - if (!bch2_fs_running(c)) + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) @@ -482,7 +482,7 @@ STORE(__bch2_fs) BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM - if (!bch2_fs_running(c)) + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; /* Debugging: */ -- cgit v1.2.3 From 03e183cb5d429a3bb53816d70da7c19f0745909e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Mar 2019 23:13:46 -0400 Subject: bcachefs: Verify fs hasn't been modified before going rw Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/fs.c | 1 + fs/bcachefs/recovery.c | 15 +++++---- fs/bcachefs/super-io.c | 77 ++++++++++++++++++++++++++++++++++++++--------- fs/bcachefs/super-io.h | 2 +- fs/bcachefs/super.c | 4 ++- fs/bcachefs/super.h | 1 + fs/bcachefs/super_types.h | 1 + 8 files changed, 78 insertions(+), 24 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5eae18e92bd5..a815d7a488a6 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -390,6 +390,7 @@ struct bch_dev { char name[BDEVNAME_SIZE]; struct bch_sb_handle disk_sb; + struct bch_sb *sb_read_scratch; int sb_write_error; struct bch_devs_mask self; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 2f01d97470b1..2a5a90b2a781 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1682,6 +1682,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) ret = bch2_fs_read_write(c); if (ret) { bch_err(c, "error going rw: %i", ret); + mutex_unlock(&c->state_lock); return -EINVAL; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index f7e3060428cf..93c4d5887e8b 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -107,10 +107,11 @@ static int 
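
The mechanism this commit adds: bch_sb_handle grows a seq field recording the superblock sequence number as of our last read or write, and before going read-write each device's superblock is read back into ca->sb_read_scratch and compared. The check itself reduces to a few lines; a sketch with stand-in types, where only the comparison is taken from the patch:

#include <errno.h>

typedef unsigned long long u64;

struct sb_handle {
	u64 seq;	/* seq as of our last successful read/write */
};

/*
 * Called with the freshly read-back superblock's seq: if another process
 * wrote the superblock while we were read-only, the seqs won't match and
 * we must refuse to go rw.
 */
static int sb_unmodified(const struct sb_handle *sb, u64 ondisk_seq)
{
	return sb->seq == ondisk_seq ? 0 : -EROFS;
}
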
journal_replay_entry_early(struct bch_fs *c, } static int verify_superblock_clean(struct bch_fs *c, - struct bch_sb_field_clean *clean, + struct bch_sb_field_clean **cleanp, struct jset *j) { unsigned i; + struct bch_sb_field_clean *clean = *cleanp; int ret = 0; if (!clean || !j) @@ -120,11 +121,9 @@ static int verify_superblock_clean(struct bch_fs *c, "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", le64_to_cpu(clean->journal_seq), le64_to_cpu(j->seq))) { - ret = bch2_fs_mark_dirty(c); - if (ret) { - bch_err(c, "error going rw"); - return ret; - } + kfree(clean); + *cleanp = NULL; + return 0; } mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, @@ -236,7 +235,7 @@ int bch2_fs_recovery(struct bch_fs *c) BUG_ON(ret); } - ret = verify_superblock_clean(c, clean, j); + ret = verify_superblock_clean(c, &clean, j); if (ret) goto err; @@ -430,7 +429,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_journal_set_replay_done(&c->journal); err = "error going read write"; - ret = bch2_fs_read_write_early(c); + ret = __bch2_fs_read_write(c, true); if (ret) goto err; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index dec6a737f44f..f504743fff4d 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -509,6 +509,8 @@ reread: if (bch2_crc_cmp(csum, sb->sb->csum)) return "bad checksum reading superblock"; + sb->seq = le64_to_cpu(sb->sb->seq); + return NULL; } @@ -642,6 +644,25 @@ static void write_super_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } +static void read_back_super(struct bch_fs *c, struct bch_dev *ca) +{ + struct bch_sb *sb = ca->disk_sb.sb; + struct bio *bio = ca->disk_sb.bio; + + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); + bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); + bio->bi_iter.bi_size = 4096; + bio->bi_end_io = write_super_endio; + bio->bi_private = ca; + bch2_bio_map(bio, ca->sb_read_scratch); + + this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB], + bio_sectors(bio)); + + percpu_ref_get(&ca->io_ref); + closure_bio_submit(bio, &c->sb_write); +} + static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) { struct bch_sb *sb = ca->disk_sb.sb; @@ -669,7 +690,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) closure_bio_submit(bio, &c->sb_write); } -void bch2_write_super(struct bch_fs *c) +int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; struct bch_dev *ca; @@ -677,6 +698,7 @@ void bch2_write_super(struct bch_fs *c) const char *err; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; + int ret = 0; lockdep_assert_held(&c->sb_lock); @@ -692,6 +714,7 @@ void bch2_write_super(struct bch_fs *c) err = bch2_sb_validate(&ca->disk_sb); if (err) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err); + ret = -1; goto out; } } @@ -705,10 +728,27 @@ void bch2_write_super(struct bch_fs *c) ca->sb_write_error = 0; } + for_each_online_member(ca, c, i) + read_back_super(c, ca); + closure_sync(cl); + + for_each_online_member(ca, c, i) { + if (!ca->sb_write_error && + ca->disk_sb.seq != + le64_to_cpu(ca->sb_read_scratch->seq)) { + bch2_fs_fatal_error(c, + "Superblock modified by another process"); + percpu_ref_put(&ca->io_ref); + ret = -EROFS; + goto out; + } + } + do { wrote = false; for_each_online_member(ca, c, i) - if (sb < ca->disk_sb.sb->layout.nr_superblocks) { + if (!ca->sb_write_error && + sb < ca->disk_sb.sb->layout.nr_superblocks) { 
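
The error rule at the end of bch2_write_super() deserves a gloss: after the write loop, failure is fatal if nothing was written at all, or if the filesystem could be mounted without the devices we successfully wrote but not with them alone; that is, if a later mount could be satisfied entirely by devices still carrying stale superblocks. In boolean form, using the names from the function itself:

	/* fatal iff a future mount could see only stale superblocks */
	bool fatal = !nr_wrote ||
		     (can_mount_without_written && !can_mount_with_written);
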
write_one_super(c, ca, sb); wrote = true; } @@ -716,9 +756,12 @@ void bch2_write_super(struct bch_fs *c) sb++; } while (wrote); - for_each_online_member(ca, c, i) + for_each_online_member(ca, c, i) { if (ca->sb_write_error) __clear_bit(ca->dev_idx, sb_written.d); + else + ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq); + } nr_wrote = dev_mask_nr(&sb_written); @@ -741,13 +784,15 @@ void bch2_write_super(struct bch_fs *c) * written anything (new filesystem), we continue if we'd be able to * mount with the devices we did successfully write to: */ - bch2_fs_fatal_err_on(!nr_wrote || - (can_mount_without_written && - !can_mount_with_written), c, - "Unable to write superblock to sufficient devices"); + if (bch2_fs_fatal_err_on(!nr_wrote || + (can_mount_without_written && + !can_mount_with_written), c, + "Unable to write superblock to sufficient devices")) + ret = -1; out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); + return ret; } /* BCH_SB_FIELD_journal: */ @@ -888,16 +933,20 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) int bch2_fs_mark_dirty(struct bch_fs *c) { + int ret; + + /* + * Unconditionally write superblock, to verify it hasn't changed before + * we go rw: + */ + mutex_lock(&c->sb_lock); - if (BCH_SB_CLEAN(c->disk_sb.sb) || - (c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) { - SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); - bch2_write_super(c); - } + SET_BCH_SB_CLEAN(c->disk_sb.sb, false); + c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); + ret = bch2_write_super(c); mutex_unlock(&c->sb_lock); - return 0; + return ret; } struct jset_entry * diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index afc92d14c254..31b8b8307ac3 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -89,7 +89,7 @@ int bch2_sb_realloc(struct bch_sb_handle *, unsigned); const char *bch2_sb_validate(struct bch_sb_handle *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); -void bch2_write_super(struct bch_fs *); +int bch2_write_super(struct bch_fs *); /* BCH_SB_FIELD_journal: */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 5364b95cfec9..dd1496af9a06 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -366,7 +366,7 @@ static int bch2_fs_read_write_late(struct bch_fs *c) return 0; } -static int __bch2_fs_read_write(struct bch_fs *c, bool early) +int __bch2_fs_read_write(struct bch_fs *c, bool early) { struct bch_dev *ca; unsigned i; @@ -907,6 +907,7 @@ static void bch2_dev_free(struct bch_dev *ca) free_percpu(ca->io_done); bioset_exit(&ca->replica_set); bch2_dev_buckets_free(ca); + kfree(ca->sb_read_scratch); bch2_time_stats_exit(&ca->io_latency[WRITE]); bch2_time_stats_exit(&ca->io_latency[READ]); @@ -1017,6 +1018,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, 0, GFP_KERNEL) || percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || + !(ca->sb_read_scratch = kmalloc(4096, GFP_KERNEL)) || bch2_dev_buckets_alloc(c, ca) || bioset_init(&ca->replica_set, 4, offsetof(struct bch_write_bio, bio), 0) || diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 91df0d729322..92ef3e7c8dc2 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -218,6 +218,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); +int 
__bch2_fs_read_write(struct bch_fs *, bool); int bch2_fs_read_write(struct bch_fs *); int bch2_fs_read_write_early(struct bch_fs *); diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 04a15729a244..6d0168a73ee4 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -12,6 +12,7 @@ struct bch_sb_handle { unsigned have_layout:1; unsigned have_bio:1; unsigned fs_sb:1; + u64 seq; }; struct bch_devs_mask { -- cgit v1.2.3 From 424eb881300467a21a108d04c9dd08a6f8c007dc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Mar 2019 15:10:15 -0400 Subject: bcachefs: Only get btree iters from btree transactions Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 16 ++-- fs/bcachefs/btree_gc.c | 34 +++++---- fs/bcachefs/btree_io.c | 18 +++-- fs/bcachefs/btree_iter.c | 63 ++++++++++++---- fs/bcachefs/btree_iter.h | 53 ++++++------- fs/bcachefs/btree_update_leaf.c | 2 +- fs/bcachefs/chardev.c | 4 +- fs/bcachefs/debug.c | 41 ++++++---- fs/bcachefs/dirent.c | 19 +++-- fs/bcachefs/ec.c | 39 ++++++---- fs/bcachefs/extents.c | 11 ++- fs/bcachefs/fs-io.c | 59 +++++++++------ fs/bcachefs/fs.c | 11 ++- fs/bcachefs/fsck.c | 146 ++++++++++++++++++------------------ fs/bcachefs/fsck.h | 1 - fs/bcachefs/inode.c | 12 +-- fs/bcachefs/io.c | 57 ++++++++------ fs/bcachefs/journal_seq_blacklist.c | 17 +++-- fs/bcachefs/migrate.c | 22 +++--- fs/bcachefs/move.c | 72 ++++++++++++------ fs/bcachefs/move_types.h | 3 +- fs/bcachefs/quota.c | 18 +++-- fs/bcachefs/rebalance.c | 4 +- fs/bcachefs/str_hash.h | 46 ++++++++---- fs/bcachefs/sysfs.c | 9 ++- fs/bcachefs/tests.c | 121 ++++++++++++++++++------------ fs/bcachefs/xattr.c | 10 ++- 27 files changed, 550 insertions(+), 358 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index da25a1ed5206..436eb1e1ab07 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -264,18 +264,21 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k) int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) { struct journal_replay *r; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bch_dev *ca; unsigned i; int ret; - for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k) { bch2_alloc_read_key(c, k); - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; @@ -391,8 +394,6 @@ static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca, __alloc_write_key(a, g, m); percpu_up_read(&c->mark_lock); - bch2_btree_iter_cond_resched(iter); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); ret = bch2_trans_commit(trans, NULL, journal_seq, @@ -450,6 +451,7 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote) if (ret) break; + bch2_trans_cond_resched(&trans); *wrote = true; } up_read(&ca->bucket_lock); @@ -938,8 +940,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, spin_unlock(&c->freelist_lock); percpu_up_read(&c->mark_lock); - bch2_btree_iter_cond_resched(iter); - BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 302793d84b92..aa8ac7d661ee 100644 --- 
a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -207,13 +207,16 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bool initial) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; struct range_checks r; unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1; u8 max_stale; int ret = 0; + bch2_trans_init(&trans, c); + gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); /* @@ -227,7 +230,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, btree_node_range_checks_init(&r, depth); - __for_each_btree_node(&iter, c, btree_id, POS_MIN, + __for_each_btree_node(&trans, iter, btree_id, POS_MIN, 0, depth, BTREE_ITER_PREFETCH, b) { btree_node_range_checks(c, b, &r); @@ -241,22 +244,22 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, if (!initial) { if (max_stale > 64) - bch2_btree_node_rewrite(c, &iter, + bch2_btree_node_rewrite(c, iter, b->data->keys.seq, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); else if (!btree_gc_rewrite_disabled(c) && (btree_gc_always_rewrite(c) || max_stale > 16)) - bch2_btree_node_rewrite(c, &iter, + bch2_btree_node_rewrite(c, iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_exit(&trans) ?: ret; if (ret) return ret; @@ -1030,7 +1033,8 @@ next: static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; bool kthread = (current->flags & PF_KTHREAD) != 0; unsigned i; @@ -1039,6 +1043,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) struct btree *merge[GC_MERGE_NODES]; u32 lock_seq[GC_MERGE_NODES]; + bch2_trans_init(&trans, c); + /* * XXX: We don't have a good way of positively matching on sibling nodes * that have the same parent - this code works by handling the cases @@ -1048,7 +1054,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) */ memset(merge, 0, sizeof(merge)); - __for_each_btree_node(&iter, c, btree_id, POS_MIN, + __for_each_btree_node(&trans, iter, btree_id, POS_MIN, BTREE_MAX_DEPTH, 0, BTREE_ITER_PREFETCH, b) { memmove(merge + 1, merge, @@ -1070,7 +1076,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) } memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0])); - bch2_coalesce_nodes(c, &iter, merge); + bch2_coalesce_nodes(c, iter, merge); for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) { lock_seq[i] = merge[i]->lock.state.seq; @@ -1080,23 +1086,23 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) lock_seq[0] = merge[0]->lock.state.seq; if (kthread && kthread_should_stop()) { - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return -ESHUTDOWN; } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); /* * If the parent node wasn't relocked, it might have been split * and the nodes in our sliding window might not have the same * parent anymore - blow away the sliding window: */ - if (btree_iter_node(&iter, iter.level + 1) && - !btree_node_intent_locked(&iter, iter.level + 1)) + if (btree_iter_node(iter, iter->level + 1) && + !btree_node_intent_locked(iter, iter->level + 1)) memset(merge + 1, 0, (GC_MERGE_NODES - 1) * sizeof(merge[0])); } - 
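
The conversion pattern repeated across this commit: a stack-allocated struct btree_iter released with bch2_btree_iter_unlock() becomes a struct btree_trans that owns its iterators, with cleanup and error collection centralized in bch2_trans_exit(). The before/after skeleton, using the calls shown in the hunks (process() is a stand-in for the loop body):

	/* before */
	struct btree_iter iter;

	for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k)
		process(k);
	ret = bch2_btree_iter_unlock(&iter);

	/* after */
	struct btree_trans trans;
	struct btree_iter *iter;

	bch2_trans_init(&trans, c);
	for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k) {
		process(k);
		bch2_trans_cond_resched(&trans);
	}
	ret = bch2_trans_exit(&trans);
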
return bch2_btree_iter_unlock(&iter); + return bch2_trans_exit(&trans); } /** diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d785e6ac22f7..10b3d53b6ebb 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1153,19 +1153,21 @@ static void bch2_btree_node_write_error(struct bch_fs *c, struct bkey_i_btree_ptr *new_key; struct bkey_s_btree_ptr bp; struct bch_extent_ptr *ptr; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; int ret; - __bch2_btree_iter_init(&iter, c, b->btree_id, b->key.k.p, - BTREE_MAX_DEPTH, - b->level, BTREE_ITER_NODES); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_node_iter(&trans, b->btree_id, b->key.k.p, + BTREE_MAX_DEPTH, b->level, 0); retry: - ret = bch2_btree_iter_traverse(&iter); + ret = bch2_btree_iter_traverse(iter); if (ret) goto err; /* has node been freed? */ - if (iter.l[b->level].b != b) { + if (iter->l[b->level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); goto out; @@ -1184,13 +1186,13 @@ retry: if (!bch2_bkey_nr_ptrs(bp.s_c)) goto err; - ret = bch2_btree_node_update_key(c, &iter, b, new_key); + ret = bch2_btree_node_update_key(c, iter, b, new_key); if (ret == -EINTR) goto retry; if (ret) goto err; out: - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b); return; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 3d613e8cd55b..b2446b14bf33 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1582,15 +1582,15 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) return __bch2_btree_iter_peek_slot(iter); } -void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c, - enum btree_id btree_id, struct bpos pos, - unsigned locks_want, unsigned depth, - unsigned flags) +static inline void bch2_btree_iter_init(struct btree_iter *iter, + struct bch_fs *c, enum btree_id btree_id, + struct bpos pos, unsigned flags) { unsigned i; - EBUG_ON(depth >= BTREE_MAX_DEPTH); - EBUG_ON(locks_want > BTREE_MAX_DEPTH); + if (btree_id == BTREE_ID_EXTENTS && + !(flags & BTREE_ITER_NODES)) + flags |= BTREE_ITER_IS_EXTENTS; iter->c = c; iter->pos = pos; @@ -1599,8 +1599,8 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c, iter->flags = flags; iter->uptodate = BTREE_ITER_NEED_TRAVERSE; iter->btree_id = btree_id; - iter->level = depth; - iter->locks_want = locks_want; + iter->level = 0; + iter->locks_want = flags & BTREE_ITER_INTENT ? 1 : 0; iter->nodes_locked = 0; iter->nodes_intent_locked = 0; for (i = 0; i < ARRAY_SIZE(iter->l); i++) @@ -1677,12 +1677,14 @@ static inline unsigned btree_trans_iter_idx(struct btree_trans *trans, return idx; } -void bch2_trans_iter_put(struct btree_trans *trans, - struct btree_iter *iter) +int bch2_trans_iter_put(struct btree_trans *trans, + struct btree_iter *iter) { ssize_t idx = btree_trans_iter_idx(trans, iter); + int ret = (iter->flags & BTREE_ITER_ERROR) ? -EIO : 0; trans->iters_live &= ~(1ULL << idx); + return ret; } static inline void __bch2_trans_iter_free(struct btree_trans *trans, @@ -1696,17 +1698,23 @@ static inline void __bch2_trans_iter_free(struct btree_trans *trans, bch2_btree_iter_unlink(&trans->iters[idx]); } -void bch2_trans_iter_free(struct btree_trans *trans, - struct btree_iter *iter) +int bch2_trans_iter_free(struct btree_trans *trans, + struct btree_iter *iter) { + int ret = (iter->flags & BTREE_ITER_ERROR) ? 
-EIO : 0; + __bch2_trans_iter_free(trans, btree_trans_iter_idx(trans, iter)); + return ret; } -void bch2_trans_iter_free_on_commit(struct btree_trans *trans, - struct btree_iter *iter) +int bch2_trans_iter_free_on_commit(struct btree_trans *trans, + struct btree_iter *iter) { + int ret = (iter->flags & BTREE_ITER_ERROR) ? -EIO : 0; + trans->iters_unlink_on_commit |= 1ULL << btree_trans_iter_idx(trans, iter); + return ret; } static int btree_trans_realloc_iters(struct btree_trans *trans, @@ -1820,7 +1828,7 @@ got_slot: iter = &trans->iters[idx]; iter->id = iter_id; - bch2_btree_iter_init(iter, trans->c, btree_id, POS_MIN, flags); + bch2_btree_iter_init(iter, trans->c, btree_id, pos, flags); } else { iter = &trans->iters[idx]; @@ -1861,6 +1869,31 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, return iter; } +struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, + enum btree_id btree_id, + struct bpos pos, + unsigned locks_want, + unsigned depth, + unsigned flags) +{ + struct btree_iter *iter = + __btree_trans_get_iter(trans, btree_id, pos, + flags|BTREE_ITER_NODES, 0); + unsigned i; + + BUG_ON(IS_ERR(iter)); + BUG_ON(bkey_cmp(iter->pos, pos)); + + iter->locks_want = locks_want; + iter->level = depth; + + for (i = 0; i < ARRAY_SIZE(iter->l); i++) + iter->l[i].b = NULL; + iter->l[iter->level].b = BTREE_ITER_NOT_END; + + return iter; +} + struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src, u64 iter_id) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 04f747180bd8..267cecd05d84 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -150,20 +150,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *); void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); -void __bch2_btree_iter_init(struct btree_iter *, struct bch_fs *, - enum btree_id, struct bpos, - unsigned , unsigned, unsigned); - -static inline void bch2_btree_iter_init(struct btree_iter *iter, - struct bch_fs *c, enum btree_id btree_id, - struct bpos pos, unsigned flags) -{ - __bch2_btree_iter_init(iter, c, btree_id, pos, - flags & BTREE_ITER_INTENT ? 1 : 0, 0, - (btree_id == BTREE_ID_EXTENTS - ? 
BTREE_ITER_IS_EXTENTS : 0)|flags); -} - void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *); static inline struct bpos btree_type_successor(enum btree_id id, @@ -221,17 +207,18 @@ static inline void bch2_btree_iter_cond_resched(struct btree_iter *iter) } } -#define __for_each_btree_node(_iter, _c, _btree_id, _start, \ +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ _locks_want, _depth, _flags, _b) \ - for (__bch2_btree_iter_init((_iter), (_c), (_btree_id), _start, \ - _locks_want, _depth, \ - _flags|BTREE_ITER_NODES), \ + for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ + _start, _locks_want, _depth, _flags), \ _b = bch2_btree_iter_peek_node(_iter); \ (_b); \ (_b) = bch2_btree_iter_next_node(_iter, _depth)) -#define for_each_btree_node(_iter, _c, _btree_id, _start, _flags, _b) \ - __for_each_btree_node(_iter, _c, _btree_id, _start, 0, 0, _flags, _b) +#define for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _flags, _b) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b) static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, unsigned flags) @@ -251,9 +238,9 @@ static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, : bch2_btree_iter_next(iter); } -#define for_each_btree_key(_iter, _c, _btree_id, _start, _flags, _k) \ - for (bch2_btree_iter_init((_iter), (_c), (_btree_id), \ - (_start), (_flags)), \ +#define for_each_btree_key(_trans, _iter, _btree_id, _start, _flags, _k)\ + for (iter = bch2_trans_get_iter((_trans), (_btree_id), \ + (_start), (_flags)), \ (_k) = __bch2_btree_iter_peek(_iter, _flags); \ !IS_ERR_OR_NULL((_k).k); \ (_k) = __bch2_btree_iter_next(_iter, _flags)) @@ -271,9 +258,9 @@ static inline int btree_iter_err(struct bkey_s_c k) /* new multiple iterator interface: */ void bch2_trans_preload_iters(struct btree_trans *); -void bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -void bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); -void bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *); +int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); +int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); +int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *); void bch2_trans_unlink_iters(struct btree_trans *, u64); @@ -308,6 +295,10 @@ bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) return __bch2_trans_copy_iter(trans, src, __btree_iter_id()); } +struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, + enum btree_id, struct bpos, + unsigned, unsigned, unsigned); + void __bch2_trans_begin(struct btree_trans *); static inline void bch2_trans_begin_updates(struct btree_trans *trans) @@ -320,6 +311,16 @@ int bch2_trans_unlock(struct btree_trans *); void bch2_trans_init(struct btree_trans *, struct bch_fs *); int bch2_trans_exit(struct btree_trans *); +static inline void bch2_trans_cond_resched(struct btree_trans *trans) +{ + if (need_resched()) { + bch2_trans_unlock(trans); + schedule(); + } else if (race_fault()) { + bch2_trans_unlock(trans); + } +} + #ifdef TRACE_TRANSACTION_RESTARTS #define bch2_trans_begin(_trans) \ do { \ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 1c9bfec922c5..45838db7b991 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -999,7 +999,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, if (ret) break; - 
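[editorial note: the btree_iter.h changes above replace the old open-coded iterator with one owned by a btree_trans. As a side-by-side sketch of the resulting calling convention -- illustrative only, not part of the patch; walk_key() stands in for whatever per-key work a caller does:

	/* before: stack-allocated iterator, unlocked by hand */
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
		walk_key(k);
	ret = bch2_btree_iter_unlock(&iter);

	/* after: the iterator is allocated in, and torn down with, a btree_trans */
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c);
	for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k)
		walk_key(k);
	ret = bch2_trans_exit(&trans);

Inside such loops, bch2_trans_cond_resched() takes over from bch2_btree_iter_cond_resched(): it drops the transaction's locks before scheduling, and the next iterator operation re-takes them.]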
bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(&trans); } bch2_trans_exit(&trans); diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 5ee38a6a442f..f7cfec9f00f9 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -303,8 +303,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, struct bch_ioctl_data_event e = { .type = BCH_DATA_EVENT_PROGRESS, .p.data_type = ctx->stats.data_type, - .p.btree_id = ctx->stats.iter.btree_id, - .p.pos = ctx->stats.iter.pos, + .p.btree_id = ctx->stats.btree_id, + .p.pos = ctx->stats.pos, .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen), .p.sectors_total = bch2_fs_usage_read_short(c).used, }; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index f15c29878a9e..64e079280a9a 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -205,7 +205,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int err; @@ -220,8 +221,10 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) return i->ret; - bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH); - k = bch2_btree_iter_peek(&iter); + bch2_trans_init(&trans, i->c); + + iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); + k = bch2_btree_iter_peek(iter); while (k.k && !(err = btree_iter_err(k))) { bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); @@ -230,8 +233,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->buf[i->bytes] = '\n'; i->bytes++; - k = bch2_btree_iter_next(&iter); - i->from = iter.pos; + k = bch2_btree_iter_next(iter); + i->from = iter->pos; err = flush_buf(i); if (err) @@ -240,7 +243,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; } @@ -256,7 +259,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; int err; @@ -271,7 +275,9 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size || !bkey_cmp(POS_MAX, i->from)) return i->ret; - for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) { + bch2_trans_init(&trans, i->c); + + for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); i->bytes = strlen(i->buf); err = flush_buf(i); @@ -289,7 +295,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return err < 0 ? 
err : i->ret; } @@ -305,7 +311,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct btree *prev_node = NULL; int err; @@ -321,11 +328,13 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (!i->size) return i->ret; - bch2_btree_iter_init(&iter, i->c, i->id, i->from, BTREE_ITER_PREFETCH); + bch2_trans_init(&trans, i->c); + + iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); - while ((k = bch2_btree_iter_peek(&iter)).k && + while ((k = bch2_btree_iter_peek(iter)).k && !(err = btree_iter_err(k))) { - struct btree_iter_level *l = &iter.l[0]; + struct btree_iter_level *l = &iter->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); @@ -344,8 +353,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (err) break; - bch2_btree_iter_next(&iter); - i->from = iter.pos; + bch2_btree_iter_next(iter); + i->from = iter->pos; err = flush_buf(i); if (err) @@ -354,7 +363,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (!i->size) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index dc3883204d80..672a94936179 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -331,11 +331,15 @@ out: int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, + POS(dir_inum, 0), 0, k) { if (k.k->p.inode > dir_inum) break; @@ -344,7 +348,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) break; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return ret; } @@ -353,7 +357,8 @@ int bch2_readdir(struct bch_fs *c, struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; unsigned len; @@ -361,7 +366,9 @@ int bch2_readdir(struct bch_fs *c, struct file *file, if (!dir_emit_dots(file, ctx)) return 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(inode->v.i_ino, ctx->pos), 0, k) { if (k.k->type != KEY_TYPE_dirent) continue; @@ -387,7 +394,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, ctx->pos = k.k->p.offset + 1; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return 0; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a989ba172faa..c33bcffa7871 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -398,7 +398,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, /* recovery read path: */ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct ec_stripe_buf *buf; struct closure cl; struct bkey_s_c k; @@ -419,19 +420,21 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) if (!buf) return -ENOMEM; - bch2_btree_iter_init(&iter, c, BTREE_ID_EC, - POS(0, 
stripe_idx), - BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(&iter); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, + POS(0, stripe_idx), + BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(iter); if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { __bcache_io_error(c, "error doing reconstruct read: stripe not found"); kfree(buf); - return bch2_btree_iter_unlock(&iter) ?: -EIO; + return bch2_trans_exit(&trans) ?: -EIO; } bkey_reassemble(&buf->key.k_i, k); - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); v = &buf->key.v; @@ -1238,7 +1241,8 @@ static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k) int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) { struct journal_replay *r; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret; @@ -1246,12 +1250,14 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) if (ret) return ret; - for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN, 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k) { bch2_stripe_read_key(c, k); - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; @@ -1269,17 +1275,20 @@ int bch2_stripes_read(struct bch_fs *c, struct list_head *journal_replay_list) int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; size_t i, idx = 0; int ret = 0; - bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS(0, U64_MAX), 0); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); - k = bch2_btree_iter_prev(&iter); + k = bch2_btree_iter_prev(iter); if (!IS_ERR_OR_NULL(k.k)) idx = k.k->p.offset + 1; - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 194b8d6da1bb..ce46417b07a0 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1623,15 +1623,18 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, unsigned nr_replicas) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bpos end = pos; struct bkey_s_c k; bool ret = true; end.offset += size; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, pos, - BTREE_ITER_SLOTS, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, + BTREE_ITER_SLOTS, k) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; @@ -1640,7 +1643,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, break; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return ret; } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 251c811abeda..efc189c02db7 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -997,7 +997,8 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts = io_opts(c, inode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct page *page; struct readpages_iter readpages_iter; int ret; @@ -1005,8 +1006,10 @@ void bch2_readahead(struct readahead_control 
*ractl) ret = readpages_iter_init(&readpages_iter, ractl); BUG_ON(ret); - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, - BTREE_ITER_SLOTS); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, + BTREE_ITER_SLOTS); bch2_pagecache_add_get(&inode->ei_pagecache_lock); @@ -1027,26 +1030,33 @@ void bch2_readahead(struct readahead_control *ractl) rbio->bio.bi_end_io = bch2_readpages_end_io; __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0); - bchfs_read(c, &iter, rbio, inode->v.i_ino, &readpages_iter); + bchfs_read(c, iter, rbio, inode->v.i_ino, &readpages_iter); } bch2_pagecache_add_put(&inode->ei_pagecache_lock); + + bch2_trans_exit(&trans); kfree(readpages_iter.pages); } static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, u64 inum, struct page *page) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; page_state_init_for_read(page); rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; bio_add_page_contig(&rbio->bio, page); - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, - BTREE_ITER_SLOTS); - bchfs_read(c, &iter, rbio, inum, NULL); + bch2_trans_init(&trans, c); + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, + BTREE_ITER_SLOTS); + + bchfs_read(c, iter, rbio, inum, NULL); + + bch2_trans_exit(&trans); } static void bch2_read_single_page_end_io(struct bio *bio) @@ -2111,7 +2121,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode, if (ret) break; - bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(&trans); } bch2_trans_exit(&trans); @@ -2123,13 +2133,14 @@ static inline int range_has_data(struct bch_fs *c, struct bpos start, struct bpos end) { - - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - start, 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; @@ -2139,7 +2150,7 @@ static inline int range_has_data(struct bch_fs *c, } } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } static int __bch2_truncate_page(struct bch_inode_info *inode, @@ -2464,7 +2475,7 @@ btree_iter_err: * pointers... which isn't a _super_ serious problem... 
*/ - bch2_btree_iter_cond_resched(src); + bch2_trans_cond_resched(&trans); } bch2_trans_unlock(&trans); @@ -2709,7 +2720,8 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 isize, next_data = MAX_LFS_FILESIZE; int ret; @@ -2718,7 +2730,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), 0, k) { if (k.k->p.inode != inode->v.i_ino) { break; @@ -2729,7 +2743,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) break; } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; @@ -2779,7 +2793,8 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 isize, next_hole = MAX_LFS_FILESIZE; int ret; @@ -2788,7 +2803,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), BTREE_ITER_SLOTS, k) { if (k.k->p.inode != inode->v.i_ino) { @@ -2807,7 +2824,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) } } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 2a5a90b2a781..6e377a0e176f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -157,7 +157,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, void *p) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter *iter = NULL; struct bkey_inode_buf *inode_p; int ret; @@ -1193,7 +1193,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; BKEY_PADDED(k) tmp; bool have_extent = false; @@ -1206,7 +1207,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(ei->v.i_ino, start >> 9), 0, k) if (bkey_extent_is_data(k.k) || k.k->type == KEY_TYPE_reservation) { @@ -1227,7 +1230,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (have_extent) ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); out: - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return ret < 0 ? 
ret : 0; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 439f758d8178..41284d38db2f 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -16,6 +16,23 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } +static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) +{ + struct btree_iter *iter; + struct bkey_s_c k; + u64 sectors = 0; + + for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) { + if (k.k->p.inode != inum) + break; + + if (bkey_extent_is_allocation(k.k)) + sectors += k.k->size; + } + + return bch2_trans_iter_free(trans, iter) ?: sectors; +} + static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, struct bkey_s_c_dirent dirent) { @@ -181,44 +198,32 @@ err: return ret; } -/* fsck hasn't been converted to new transactions yet: */ -static int fsck_hash_delete_at(const struct bch_hash_desc desc, +static int fsck_hash_delete_at(struct btree_trans *trans, + const struct bch_hash_desc desc, struct bch_hash_info *info, - struct btree_iter *orig_iter) + struct btree_iter *iter) { - struct btree_trans trans; - struct btree_iter *iter; int ret; - - bch2_btree_iter_unlock(orig_iter); - - bch2_trans_init(&trans, orig_iter->c); retry: - bch2_trans_begin(&trans); - - iter = bch2_trans_copy_iter(&trans, orig_iter); - if (IS_ERR(iter)) { - ret = PTR_ERR(iter); - goto err; - } - - ret = bch2_hash_delete_at(&trans, desc, info, iter) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_hash_delete_at(trans, desc, info, iter) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); -err: - if (ret == -EINTR) - goto retry; + if (ret == -EINTR) { + ret = bch2_btree_iter_traverse(iter); + if (!ret) + goto retry; + } - bch2_trans_exit(&trans); return ret; } -static int hash_check_duplicates(const struct bch_hash_desc desc, - struct hash_check *h, struct bch_fs *c, - struct btree_iter *k_iter, struct bkey_s_c k) +static int hash_check_duplicates(struct btree_trans *trans, + const struct bch_hash_desc desc, struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) { + struct bch_fs *c = trans->c; struct btree_iter *iter; struct bkey_s_c k2; char buf[200]; @@ -239,7 +244,7 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, "duplicate hash table keys:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) { - ret = fsck_hash_delete_at(desc, &h->info, k_iter); + ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter); if (ret) return ret; ret = 1; @@ -274,9 +279,9 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc, hash <= k.k->p.offset; } -static int hash_check_key(const struct bch_hash_desc desc, - struct btree_trans *trans, struct hash_check *h, - struct btree_iter *k_iter, struct bkey_s_c k) +static int hash_check_key(struct btree_trans *trans, + const struct bch_hash_desc desc, struct hash_check *h, + struct btree_iter *k_iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; char buf[200]; @@ -312,7 +317,7 @@ static int hash_check_key(const struct bch_hash_desc desc, return 1; } - ret = hash_check_duplicates(desc, h, c, k_iter, k); + ret = hash_check_duplicates(trans, desc, h, k_iter, k); fsck_err: return ret; } @@ -417,14 +422,17 @@ noinline_for_stack static int check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i_sectors; int ret = 0; + bch2_trans_init(&trans, c); + bch_verbose(c, "checking extents"); - 
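[editorial note: the relocated bch2_count_inode_sectors() folds the iterator's deferred error into its s64 return, so a negative value is an error code rather than a sector count. A caller-side sketch -- assumed shape; check_inode() further down in this patch applies the same test:

	s64 sectors = bch2_count_inode_sectors(&trans, inum);

	if (sectors < 0) {
		ret = sectors;	/* error reported by bch2_trans_iter_free() */
		goto err;
	}
	/* otherwise: sectors is the sum of allocated extent sizes for inum */]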
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(BCACHEFS_ROOT_INO, 0), 0, k) { ret = walk_inode(c, &w, k.k->p.inode); if (ret) @@ -437,7 +445,7 @@ static int check_extents(struct bch_fs *c) !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, "extent type %u for non regular file, inode %llu mode %o", k.k->type, k.k->p.inode, w.inode.bi_mode)) { - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); ret = bch2_inode_truncate(c, k.k->p.inode, 0); if (ret) @@ -449,14 +457,14 @@ static int check_extents(struct bch_fs *c) w.have_inode && !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && w.inode.bi_sectors != - (i_sectors = bch2_count_inode_sectors(c, w.cur_inum)), + (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)), c, "i_sectors wrong: got %llu, should be %llu", w.inode.bi_sectors, i_sectors)) { struct bkey_inode_buf p; w.inode.bi_sectors = i_sectors; - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); bch2_inode_pack(&p, &w.inode); @@ -470,7 +478,7 @@ static int check_extents(struct bch_fs *c) } /* revalidate iterator: */ - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); } if (fsck_err_on(w.have_inode && @@ -479,7 +487,7 @@ static int check_extents(struct bch_fs *c) k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); ret = bch2_inode_truncate(c, k.k->p.inode, w.inode.bi_size); @@ -490,7 +498,7 @@ static int check_extents(struct bch_fs *c) } err: fsck_err: - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } /* @@ -688,7 +696,8 @@ static int check_xattrs(struct bch_fs *c) if (w.first_this_inode && w.have_inode) hash_check_set_inode(&h, c, &w.inode); - ret = hash_check_key(bch2_xattr_hash_desc, &trans, &h, iter, k); + ret = hash_check_key(&trans, bch2_xattr_hash_desc, + &h, iter, k); if (ret) goto fsck_err; } @@ -863,13 +872,16 @@ static int check_directory_structure(struct bch_fs *c, struct inode_bitmap dirs_done = { NULL, 0 }; struct pathbuf path = { 0, 0, NULL }; struct pathbuf_entry *e; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; bool had_unreachable; u64 d_inum; int ret = 0; + bch2_trans_init(&trans, c); + bch_verbose(c, "checking directory structure"); /* DFS: */ @@ -894,7 +906,7 @@ next: if (e->offset == U64_MAX) goto up; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(e->inum, e->offset + 1), 0, k) { if (k.k->p.inode != e->inum) break; @@ -914,7 +926,7 @@ next: if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, "directory %llu has multiple hardlinks", d_inum)) { - ret = remove_dirent(c, &iter, dirent); + ret = remove_dirent(c, iter, dirent); if (ret) goto err; continue; @@ -931,10 +943,14 @@ next: goto err; } - bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); + if (ret) { + bch_err(c, "btree error %i in fsck", ret); + goto err; + } goto next; } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); if (ret) { bch_err(c, "btree error %i in fsck", ret); goto err; @@ -943,7 +959,7 @@ up: path.nr--; } - for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k) { if (k.k->type != 
KEY_TYPE_inode) continue; @@ -956,7 +972,7 @@ up: if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, "unreachable directory found (inum %llu)", k.k->p.inode)) { - bch2_btree_iter_unlock(&iter); + bch2_btree_iter_unlock(iter); ret = reattach_inode(c, lostfound_inode, k.k->p.inode); if (ret) { @@ -966,7 +982,7 @@ up: had_unreachable = true; } } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_iter_free(&trans, iter); if (ret) goto err; @@ -985,7 +1001,7 @@ out: return ret; err: fsck_err: - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_exit(&trans) ?: ret; goto out; } @@ -1022,15 +1038,18 @@ noinline_for_stack static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, u64 range_start, u64 *range_end) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_dirent d; u64 d_inum; int ret; + bch2_trans_init(&trans, c); + inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) { switch (k.k->type) { case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); @@ -1046,32 +1065,15 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, break; } - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_trans_exit(&trans); if (ret) bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); return ret; } -s64 bch2_count_inode_sectors(struct bch_fs *c, u64 inum) -{ - struct btree_iter iter; - struct bkey_s_c k; - u64 sectors = 0; - - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) { - if (k.k->p.inode != inum) - break; - - if (bkey_extent_is_allocation(k.k)) - sectors += k.k->size; - } - - return bch2_btree_iter_unlock(&iter) ?: sectors; -} - static int check_inode_nlink(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, struct bch_inode_unpacked *u, @@ -1253,7 +1255,7 @@ static int check_inode(struct btree_trans *trans, bch_verbose(c, "recounting sectors for inode %llu", u.bi_inum); - sectors = bch2_count_inode_sectors(c, u.bi_inum); + sectors = bch2_count_inode_sectors(trans, u.bi_inum); if (sectors < 0) { bch_err(c, "error in fs gc: error %i " "recounting inode sectors", @@ -1346,7 +1348,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); genradix_iter_advance(&nlinks_iter, links); bch2_btree_iter_next(iter); - bch2_btree_iter_cond_resched(iter); + bch2_trans_cond_resched(&trans); } fsck_err: bch2_trans_exit(&trans); diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index 88da06762d7d..97460452e842 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -2,7 +2,6 @@ #ifndef _BCACHEFS_FSCK_H #define _BCACHEFS_FSCK_H -s64 bch2_count_inode_sectors(struct bch_fs *, u64); int bch2_fsck(struct bch_fs *); #endif /* _BCACHEFS_FSCK_H */ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 811c917cba84..c6336e7a2a23 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -447,13 +447,15 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, struct bch_inode_unpacked *inode) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = -ENOENT; - for_each_btree_key(&iter, c, BTREE_ID_INODES, - POS(inode_nr, 0), - BTREE_ITER_SLOTS, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, 
iter, BTREE_ID_INODES, + POS(inode_nr, 0), BTREE_ITER_SLOTS, k) { switch (k.k->type) { case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); @@ -466,7 +468,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, break; } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } #ifdef CONFIG_BCACHEFS_DEBUG diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index f4c49bf82456..62ee09121036 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1263,27 +1263,28 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio struct bch_io_failures *failed, unsigned flags) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; BKEY_PADDED(k) tmp; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, - rbio->pos, BTREE_ITER_SLOTS); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + rbio->pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; - k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k)) { - bch2_btree_iter_unlock(&iter); + k = bch2_btree_iter_peek_slot(iter); + if (btree_iter_err(k)) goto err; - } bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&iter); + bch2_trans_unlock(&trans); if (!bkey_extent_is_data(k.k) || !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k), @@ -1300,25 +1301,30 @@ retry: goto retry; if (ret) goto err; - goto out; -err: - rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_rbio_done(rbio); + bch2_trans_exit(&trans); + return; +err: + rbio->bio.bi_status = BLK_STS_IOERR; + goto out; } static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, u64 inode, struct bch_io_failures *failed, unsigned flags) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret; + bch2_trans_init(&trans, c); + flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; retry: - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k) { BKEY_PADDED(k) tmp; @@ -1326,7 +1332,7 @@ retry: bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&iter); + bch2_btree_iter_unlock(iter); bytes = min_t(unsigned, bvec_iter.bi_size, (k.k->p.offset - bvec_iter.bi_sector) << 9); @@ -1351,12 +1357,12 @@ retry: * If we get here, it better have been because there was an error * reading a btree node */ - ret = bch2_btree_iter_unlock(&iter); - BUG_ON(!ret); - __bcache_io_error(c, "btree IO error %i", ret); + BUG_ON(!(iter->flags & BTREE_ITER_ERROR)); + __bcache_io_error(c, "btree IO error"); err: rbio->bio.bi_status = BLK_STS_IOERR; out: + bch2_trans_exit(&trans); bch2_rbio_done(rbio); } @@ -1859,12 +1865,14 @@ out_read_done: void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| BCH_READ_USER_MAPPED; - int ret; + + bch2_trans_init(&trans, c); BUG_ON(rbio->_state); BUG_ON(flags & BCH_READ_NODECODE); @@ -1873,7 +1881,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, 
rbio->bio.bi_iter.bi_sector), BTREE_ITER_SLOTS, k) { BKEY_PADDED(k) tmp; @@ -1885,7 +1893,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) */ bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&iter); + bch2_btree_iter_unlock(iter); bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size, (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9); @@ -1907,9 +1915,10 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) * If we get here, it better have been because there was an error * reading a btree node */ - ret = bch2_btree_iter_unlock(&iter); - BUG_ON(!ret); - bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); + BUG_ON(!(iter->flags & BTREE_ITER_ERROR)); + bcache_io_error(c, &rbio->bio, "btree IO error"); + + bch2_trans_exit(&trans); bch2_rbio_done(rbio); } diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index c26f36d58633..45c8d38d12de 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -62,9 +62,12 @@ static void journal_seq_blacklist_flush(struct journal *j, closure_init_stack(&cl); for (i = 0;; i++) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; + bch2_trans_init(&trans, c); + mutex_lock(&j->blacklist_lock); if (i >= bl->nr_entries) { mutex_unlock(&j->blacklist_lock); @@ -73,17 +76,17 @@ static void journal_seq_blacklist_flush(struct journal *j, n = bl->entries[i]; mutex_unlock(&j->blacklist_lock); - __bch2_btree_iter_init(&iter, c, n.btree_id, n.pos, - 0, 0, BTREE_ITER_NODES); + iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos, + 0, 0, 0); - b = bch2_btree_iter_peek_node(&iter); + b = bch2_btree_iter_peek_node(iter); /* The node might have already been rewritten: */ if (b->data->keys.seq == n.seq) { - ret = bch2_btree_node_rewrite(c, &iter, n.seq, 0); + ret = bch2_btree_node_rewrite(c, iter, n.seq, 0); if (ret) { - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); bch2_fs_fatal_error(c, "error %i rewriting btree node with blacklisted journal seq", ret); @@ -92,7 +95,7 @@ static void journal_seq_blacklist_flush(struct journal *j, } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } for (i = 0;; i++) { diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 38bf75b6bc2d..2b63b07db2bc 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -106,7 +106,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct closure cl; struct btree *b; unsigned id; @@ -116,13 +117,15 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) if (flags & BCH_FORCE_IF_METADATA_LOST) return -EINVAL; + bch2_trans_init(&trans, c); closure_init_stack(&cl); mutex_lock(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { + for_each_btree_node(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH, b) { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; struct bkey_i_btree_ptr *new_key; retry: @@ -134,7 +137,7 @@ retry: * but got -EINTR after upgrading the iter, but * then raced and the node is now gone: */ - bch2_btree_iter_downgrade(&iter); + bch2_btree_iter_downgrade(iter); ret = bch2_mark_bkey_replicas(c, 
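[editorial note: the converted read paths above share one locking rule: copy the key out of the btree, drop the transaction's locks, and only then issue IO. A condensed sketch of the sequence used by bch2_read_retry_nodecode() -- do_read() is a placeholder for the actual IO, not a real function:

	BKEY_PADDED(k) tmp;
	struct bkey_s_c k;

	k = bch2_btree_iter_peek_slot(iter);

	bkey_reassemble(&tmp.k, k);	/* stable copy of the extent key */
	k = bkey_i_to_s_c(&tmp.k);
	bch2_trans_unlock(&trans);	/* no btree locks held across IO */

	do_read(c, rbio, k);]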
bkey_i_to_s_c(&b->key)); if (ret) @@ -148,16 +151,16 @@ retry: if (ret) goto err; - ret = bch2_btree_node_update_key(c, &iter, b, new_key); + ret = bch2_btree_node_update_key(c, iter, b, new_key); if (ret == -EINTR) { - b = bch2_btree_iter_peek_node(&iter); + b = bch2_btree_iter_peek_node(iter); goto retry; } if (ret) goto err; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); } /* flush relevant btree updates */ @@ -171,14 +174,13 @@ retry: } ret = 0; -out: +err: + bch2_trans_exit(&trans); + ret = bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); return ret; -err: - bch2_btree_iter_unlock(&iter); - goto out; } int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 8c453ae31525..3f3e34e07f35 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -485,6 +485,8 @@ int bch2_move_data(struct bch_fs *c, struct moving_context ctxt = { .stats = stats }; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); BKEY_PADDED(k) tmp; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; struct data_opts data_opts; enum data_cmd data_cmd; @@ -495,9 +497,14 @@ int bch2_move_data(struct bch_fs *c, INIT_LIST_HEAD(&ctxt.reads); init_waitqueue_head(&ctxt.wait); + bch2_trans_init(&trans, c); + stats->data_type = BCH_DATA_USER; - bch2_btree_iter_init(&stats->iter, c, BTREE_ID_EXTENTS, start, - BTREE_ITER_PREFETCH); + stats->btree_id = BTREE_ID_EXTENTS; + stats->pos = POS_MIN; + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start, + BTREE_ITER_PREFETCH); if (rate) bch2_ratelimit_reset(rate); @@ -507,7 +514,7 @@ int bch2_move_data(struct bch_fs *c, delay = rate ? bch2_ratelimit_delay(rate) : 0; if (delay) { - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); set_current_state(TASK_INTERRUPTIBLE); } @@ -520,13 +527,16 @@ int bch2_move_data(struct bch_fs *c, schedule_timeout(delay); if (unlikely(freezing(current))) { - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); try_to_freeze(); } } while (delay); peek: - k = bch2_btree_iter_peek(&stats->iter); + k = bch2_btree_iter_peek(iter); + + stats->pos = iter->pos; + if (!k.k) break; ret = btree_iter_err(k); @@ -542,7 +552,7 @@ peek: struct bch_inode_unpacked inode; /* don't hold btree locks while looking up inode: */ - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); io_opts = bch2_opts_to_inode_opts(c->opts); if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode)) @@ -567,7 +577,7 @@ peek: /* unlock before doing IO: */ bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_unlock(&trans); ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, bkey_s_c_to_extent(k), @@ -589,11 +599,11 @@ next: atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k), &stats->sectors_seen); next_nondata: - bch2_btree_iter_next(&stats->iter); - bch2_btree_iter_cond_resched(&stats->iter); + bch2_btree_iter_next(iter); + bch2_trans_cond_resched(&trans); } out: - bch2_btree_iter_unlock(&stats->iter); + bch2_trans_exit(&trans); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); closure_sync(&ctxt.cl); @@ -609,20 +619,23 @@ out: static int bch2_gc_data_replicas(struct bch_fs *c) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret; + bch2_trans_init(&trans, c); + mutex_lock(&c->replicas_gc_lock); bch2_replicas_gc_start(c, (1 << 
BCH_DATA_USER)|(1 << BCH_DATA_CACHED)); - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) { ret = bch2_mark_bkey_replicas(c, k); if (ret) break; } - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_exit(&trans) ?: ret; bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); @@ -632,24 +645,30 @@ static int bch2_gc_data_replicas(struct bch_fs *c) static int bch2_gc_btree_replicas(struct bch_fs *c) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; unsigned id; int ret = 0; + bch2_trans_init(&trans, c); + mutex_lock(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { + for_each_btree_node(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH, b) { ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); - bch2_btree_iter_cond_resched(&iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&iter) ?: ret; + ret = bch2_trans_iter_free(&trans, iter) ?: ret; } + bch2_trans_exit(&trans); + bch2_replicas_gc_end(c, ret); mutex_unlock(&c->replicas_gc_lock); @@ -662,16 +681,25 @@ static int bch2_move_btree(struct bch_fs *c, struct bch_move_stats *stats) { struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); + struct btree_trans trans; + struct btree_iter *iter; struct btree *b; unsigned id; struct data_opts data_opts; enum data_cmd cmd; int ret = 0; + bch2_trans_init(&trans, c); + stats->data_type = BCH_DATA_BTREE; for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { + stats->btree_id = id; + + for_each_btree_node(&trans, iter, id, POS_MIN, + BTREE_ITER_PREFETCH, b) { + stats->pos = iter->pos; + switch ((cmd = pred(c, arg, bkey_i_to_s_c(&b->key), &io_opts, &data_opts))) { @@ -686,15 +714,17 @@ static int bch2_move_btree(struct bch_fs *c, BUG(); } - ret = bch2_btree_node_rewrite(c, &stats->iter, + ret = bch2_btree_node_rewrite(c, iter, b->data->keys.seq, 0) ?: ret; next: - bch2_btree_iter_cond_resched(&stats->iter); + bch2_trans_cond_resched(&trans); } - ret = bch2_btree_iter_unlock(&stats->iter) ?: ret; + ret = bch2_trans_iter_free(&trans, iter) ?: ret; } + bch2_trans_exit(&trans); + return ret; } diff --git a/fs/bcachefs/move_types.h b/fs/bcachefs/move_types.h index 8dbeb6ef727c..6788170d3f95 100644 --- a/fs/bcachefs/move_types.h +++ b/fs/bcachefs/move_types.h @@ -4,7 +4,8 @@ struct bch_move_stats { enum bch_data_type data_type; - struct btree_iter iter; + enum btree_id btree_id; + struct bpos pos; atomic64_t keys_moved; atomic64_t sectors_moved; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 492ab73c39e7..f5dd13e92200 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -356,11 +356,14 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; int ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_QUOTAS, POS(type, 0), + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), BTREE_ITER_PREFETCH, k) { if (k.k->p.inode != type) break; @@ -370,7 +373,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) break; } - return bch2_btree_iter_unlock(&iter) ?: ret; 
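[editorial note: when one transaction walks several btrees in turn, the iterator for each btree is freed individually so a deferred BTREE_ITER_ERROR is not lost, and the transaction is torn down once at the end. The shape used by bch2_gc_btree_replicas() above, as a sketch -- process_node() is a placeholder:

	bch2_trans_init(&trans, c);

	for (id = 0; id < BTREE_ID_NR; id++) {
		for_each_btree_node(&trans, iter, id, POS_MIN,
				    BTREE_ITER_PREFETCH, b)
			process_node(b);

		/* freeing the iterator surfaces any btree IO error: */
		ret = bch2_trans_iter_free(&trans, iter) ?: ret;
	}

	bch2_trans_exit(&trans);]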
+ return bch2_trans_exit(&trans) ?: ret; } void bch2_fs_quota_exit(struct bch_fs *c) @@ -414,7 +417,8 @@ int bch2_fs_quota_read(struct bch_fs *c) { unsigned i, qtypes = enabled_qtypes(c); struct bch_memquota_type *q; - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bch_inode_unpacked u; struct bkey_s_c k; int ret; @@ -429,7 +433,9 @@ int bch2_fs_quota_read(struct bch_fs *c) return ret; } - for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, BTREE_ITER_PREFETCH, k) { switch (k.k->type) { case KEY_TYPE_inode: @@ -443,7 +449,7 @@ int bch2_fs_quota_read(struct bch_fs *c) KEY_TYPE_QUOTA_NOCHECK); } } - return bch2_btree_iter_unlock(&iter) ?: ret; + return bch2_trans_exit(&trans) ?: ret; } /* Enable/disable/delete quotas for an entire filesystem: */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index cc1a7deb90bc..fe4a9af92a76 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -289,8 +289,8 @@ ssize_t bch2_rebalance_work_show(struct bch_fs *c, char *buf) case REBALANCE_RUNNING: pr_buf(&out, "running\n"); pr_buf(&out, "pos %llu:%llu\n", - r->move_stats.iter.pos.inode, - r->move_stats.iter.pos.offset); + r->move_stats.pos.inode, + r->move_stats.pos.offset); break; } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index ffa7af0820ea..0ed28d7f074d 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -203,13 +203,16 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { if (k.k->type != desc.key_type && k.k->type != KEY_TYPE_whiteout) - return false; + break; if (k.k->type == desc.key_type && - desc.hash_bkey(info, k) <= start->pos.offset) - return true; + desc.hash_bkey(info, k) <= start->pos.offset) { + bch2_trans_iter_free_on_commit(trans, iter); + return 1; + } } - return btree_iter_err(k); + + return bch2_trans_iter_free(trans, iter); } static __always_inline @@ -220,6 +223,8 @@ int bch2_hash_set(struct btree_trans *trans, { struct btree_iter *iter, *slot = NULL; struct bkey_s_c k; + bool found = false; + int ret = 0; iter = bch2_trans_get_iter(trans, desc.btree_id, POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), @@ -250,21 +255,30 @@ int bch2_hash_set(struct btree_trans *trans, goto not_found; } - return btree_iter_err(k) ?: -ENOSPC; -not_found: - if (flags & BCH_HASH_SET_MUST_REPLACE) - return -ENOENT; + if (slot) + bch2_trans_iter_free(trans, iter); - insert->k.p = slot->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(slot, insert)); - return 0; + return bch2_trans_iter_free(trans, iter) ?: -ENOSPC; found: - if (flags & BCH_HASH_SET_MUST_CREATE) - return -EEXIST; + found = true; +not_found: - insert->k.p = iter->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); - return 0; + if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { + ret = -ENOENT; + } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { + ret = -EEXIST; + } else { + if (!found && slot) { + bch2_trans_iter_free(trans, iter); + iter = slot; + } + + insert->k.p = iter->pos; + bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); + bch2_trans_iter_free_on_commit(trans, iter); + } + + return ret; } static __always_inline diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index f1e269671374..1354dd33874c 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -282,7 +282,8 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char 
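[editorial note: after the str_hash.h change above, bch2_hash_needs_whiteout() has a tri-state return: positive when a later key hashes back to the starting slot (a whiteout is needed), zero when not, negative for a deferred btree error returned by bch2_trans_iter_free(). A caller-side sketch -- the names needs_whiteout and err are placeholders, not from the patch:

	int ret = bch2_hash_needs_whiteout(trans, desc, info, start);

	if (ret < 0)
		goto err;		/* btree error */
	needs_whiteout = ret != 0;]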
*buf) static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, nr_compressed_extents = 0, @@ -292,7 +293,9 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k) if (k.k->type == KEY_TYPE_extent) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; @@ -314,7 +317,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) break; } } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); return scnprintf(buf, PAGE_SIZE, "uncompressed data:\n" diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 652e22125dcf..c8682fe674f6 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -89,11 +89,14 @@ static void test_delete_written(struct bch_fs *c, u64 nr) static void test_iterate(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test keys"); @@ -113,28 +116,31 @@ static void test_iterate(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) BUG_ON(k.k->p.offset != i++); - bch2_btree_iter_unlock(&iter); BUG_ON(i != nr); pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) + while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) BUG_ON(k.k->p.offset != --i); - bch2_btree_iter_unlock(&iter); BUG_ON(i); + + bch2_trans_exit(&trans); } static void test_iterate_extents(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test extents"); @@ -155,32 +161,35 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; } - bch2_btree_iter_unlock(&iter); BUG_ON(i != nr); pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) { + while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) { BUG_ON(k.k->p.offset != i); i = bkey_start_offset(k.k); } - bch2_btree_iter_unlock(&iter); BUG_ON(i); + + bch2_trans_exit(&trans); } static void test_iterate_slots(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test keys"); @@ -200,11 +209,11 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) { BUG_ON(k.k->p.offset != i); i += 2; } - bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); BUG_ON(i != nr * 2); @@ -212,7 +221,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - 
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(0, 0), + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), BTREE_ITER_SLOTS, k) { BUG_ON(bkey_deleted(k.k) != (i & 1)); BUG_ON(k.k->p.offset != i++); @@ -220,16 +229,20 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) if (i == nr * 2) break; } - bch2_btree_iter_unlock(&iter); + + bch2_trans_exit(&trans); } static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 i; int ret; + bch2_trans_init(&trans, c); + delete_test_keys(c); pr_info("inserting test keys"); @@ -250,12 +263,12 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); i += 16; } - bch2_btree_iter_unlock(&iter); + bch2_trans_iter_free(&trans, iter); BUG_ON(i != nr); @@ -263,7 +276,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(0, 0), + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), BTREE_ITER_SLOTS, k) { BUG_ON(bkey_deleted(k.k) != !(i % 16)); @@ -274,7 +287,8 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) if (i == nr) break; } - bch2_btree_iter_unlock(&iter); + + bch2_trans_exit(&trans); } /* @@ -283,34 +297,40 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static void test_peek_end(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static void test_peek_end_extents(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0); + bch2_trans_init(&trans, c); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(iter); BUG_ON(k.k); - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } /* extent unit tests */ @@ -401,32 +421,35 @@ static void rand_insert(struct bch_fs *c, u64 nr) static void rand_lookup(struct bch_fs *c, u64 nr) { + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; u64 i; - for (i = 0; i < nr; i++) { - struct btree_iter iter; - struct bkey_s_c k; + bch2_trans_init(&trans, c); - bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, - POS(0, test_rand()), 0); + for (i = 0; i < nr; i++) { + iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, + POS(0, test_rand()), 0); - k = bch2_btree_iter_peek(&iter); - bch2_btree_iter_unlock(&iter); + k = bch2_btree_iter_peek(iter); + bch2_trans_iter_free(&trans, iter); } + + bch2_trans_exit(&trans); } static void rand_mixed(struct bch_fs *c, u64 nr) { + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; int ret; u64 i; - for (i = 0; i < nr; 
i++) { - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c); + for (i = 0; i < nr; i++) { iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS(0, test_rand()), 0); @@ -443,9 +466,10 @@ static void rand_mixed(struct bch_fs *c, u64 nr) BUG_ON(ret); } - bch2_trans_exit(&trans); + bch2_trans_iter_free(&trans, iter); } + bch2_trans_exit(&trans); } static void rand_delete(struct bch_fs *c, u64 nr) @@ -495,12 +519,15 @@ static void seq_insert(struct bch_fs *c, u64 nr) static void seq_lookup(struct bch_fs *c, u64 nr) { - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) ; - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); } static void seq_overwrite(struct bch_fs *c, u64 nr) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 545e743972fb..68ece7c0ee7a 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -271,12 +271,16 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { struct bch_fs *c = dentry->d_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct btree_iter iter; + struct btree_trans trans; + struct btree_iter *iter; struct bkey_s_c k; u64 inum = dentry->d_inode->i_ino; ssize_t ret = 0; - for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), 0, k) { + bch2_trans_init(&trans, c); + + for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, + POS(inum, 0), 0, k) { BUG_ON(k.k->p.inode < inum); if (k.k->p.inode > inum) @@ -290,7 +294,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret < 0) break; } - bch2_btree_iter_unlock(&iter); + bch2_trans_exit(&trans); if (ret < 0) return ret; -- cgit v1.2.3 From 94f651e2c7e2808e82673b46776f951a67da4a2d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2019 15:49:28 -0400 Subject: bcachefs: Return errors from for_each_btree_key() Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 10 ++--- fs/bcachefs/btree_gc.c | 4 +- fs/bcachefs/btree_iter.h | 16 +++++--- fs/bcachefs/buckets.c | 41 ++++++++++--------- fs/bcachefs/buckets.h | 10 ++--- fs/bcachefs/dirent.c | 17 ++++---- fs/bcachefs/ec.c | 19 ++++----- fs/bcachefs/extents.c | 3 +- fs/bcachefs/fs-io.c | 10 ++--- fs/bcachefs/fs.c | 10 ++--- fs/bcachefs/fsck.c | 28 ++++++------- fs/bcachefs/io.c | 13 +++--- fs/bcachefs/move.c | 2 +- fs/bcachefs/quota.c | 4 +- fs/bcachefs/str_hash.h | 43 ++++++++----------- fs/bcachefs/sysfs.c | 8 +++- fs/bcachefs/tests.c | 35 ++++++++-------- fs/bcachefs/xattr.c | 93 ++++++++++++++++++++---------------------- 18 files changed, 182 insertions(+), 184 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 4a8f6fa3db1e..a6d3417ac262 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -273,14 +273,14 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) bch2_trans_init(&trans, c); - for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret) bch2_alloc_read_key(c, k); - bch2_trans_cond_resched(&trans); - } - ret = bch2_trans_exit(&trans); - if (ret) + ret = bch2_trans_exit(&trans) ?: ret; + if (ret) { + bch_err(c, "error reading 
alloc info: %i", ret); return ret; + } for_each_journal_key(*journal_keys, j) if (j->btree_id == BTREE_ID_ALLOC) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2650f60b7cd7..3ba0910c2a47 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -289,7 +289,7 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id, bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k), - BTREE_ITER_SLOTS, k) { + BTREE_ITER_SLOTS, k, ret) { percpu_down_read(&c->mark_lock); ret = bch2_mark_overwrite(&trans, iter, k, insert, NULL, BCH_BUCKET_MARK_GC| @@ -300,7 +300,7 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id, break; } - return bch2_trans_exit(&trans); + return bch2_trans_exit(&trans) ?: ret; } static int bch2_gc_btrees(struct bch_fs *c, struct journal_keys *journal_keys, diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 291c805e3cc5..0a4c6c76e43b 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -238,12 +238,16 @@ static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter, : bch2_btree_iter_next(iter); } -#define for_each_btree_key(_trans, _iter, _btree_id, _start, _flags, _k)\ - for (iter = bch2_trans_get_iter((_trans), (_btree_id), \ - (_start), (_flags)), \ - (_k) = __bch2_btree_iter_peek(_iter, _flags); \ - !IS_ERR_OR_NULL((_k).k); \ - (_k) = __bch2_btree_iter_next(_iter, _flags)) +#define for_each_btree_key(_trans, _iter, _btree_id, \ + _start, _flags, _k, _ret) \ + for ((_ret) = PTR_ERR_OR_ZERO((_iter) = \ + bch2_trans_get_iter((_trans), (_btree_id), \ + (_start), (_flags))) ?: \ + PTR_ERR_OR_ZERO(((_k) = \ + __bch2_btree_iter_peek(_iter, _flags)).k); \ + !ret && (_k).k; \ + (_ret) = PTR_ERR_OR_ZERO(((_k) = \ + __bch2_btree_iter_next(_iter, _flags)).k)) #define for_each_btree_key_continue(_iter, _flags, _k) \ for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e9c5889b2c0f..ff4c61371830 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1035,12 +1035,12 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, return ret; } -inline bool bch2_mark_overwrite(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c old, - struct bkey_i *new, - struct bch_fs_usage *fs_usage, - unsigned flags) +inline int bch2_mark_overwrite(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c old, + struct bkey_i *new, + struct bch_fs_usage *fs_usage, + unsigned flags) { struct bch_fs *c = trans->c; struct btree *b = iter->l[0].b; @@ -1049,7 +1049,7 @@ inline bool bch2_mark_overwrite(struct btree_trans *trans, if (btree_node_is_extents(b) ? 
bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0 : bkey_cmp(new->k.p, old.k->p)) - return false; + return 0; if (btree_node_is_extents(b)) { switch (bch2_extent_overlap(&new->k, old.k)) { @@ -1080,24 +1080,24 @@ inline bool bch2_mark_overwrite(struct btree_trans *trans, BUG_ON(sectors >= 0); } - bch2_mark_key_locked(c, old, false, sectors, - fs_usage, trans->journal_res.seq, flags); - return true; + return bch2_mark_key_locked(c, old, false, sectors, fs_usage, + trans->journal_res.seq, flags) ?: 1; } -void bch2_mark_update(struct btree_trans *trans, - struct btree_insert_entry *insert, - struct bch_fs_usage *fs_usage, - unsigned flags) +int bch2_mark_update(struct btree_trans *trans, + struct btree_insert_entry *insert, + struct bch_fs_usage *fs_usage, + unsigned flags) { struct bch_fs *c = trans->c; struct btree_iter *iter = insert->iter; struct btree *b = iter->l[0].b; struct btree_node_iter node_iter = iter->l[0].iter; struct bkey_packed *_k; + int ret = 0; if (!btree_node_type_needs_gc(iter->btree_id)) - return; + return 0; bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, bpos_min(insert->k->k.p, b->key.k.p).offset - @@ -1105,7 +1105,7 @@ void bch2_mark_update(struct btree_trans *trans, fs_usage, trans->journal_res.seq, flags); if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES)) - return; + return 0; /* * For non extents, we only mark the new key, not the key being @@ -1114,19 +1114,22 @@ void bch2_mark_update(struct btree_trans *trans, if ((iter->btree_id == BTREE_ID_ALLOC || iter->btree_id == BTREE_ID_EC) && !bkey_deleted(&insert->k->k)) - return; + return 0; while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, KEY_TYPE_discard))) { struct bkey unpacked; struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); - if (!bch2_mark_overwrite(trans, iter, k, insert->k, - fs_usage, flags)) + ret = bch2_mark_overwrite(trans, iter, k, insert->k, + fs_usage, flags); + if (ret <= 0) break; bch2_btree_node_iter_advance(&node_iter, b); } + + return ret; } void bch2_trans_fs_usage_apply(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 90fffee1c289..c51192fae503 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -254,11 +254,11 @@ int bch2_mark_key(struct bch_fs *, struct bkey_s_c, int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, struct disk_reservation *); -bool bch2_mark_overwrite(struct btree_trans *, struct btree_iter *, - struct bkey_s_c, struct bkey_i *, - struct bch_fs_usage *, unsigned); -void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *, - struct bch_fs_usage *, unsigned); +int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *, + struct bkey_s_c, struct bkey_i *, + struct bch_fs_usage *, unsigned); +int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *, + struct bch_fs_usage *, unsigned); void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); /* disk reservations: */ diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 4479a9f55ddf..71971b3cc851 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -333,14 +333,10 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) { struct btree_iter *iter; struct bkey_s_c k; - int ret = 0; - - iter = bch2_trans_get_iter(trans, BTREE_ID_DIRENTS, - POS(dir_inum, 0), 0); - if (IS_ERR(iter)) - return PTR_ERR(iter); + int ret; - for_each_btree_key_continue(iter, 0, k) { + for_each_btree_key(trans, iter, BTREE_ID_DIRENTS, + POS(dir_inum, 0), 0, k, ret) { if 
(k.k->p.inode > dir_inum) break; @@ -369,6 +365,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, struct bkey_s_c k; struct bkey_s_c_dirent dirent; unsigned len; + int ret; if (!dir_emit_dots(file, ctx)) return 0; @@ -376,7 +373,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, - POS(inode->v.i_ino, ctx->pos), 0, k) { + POS(inode->v.i_ino, ctx->pos), 0, k, ret) { if (k.k->type != KEY_TYPE_dirent) continue; @@ -401,7 +398,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, ctx->pos = k.k->p.offset + 1; } - bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; - return 0; + return ret; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 47d197ed5c99..063f91fc1b09 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -679,10 +679,8 @@ retry: bch2_trans_begin(&trans); /* XXX: start pos hint */ - iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - - for_each_btree_key_continue(iter, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) break; @@ -690,7 +688,8 @@ retry: goto found_slot; } - ret = -ENOSPC; + if (!ret) + ret = -ENOSPC; goto out; found_slot: ret = ec_stripe_mem_alloc(c, iter); @@ -1249,14 +1248,14 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys) bch2_trans_init(&trans, c); - for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k, ret) bch2_stripe_read_key(c, k); - bch2_trans_cond_resched(&trans); - } - ret = bch2_trans_exit(&trans); - if (ret) + ret = bch2_trans_exit(&trans) ?: ret; + if (ret) { + bch_err(c, "error reading stripes: %i", ret); return ret; + } for_each_journal_key(*journal_keys, i) if (i->btree_id == BTREE_ID_EC) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 2e7c3e82f03b..257c862c9856 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1632,13 +1632,14 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, struct bpos end = pos; struct bkey_s_c k; bool ret = true; + int err; end.offset += size; bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, - BTREE_ITER_SLOTS, k) { + BTREE_ITER_SLOTS, k, err) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index c8f6104553aa..f76dd4d89f25 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2139,7 +2139,7 @@ static inline int range_has_data(struct bch_fs *c, bch2_trans_init(&trans, c); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; @@ -2732,7 +2732,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, - POS(inode->v.i_ino, offset >> 9), 0, k) { + POS(inode->v.i_ino, offset >> 9), 0, k, ret) { if (k.k->p.inode != inode->v.i_ino) { break; } else if (bkey_extent_is_data(k.k)) { @@ -2742,7 +2742,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) break; } - ret = bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) return ret; @@ -2806,7 +2806,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) 
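For readers following the conversion, the shape every caller in this patch moves to can be shown in isolation. Below is a minimal userspace sketch (illustrative only; cursor_next and for_each_val are made-up names, and nothing here is bcachefs code) of an iteration macro that hands its error back through a caller-supplied variable, with the teardown error folded in via the GNU ?: extension, mirroring the ret = bch2_trans_exit(&trans) ?: ret idiom used throughout:

/*
 * Standalone sketch of the error-returning loop shape.
 * Compile with gcc (uses the GNU ?: extension, as kernel code does).
 */
#include <stdio.h>

struct cursor { int pos, end, err; };

/* > 0: produced a value; 0: done; < 0: error */
static int cursor_next(struct cursor *c, int *val)
{
	if (c->err)
		return c->err;
	if (c->pos >= c->end)
		return 0;
	*val = c->pos++;
	return 1;
}

#define for_each_val(_c, _v, _ret)				\
	while (((_ret) = cursor_next((_c), &(_v))) > 0)

static int cursor_exit(struct cursor *c)
{
	return c->err;		/* stand-in for unlock/teardown failures */
}

int main(void)
{
	struct cursor c = { .pos = 0, .end = 4 };
	int v, ret;

	for_each_val(&c, v, ret)
		printf("key %d\n", v);

	ret = cursor_exit(&c) ?: ret;	/* teardown error wins, else the loop's */
	return ret < 0 ? 1 : 0;
}

The payoff of this shape is visible in the callers above: the loop body no longer needs a separate "did the iterator fail" check after the loop, and a break leaves ret at zero rather than at a sentinel.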
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), - BTREE_ITER_SLOTS, k) { + BTREE_ITER_SLOTS, k, ret) { if (k.k->p.inode != inode->v.i_ino) { next_hole = bch2_next_pagecache_hole(&inode->v, offset, MAX_LFS_FILESIZE); @@ -2823,7 +2823,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) } } - ret = bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 6e377a0e176f..ba4b4e942f0c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1210,7 +1210,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, - POS(ei->v.i_ino, start >> 9), 0, k) + POS(ei->v.i_ino, start >> 9), 0, k, ret) if (bkey_extent_is_data(k.k) || k.k->type == KEY_TYPE_reservation) { if (bkey_cmp(bkey_start_pos(k.k), @@ -1220,17 +1220,17 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (have_extent) { ret = bch2_fill_extent(info, &tmp.k, 0); if (ret) - goto out; + break; } bkey_reassemble(&tmp.k, k); have_extent = true; } - if (have_extent) + if (!ret && have_extent) ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); -out: - bch2_trans_exit(&trans); + + ret = bch2_trans_exit(&trans) ?: ret; return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9db01437315b..ade3446d8dc3 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -21,8 +21,10 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) struct btree_iter *iter; struct bkey_s_c k; u64 sectors = 0; + int ret; - for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, POS(inum, 0), 0, k) { + for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, + POS(inum, 0), 0, k, ret) { if (k.k->p.inode != inum) break; @@ -30,7 +32,9 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) sectors += k.k->size; } - return bch2_trans_iter_free(trans, iter) ?: sectors; + bch2_trans_iter_free(trans, iter); + + return ret ?: sectors; } static int remove_dirent(struct btree_trans *trans, @@ -942,7 +946,7 @@ next: goto up; for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, - POS(e->inum, e->offset + 1), 0, k) { + POS(e->inum, e->offset + 1), 0, k, ret) { if (k.k->p.inode != e->inum) break; @@ -985,7 +989,7 @@ next: } goto next; } - ret = bch2_trans_iter_free(&trans, iter); + ret = bch2_trans_iter_free(&trans, iter) ?: ret; if (ret) { bch_err(c, "btree error %i in fsck", ret); goto err; @@ -1087,7 +1091,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) { switch (k.k->type) { case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); @@ -1105,7 +1109,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, bch2_trans_cond_resched(&trans); } - ret = bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); @@ -1432,15 +1436,12 @@ static int check_inodes_fast(struct bch_fs *c) struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_c_inode inode; - int ret = 0, ret2; + int ret; bch2_trans_init(&trans, c); bch2_trans_preload_iters(&trans); - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, - POS_MIN, 0); - - 
for_each_btree_key_continue(iter, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { if (k.k->type != KEY_TYPE_inode) continue; @@ -1456,10 +1457,9 @@ static int check_inodes_fast(struct bch_fs *c) break; } } + BUG_ON(ret == -EINTR); - ret2 = bch2_trans_exit(&trans); - - return ret ?: ret2; + return bch2_trans_exit(&trans) ?: ret; } /* diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 71481b9728f5..b07b0f92d4f9 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1326,7 +1326,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, retry: for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, bvec_iter.bi_sector), - BTREE_ITER_SLOTS, k) { + BTREE_ITER_SLOTS, k, ret) { BKEY_PADDED(k) tmp; unsigned bytes; @@ -1357,8 +1357,8 @@ retry: * If we get here, it better have been because there was an error * reading a btree node */ - BUG_ON(!btree_iter_err(iter)); - __bcache_io_error(c, "btree IO error"); + BUG_ON(!ret); + __bcache_io_error(c, "btree IO error: %i", ret); err: rbio->bio.bi_status = BLK_STS_IOERR; out: @@ -1871,6 +1871,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| BCH_READ_USER_MAPPED; + int ret; bch2_trans_init(&trans, c); @@ -1883,7 +1884,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, rbio->bio.bi_iter.bi_sector), - BTREE_ITER_SLOTS, k) { + BTREE_ITER_SLOTS, k, ret) { BKEY_PADDED(k) tmp; unsigned bytes; @@ -1915,8 +1916,8 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) * If we get here, it better have been because there was an error * reading a btree node */ - BUG_ON(!btree_iter_err(iter)); - bcache_io_error(c, &rbio->bio, "btree IO error"); + BUG_ON(!ret); + bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); bch2_trans_exit(&trans); bch2_rbio_done(rbio); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 9793896bee77..1ad585ee27ca 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -630,7 +630,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c) bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED)); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, - BTREE_ITER_PREFETCH, k) { + BTREE_ITER_PREFETCH, k, ret) { ret = bch2_mark_bkey_replicas(c, k); if (ret) break; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a4f75d53b42c..b78df735d94c 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -364,7 +364,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), - BTREE_ITER_PREFETCH, k) { + BTREE_ITER_PREFETCH, k, ret) { if (k.k->p.inode != type) break; @@ -436,7 +436,7 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, - BTREE_ITER_PREFETCH, k) { + BTREE_ITER_PREFETCH, k, ret) { switch (k.k->type) { case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 0ed28d7f074d..c47af32ce983 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -134,14 +134,11 @@ bch2_hash_lookup(struct btree_trans *trans, { struct btree_iter *iter; struct bkey_s_c k; + int ret; - iter = bch2_trans_get_iter(trans, desc.btree_id, - POS(inode, desc.hash_key(info, key)), - 
BTREE_ITER_SLOTS|flags); - if (IS_ERR(iter)) - return iter; - - for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { + for_each_btree_key(trans, iter, desc.btree_id, + POS(inode, desc.hash_key(info, key)), + BTREE_ITER_SLOTS|flags, k, ret) { if (iter->pos.inode != inode) break; @@ -156,7 +153,7 @@ bch2_hash_lookup(struct btree_trans *trans, } } - return IS_ERR(k.k) ? ERR_CAST(k.k) : ERR_PTR(-ENOENT); + return ERR_PTR(ret ?: -ENOENT); } static __always_inline struct btree_iter * @@ -167,14 +164,11 @@ bch2_hash_hole(struct btree_trans *trans, { struct btree_iter *iter; struct bkey_s_c k; + int ret; - iter = bch2_trans_get_iter(trans, desc.btree_id, - POS(inode, desc.hash_key(info, key)), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return iter; - - for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { + for_each_btree_key(trans, iter, desc.btree_id, + POS(inode, desc.hash_key(info, key)), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (iter->pos.inode != inode) break; @@ -182,7 +176,7 @@ bch2_hash_hole(struct btree_trans *trans, return iter; } - return IS_ERR(k.k) ? ERR_CAST(k.k) : ERR_PTR(-ENOSPC); + return ERR_PTR(ret ?: -ENOSPC); } static __always_inline @@ -224,15 +218,11 @@ int bch2_hash_set(struct btree_trans *trans, struct btree_iter *iter, *slot = NULL; struct bkey_s_c k; bool found = false; - int ret = 0; - - iter = bch2_trans_get_iter(trans, desc.btree_id, - POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter); + int ret; - for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { + for_each_btree_key(trans, iter, desc.btree_id, + POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (iter->pos.inode != inode) break; @@ -256,9 +246,10 @@ int bch2_hash_set(struct btree_trans *trans, } if (slot) - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_free(trans, slot); + bch2_trans_iter_free(trans, iter); - return bch2_trans_iter_free(trans, iter) ?: -ENOSPC; + return ret ?: -ENOSPC; found: found = true; not_found: diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index f4b70f66d0ac..ee4c0764d4ad 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -289,13 +289,14 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) nr_compressed_extents = 0, compressed_sectors_compressed = 0, compressed_sectors_uncompressed = 0; + int ret; if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; bch2_trans_init(&trans, c); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k) + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) if (k.k->type == KEY_TYPE_extent) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; @@ -317,7 +318,10 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) break; } } - bch2_trans_exit(&trans); + + ret = bch2_trans_exit(&trans) ?: ret; + if (ret) + return ret; return scnprintf(buf, PAGE_SIZE, "uncompressed data:\n" diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index c8682fe674f6..0f5a3ed13f3e 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -116,7 +116,8 @@ static void test_iterate(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, + POS_MIN, 0, k, ret) BUG_ON(k.k->p.offset != i++); BUG_ON(i != nr); @@ -161,7 +162,8 @@ static void test_iterate_extents(struct bch_fs *c, 
u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, + POS_MIN, 0, k, ret) { BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; } @@ -209,7 +211,8 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, + 0, k, ret) { BUG_ON(k.k->p.offset != i); i += 2; } @@ -221,8 +224,8 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(0, 0), - BTREE_ITER_SLOTS, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, + BTREE_ITER_SLOTS, k, ret) { BUG_ON(bkey_deleted(k.k) != (i & 1)); BUG_ON(k.k->p.offset != i++); @@ -263,7 +266,8 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, + 0, k, ret) { BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); i += 16; @@ -276,8 +280,8 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(0, 0), - BTREE_ITER_SLOTS, k) { + for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, + BTREE_ITER_SLOTS, k, ret) { BUG_ON(bkey_deleted(k.k) != !(i % 16)); BUG_ON(bkey_start_offset(k.k) != i); @@ -501,10 +505,8 @@ static void seq_insert(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c); - iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - - for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { insert.k.p = iter->pos; bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &insert.k_i)); @@ -522,10 +524,11 @@ static void seq_lookup(struct bch_fs *c, u64 nr) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; + int ret; bch2_trans_init(&trans, c); - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k) + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) ; bch2_trans_exit(&trans); } @@ -539,10 +542,8 @@ static void seq_overwrite(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c); - iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, - BTREE_ITER_INTENT); - - for_each_btree_key_continue(iter, 0, k) { + for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, + BTREE_ITER_INTENT, k, ret) { struct bkey_i_cookie u; bkey_reassemble(&u.k_i, k); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 68ece7c0ee7a..99fb42225508 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -198,55 +198,54 @@ int bch2_xattr_set(struct btree_trans *trans, u64 inum, return ret; } -static void __bch2_xattr_emit(const char *prefix, - const char *name, size_t name_len, - char **buffer, size_t *buffer_size, - ssize_t *ret) +struct xattr_buf { + char *buf; + size_t len; + size_t used; +}; + +static int __bch2_xattr_emit(const char *prefix, + const char *name, size_t name_len, + struct xattr_buf *buf) { const size_t prefix_len = strlen(prefix); const size_t total_len = prefix_len + name_len + 1; - if (*buffer) { - if (total_len > *buffer_size) { - *ret = -ERANGE; - return; - } + if (buf->buf) { + if (buf->used + total_len > buf->len) + return -ERANGE; - memcpy(*buffer, prefix, prefix_len); - memcpy(*buffer + 
prefix_len, + memcpy(buf->buf + buf->used, prefix, prefix_len); + memcpy(buf->buf + buf->used + prefix_len, name, name_len); - (*buffer)[prefix_len + name_len] = '\0'; - - *buffer += total_len; - *buffer_size -= total_len; + buf->buf[buf->used + prefix_len + name_len] = '\0'; } - *ret += total_len; + buf->used += total_len; + return 0; } -static void bch2_xattr_emit(struct dentry *dentry, +static int bch2_xattr_emit(struct dentry *dentry, const struct bch_xattr *xattr, - char **buffer, size_t *buffer_size, - ssize_t *ret) + struct xattr_buf *buf) { const struct xattr_handler *handler = bch2_xattr_type_to_handler(xattr->x_type); - if (handler && (!handler->list || handler->list(dentry))) - __bch2_xattr_emit(handler->prefix ?: handler->name, - xattr->x_name, xattr->x_name_len, - buffer, buffer_size, ret); + return handler && (!handler->list || handler->list(dentry)) + ? __bch2_xattr_emit(handler->prefix ?: handler->name, + xattr->x_name, xattr->x_name_len, buf) + : 0; } -static void bch2_xattr_list_bcachefs(struct bch_fs *c, - struct bch_inode_info *inode, - char **buffer, - size_t *buffer_size, - ssize_t *ret, - bool all) +static int bch2_xattr_list_bcachefs(struct bch_fs *c, + struct bch_inode_info *inode, + struct xattr_buf *buf, + bool all) { const char *prefix = all ? "bcachefs_effective." : "bcachefs."; unsigned id; + int ret = 0; u64 v; for (id = 0; id < Inode_opt_nr; id++) { @@ -258,13 +257,13 @@ static void bch2_xattr_list_bcachefs(struct bch_fs *c, !(inode->ei_inode.bi_fields_set & (1 << id))) continue; - __bch2_xattr_emit(prefix, - bch2_inode_opts[id], - strlen(bch2_inode_opts[id]), - buffer, buffer_size, ret); - if (*ret < 0) + ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], + strlen(bch2_inode_opts[id]), buf); + if (ret) break; } + + return ret; } ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) @@ -274,13 +273,14 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; + struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; u64 inum = dentry->d_inode->i_ino; - ssize_t ret = 0; + int ret; bch2_trans_init(&trans, c); for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, - POS(inum, 0), 0, k) { + POS(inum, 0), 0, k, ret) { BUG_ON(k.k->p.inode < inum); if (k.k->p.inode > inum) @@ -289,27 +289,24 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (k.k->type != KEY_TYPE_xattr) continue; - bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, - &buffer, &buffer_size, &ret); - if (ret < 0) + ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); + if (ret) break; } - bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; - if (ret < 0) + if (ret) return ret; - bch2_xattr_list_bcachefs(c, inode, &buffer, - &buffer_size, &ret, false); - if (ret < 0) + ret = bch2_xattr_list_bcachefs(c, inode, &buf, false); + if (ret) return ret; - bch2_xattr_list_bcachefs(c, inode, &buffer, - &buffer_size, &ret, true); - if (ret < 0) + ret = bch2_xattr_list_bcachefs(c, inode, &buf, true); + if (ret) return ret; - return ret; + return buf.used; } static int bch2_xattr_get_handler(const struct xattr_handler *handler, -- cgit v1.2.3 From 69eb5390afd287e73f781c26526796b45a77f9d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2019 20:10:43 -0400 Subject: bcachefs: copy correct journal_seq to dir in create Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
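The xattr_buf rewrite a few hunks above replaces three out-parameters with one struct; a NULL buf means "only count the bytes needed", which matches the listxattr(2) size-query convention. A compilable userspace sketch of that accounting (an approximation for illustration, not the kernel code):

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct xattr_buf {
	char	*buf;
	size_t	len;
	size_t	used;
};

static int emit_name(struct xattr_buf *b, const char *prefix, const char *name)
{
	size_t prefix_len = strlen(prefix);
	size_t name_len = strlen(name);
	size_t total = prefix_len + name_len + 1;	/* +1 for the NUL */

	if (b->buf) {
		if (b->used + total > b->len)
			return -ERANGE;
		memcpy(b->buf + b->used, prefix, prefix_len);
		memcpy(b->buf + b->used + prefix_len, name, name_len);
		b->buf[b->used + prefix_len + name_len] = '\0';
	}

	b->used += total;	/* counted whether or not we stored it */
	return 0;
}

int main(void)
{
	char storage[64];
	struct xattr_buf query = { NULL, 0, 0 };
	struct xattr_buf fill  = { storage, sizeof(storage), 0 };

	emit_name(&query, "user.", "comment");	/* size query: never -ERANGE */
	printf("need %zu bytes\n", query.used);

	if (!emit_name(&fill, "user.", "comment"))
		printf("stored \"%s\", used %zu\n", storage, fill.used);
	return 0;
}

Keeping the running total in one place is also what lets bch2_xattr_list() above end with return buf.used instead of threading a ssize_t through every helper.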
(limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ba4b4e942f0c..7ae1b7520351 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -454,7 +454,7 @@ retry: if (!tmpfile) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(dir, inode->ei_journal_seq); + journal_seq_copy(dir, journal_seq); mutex_unlock(&dir->ei_update_lock); } -- cgit v1.2.3 From 619f5bee86b558e0dad91c3759b90652cd5f55d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2019 18:21:19 -0400 Subject: bcachefs: some improvements to startup messages and options Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 2 +- fs/bcachefs/fs-ioctl.c | 3 +- fs/bcachefs/fs.c | 3 ++ fs/bcachefs/fsck.c | 115 +++++++++++++++---------------------------------- fs/bcachefs/fsck.h | 4 +- fs/bcachefs/opts.h | 7 +-- fs/bcachefs/recovery.c | 81 +++++++++++++++++++++++----------- fs/bcachefs/super.c | 85 +++++++++++++++++++++++++----------- fs/bcachefs/super.h | 2 +- 9 files changed, 163 insertions(+), 139 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 2573376290bb..4d8331022648 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -158,7 +158,7 @@ static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) if (arg.flags || arg.pad) return -EINVAL; - return bch2_fs_start(c) ? -EIO : 0; + return bch2_fs_start(c); } static long bch2_ioctl_stop(struct bch_fs *c) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index b00d25b18ed4..4dca716217a6 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -267,7 +267,8 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) down_write(&sb->s_umount); sb->s_flags |= SB_RDONLY; - bch2_fs_emergency_read_only(c); + if (bch2_fs_emergency_read_only(c)) + bch_err(c, "emergency read only due to ioctl"); up_write(&sb->s_umount); return 0; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 7ae1b7520351..aac59b8a15eb 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1834,12 +1834,15 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); if (IS_ERR(vinode)) { + bch_err(c, "error mounting: error getting root inode %i", + (int) PTR_ERR(vinode)); ret = PTR_ERR(vinode); goto err_put_super; } sb->s_root = d_make_root(vinode); if (!sb->s_root) { + bch_err(c, "error mounting: error allocating root dentry"); ret = -ENOMEM; goto err_put_super; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index ade3446d8dc3..61569e4e1c77 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -499,8 +499,7 @@ retry: BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); if (ret) { - bch_err(c, "error in fs gc: error %i " - "updating inode", ret); + bch_err(c, "error in fsck: error %i updating inode", ret); goto err; } @@ -1064,7 +1063,7 @@ static void inc_link(struct bch_fs *c, nlink_table *links, link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL); if (!link) { - bch_verbose(c, "allocation failed during fs gc - will need another pass"); + bch_verbose(c, "allocation failed during fsck - will need another pass"); *range_end = inum; return; } @@ -1111,7 +1110,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, } ret = bch2_trans_exit(&trans) ?: ret; if (ret) - bch_err(c, "error in fs gc: btree error %i while walking dirents", ret); + bch_err(c, "error in fsck: btree error %i while walking dirents", ret); 
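The mount-path hunks earlier in this patch lean on the kernel's ERR_PTR()/PTR_ERR() convention, where a single return slot carries either a valid pointer or a negative errno. A userspace re-implementation, purely for illustration (the real helpers live in include/linux/err.h):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095	/* errnos live in the top page of the address space */

static inline void *ERR_PTR(long error)   { return (void *) error; }
static inline long PTR_ERR(const void *p) { return (long) p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long) p >= (unsigned long) -MAX_ERRNO;
}

/* hypothetical stand-in for a root-inode lookup that can fail */
static void *get_root_inode(int fail)
{
	static int the_inode = 42;

	return fail ? ERR_PTR(-ENOENT) : &the_inode;
}

int main(void)
{
	void *vinode = get_root_inode(1);

	if (IS_ERR(vinode)) {
		printf("error mounting: error getting root inode %d\n",
		       (int) PTR_ERR(vinode));
		return 1;
	}
	return 0;
}

This is why the diff casts with (int) PTR_ERR(vinode) before logging: PTR_ERR() yields a long, and the format string expects an int-sized errno.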
return ret; } @@ -1252,8 +1251,7 @@ static int check_inode(struct btree_trans *trans, ret = bch2_inode_rm(c, u.bi_inum); if (ret) - bch_err(c, "error in fs gc: error %i " - "while deleting inode", ret); + bch_err(c, "error in fsck: error %i while deleting inode", ret); return ret; } @@ -1270,8 +1268,7 @@ static int check_inode(struct btree_trans *trans, ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size); if (ret) { - bch_err(c, "error in fs gc: error %i " - "truncating inode", ret); + bch_err(c, "error in fsck: error %i truncating inode", ret); return ret; } @@ -1296,8 +1293,7 @@ static int check_inode(struct btree_trans *trans, sectors = bch2_count_inode_sectors(trans, u.bi_inum); if (sectors < 0) { - bch_err(c, "error in fs gc: error %i " - "recounting inode sectors", + bch_err(c, "error in fsck: error %i recounting inode sectors", (int) sectors); return sectors; } @@ -1317,7 +1313,7 @@ static int check_inode(struct btree_trans *trans, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); if (ret && ret != -EINTR) - bch_err(c, "error in fs gc: error %i " + bch_err(c, "error in fsck: error %i " "updating inode", ret); } fsck_err: @@ -1388,7 +1384,7 @@ fsck_err: bch2_trans_exit(&trans); if (ret2) - bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2); + bch_err(c, "error in fsck: btree error %i while walking inodes", ret2); return ret ?: ret2; } @@ -1429,101 +1425,60 @@ static int check_inode_nlinks(struct bch_fs *c, return ret; } -noinline_for_stack -static int check_inodes_fast(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - struct bkey_s_c_inode inode; - int ret; - - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); - - for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { - if (k.k->type != KEY_TYPE_inode) - continue; - - inode = bkey_s_c_to_inode(k); - - if (inode.v->bi_flags & - (BCH_INODE_I_SIZE_DIRTY| - BCH_INODE_I_SECTORS_DIRTY| - BCH_INODE_UNLINKED)) { - ret = check_inode(&trans, NULL, iter, inode, NULL); - BUG_ON(ret == -EINTR); - if (ret) - break; - } - } - BUG_ON(ret == -EINTR); - - return bch2_trans_exit(&trans) ?: ret; -} - /* * Checks for inconsistencies that shouldn't happen, unless we have a bug. 
* Doesn't fix them yet, mainly because they haven't yet been observed: */ -static int bch2_fsck_full(struct bch_fs *c) +int bch2_fsck_full(struct bch_fs *c) { struct bch_inode_unpacked root_inode, lostfound_inode; - int ret; - bch_verbose(c, "starting fsck:"); - ret = check_extents(c) ?: + return check_extents(c) ?: check_dirents(c) ?: check_xattrs(c) ?: check_root(c, &root_inode) ?: check_lostfound(c, &root_inode, &lostfound_inode) ?: check_directory_structure(c, &lostfound_inode) ?: check_inode_nlinks(c, &lostfound_inode); - - bch2_flush_fsck_errs(c); - bch_verbose(c, "fsck done"); - - return ret; } -static int bch2_fsck_inode_nlink(struct bch_fs *c) +int bch2_fsck_inode_nlink(struct bch_fs *c) { struct bch_inode_unpacked root_inode, lostfound_inode; - int ret; - bch_verbose(c, "checking inode link counts:"); - ret = check_root(c, &root_inode) ?: + return check_root(c, &root_inode) ?: check_lostfound(c, &root_inode, &lostfound_inode) ?: check_inode_nlinks(c, &lostfound_inode); - - bch2_flush_fsck_errs(c); - bch_verbose(c, "done"); - - return ret; } -static int bch2_fsck_walk_inodes_only(struct bch_fs *c) +int bch2_fsck_walk_inodes_only(struct bch_fs *c) { + struct btree_trans trans; + struct btree_iter *iter; + struct bkey_s_c k; + struct bkey_s_c_inode inode; int ret; - bch_verbose(c, "walking inodes:"); - ret = check_inodes_fast(c); - - bch2_flush_fsck_errs(c); - bch_verbose(c, "done"); + bch2_trans_init(&trans, c); + bch2_trans_preload_iters(&trans); - return ret; -} + for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { + if (k.k->type != KEY_TYPE_inode) + continue; -int bch2_fsck(struct bch_fs *c) -{ - if (c->opts.fsck) - return bch2_fsck_full(c); + inode = bkey_s_c_to_inode(k); - if (c->sb.clean) - return 0; + if (inode.v->bi_flags & + (BCH_INODE_I_SIZE_DIRTY| + BCH_INODE_I_SECTORS_DIRTY| + BCH_INODE_UNLINKED)) { + ret = check_inode(&trans, NULL, iter, inode, NULL); + BUG_ON(ret == -EINTR); + if (ret) + break; + } + } + BUG_ON(ret == -EINTR); - return c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK) - ? 
bch2_fsck_walk_inodes_only(c) - : bch2_fsck_inode_nlink(c); + return bch2_trans_exit(&trans) ?: ret; } diff --git a/fs/bcachefs/fsck.h b/fs/bcachefs/fsck.h index 97460452e842..9e4af02bde1e 100644 --- a/fs/bcachefs/fsck.h +++ b/fs/bcachefs/fsck.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_FSCK_H #define _BCACHEFS_FSCK_H -int bch2_fsck(struct bch_fs *); +int bch2_fsck_full(struct bch_fs *); +int bch2_fsck_inode_nlink(struct bch_fs *); +int bch2_fsck_walk_inodes_only(struct bch_fs *); #endif /* _BCACHEFS_FSCK_H */ diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 53bf06e70cd5..a69bd3718ac4 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -233,16 +233,11 @@ enum opt_type { NO_SB_OPT, false, \ NULL, "Super read only mode - no writes at all will be issued,\n"\ "even if we have to replay the journal") \ - x(noreplay, u8, \ - OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false, \ - NULL, "Don't replay the journal (only for internal tools)")\ x(norecovery, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false, \ - NULL, NULL) \ + NULL, "Don't replay the journal") \ x(noexcl, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a80de5d814d6..3f0eda9f5d0c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -714,8 +714,8 @@ int bch2_fs_recovery(struct bch_fs *c) if (!c->sb.clean) { ret = bch2_journal_seq_blacklist_add(c, - journal_seq, - journal_seq + 4); + journal_seq, + journal_seq + 4); if (ret) { bch_err(c, "error creating new journal seq blacklist entry"); goto err; @@ -763,7 +763,7 @@ int bch2_fs_recovery(struct bch_fs *c) * journal; after an unclean shutdown we have to walk all * pointers to metadata: */ - bch_verbose(c, "starting metadata mark and sweep:"); + bch_info(c, "starting metadata mark and sweep"); err = "error in mark and sweep"; ret = bch2_gc(c, NULL, true, true); if (ret) @@ -774,7 +774,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.fsck || !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) || test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { - bch_verbose(c, "starting mark and sweep:"); + bch_info(c, "starting mark and sweep"); err = "error in mark and sweep"; ret = bch2_gc(c, &journal_keys, true, false); if (ret) @@ -792,36 +792,63 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->sb.encryption_type && !c->sb.clean) atomic64_add(1 << 16, &c->key_version); - if (c->opts.noreplay) + if (c->opts.norecovery) goto out; - bch_verbose(c, "starting journal replay:"); + bch_verbose(c, "starting journal replay"); err = "journal replay failed"; ret = bch2_journal_replay(c, journal_keys); if (ret) goto err; bch_verbose(c, "journal replay done"); - bch_verbose(c, "writing allocation info:"); - err = "error writing out alloc info"; - ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?: - bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote); - if (ret) { - bch_err(c, "error writing alloc info"); - goto err; + if (!c->opts.nochanges) { + /* + * note that even when filesystem was clean there might be work + * to do here, if we ran gc (because of fsck) which recalculated + * oldest_gen: + */ + bch_verbose(c, "writing allocation info"); + err = "error writing out alloc info"; + ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?: + bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote); + if (ret) { + bch_err(c, "error writing alloc info"); + goto err; + } + bch_verbose(c, "alloc write done"); } - bch_verbose(c, "alloc write done"); - if (c->opts.norecovery) - goto out; + if (!c->sb.clean) { + if (!(c->sb.features & 
(1 << BCH_FEATURE_ATOMIC_NLINK))) { + bch_info(c, "checking inode link counts"); + err = "error in recovery"; + ret = bch2_fsck_inode_nlink(c); + if (ret) + goto err; + bch_verbose(c, "check inodes done"); - err = "error in fsck"; - ret = bch2_fsck(c); - if (ret) - goto err; + } else { + bch_verbose(c, "checking for deleted inodes"); + err = "error in recovery"; + ret = bch2_fsck_walk_inodes_only(c); + if (ret) + goto err; + bch_verbose(c, "check inodes done"); + } + } + + if (c->opts.fsck) { + bch_info(c, "starting fsck"); + err = "error in fsck"; + ret = bch2_fsck_full(c); + if (ret) + goto err; + bch_verbose(c, "fsck done"); + } if (enabled_qtypes(c)) { - bch_verbose(c, "reading quotas:"); + bch_verbose(c, "reading quotas"); ret = bch2_fs_quota_read(c); if (ret) goto err; @@ -857,14 +884,18 @@ int bch2_fs_recovery(struct bch_fs *c) c->journal_seq_blacklist_table->nr > 128) queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); out: + ret = 0; +err: +fsck_err: + bch2_flush_fsck_errs(c); journal_keys_free(&journal_keys); journal_entries_free(&journal_entries); kfree(clean); + if (ret) + bch_err(c, "Error in recovery: %s (%i)", err, ret); + else + bch_verbose(c, "ret %i", ret); return ret; -err: -fsck_err: - bch_err(c, "Error in recovery: %s (%i)", err, ret); - goto out; } int bch2_fs_initialize(struct bch_fs *c) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8f25c1d9b8cb..654ccc611099 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -305,7 +305,6 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && test_bit(BCH_FS_STARTED, &c->flags) && - !c->opts.noreplay && !c->opts.norecovery) bch2_fs_mark_clean(c); @@ -379,9 +378,14 @@ int __bch2_fs_read_write(struct bch_fs *c, bool early) if (test_bit(BCH_FS_RW, &c->flags)) return 0; - if (c->opts.nochanges || - c->opts.noreplay) - return -EINVAL; + /* + * nochanges is used for fsck -n mode - we have to allow going rw + * during recovery for that to work: + */ + if (c->opts.norecovery || + (c->opts.nochanges && + (!early || c->opts.read_only))) + return -EROFS; ret = bch2_fs_mark_dirty(c); if (ret) @@ -694,10 +698,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->block_bits = ilog2(c->opts.block_size); c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); - c->opts.nochanges |= c->opts.noreplay; - c->opts.read_only |= c->opts.nochanges; - c->opts.read_only |= c->opts.noreplay; - if (bch2_fs_init_fault("fs_alloc")) goto err; @@ -776,7 +776,41 @@ err: goto out; } -const char *bch2_fs_start(struct bch_fs *c) +noinline_for_stack +static void print_mount_opts(struct bch_fs *c) +{ + enum bch_opt_id i; + char buf[512]; + struct printbuf p = PBUF(buf); + bool first = true; + + strcpy(buf, "(null)"); + + if (c->opts.read_only) { + pr_buf(&p, "ro"); + first = false; + } + + for (i = 0; i < bch2_opts_nr; i++) { + const struct bch_option *opt = &bch2_opt_table[i]; + u64 v = bch2_opt_get_by_id(&c->opts, i); + + if (!(opt->mode & OPT_MOUNT)) + continue; + + if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) + continue; + + if (!first) + pr_buf(&p, ","); + first = false; + bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); + } + + bch_info(c, "mounted with opts: %s", buf); +} + +int bch2_fs_start(struct bch_fs *c) { const char *err = "cannot allocate memory"; struct bch_sb_field_members *mi; @@ -815,26 +849,27 @@ const char *bch2_fs_start(struct bch_fs *c) goto err; err = "dynamic fault"; + ret = 
-EINVAL; if (bch2_fs_init_fault("fs_start")) goto err; - if (c->opts.read_only) { + if (c->opts.read_only || c->opts.nochanges) { bch2_fs_read_only(c); } else { - if (!test_bit(BCH_FS_RW, &c->flags) - ? bch2_fs_read_write(c) - : bch2_fs_read_write_late(c)) { - err = "error going read write"; + err = "error going read write"; + ret = !test_bit(BCH_FS_RW, &c->flags) + ? bch2_fs_read_write(c) + : bch2_fs_read_write_late(c); + if (ret) goto err; - } } set_bit(BCH_FS_STARTED, &c->flags); - - err = NULL; + print_mount_opts(c); + ret = 0; out: mutex_unlock(&c->state_lock); - return err; + return ret; err: switch (ret) { case BCH_FSCK_ERRORS_NOT_FIXED: @@ -862,7 +897,7 @@ err: break; } - BUG_ON(!err); + BUG_ON(!ret); goto out; } @@ -1789,9 +1824,9 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err_print; if (!c->opts.nostart) { - err = bch2_fs_start(c); - if (err) - goto err_print; + ret = bch2_fs_start(c); + if (ret) + goto err; } out: kfree(sb); @@ -1818,6 +1853,7 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, const char *err; struct bch_fs *c; bool allocated_fs = false; + int ret; err = bch2_sb_validate(sb); if (err) @@ -1850,8 +1886,9 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, mutex_unlock(&c->sb_lock); if (!c->opts.nostart && bch2_fs_may_start(c)) { - err = bch2_fs_start(c); - if (err) + err = "error starting filesystem"; + ret = bch2_fs_start(c); + if (ret) goto err; } diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 92ef3e7c8dc2..1b97c6115535 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -224,7 +224,7 @@ int bch2_fs_read_write_early(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); -const char *bch2_fs_start(struct bch_fs *); +int bch2_fs_start(struct bch_fs *); struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); const char *bch2_fs_open_incremental(const char *path); -- cgit v1.2.3 From 7d825866604b34ba02b4c286c6fd6d232fd06cd0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 May 2019 10:08:55 -0400 Subject: bcachefs: Avoid spurious transaction restarts Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 43 ++++++++++++++++++++----------------------- fs/bcachefs/btree_iter.h | 2 ++ fs/bcachefs/fs.c | 1 + 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index cbf9281e195b..b058b6f3b89d 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -160,7 +160,7 @@ success: } static inline bool btree_iter_get_locks(struct btree_iter *iter, - bool upgrade) + bool upgrade, bool trace) { unsigned l = iter->level; int fail_idx = -1; @@ -172,16 +172,10 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, if (!(upgrade ? bch2_btree_node_upgrade(iter, l) : bch2_btree_node_relock(iter, l))) { - if (upgrade) - trace_node_upgrade_fail(l, iter->l[l].lock_seq, - is_btree_node(iter, l) - ? 0 - : (unsigned long) iter->l[l].b, - is_btree_node(iter, l) - ? iter->l[l].b->c.lock.state.seq - : 0); - else - trace_node_relock_fail(l, iter->l[l].lock_seq, + if (trace) + (upgrade + ? trace_node_upgrade_fail + : trace_node_relock_fail)(l, iter->l[l].lock_seq, is_btree_node(iter, l) ? 
0 : (unsigned long) iter->l[l].b, @@ -251,7 +245,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, linked->locks_want = max_t(unsigned, linked->locks_want, __fls(linked->nodes_locked) + 1); - btree_iter_get_locks(linked, true); + btree_iter_get_locks(linked, true, false); } ret = false; } @@ -268,7 +262,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, max(level + 1, max_t(unsigned, linked->locks_want, iter->locks_want)); - btree_iter_get_locks(linked, true); + btree_iter_get_locks(linked, true, false); } ret = false; } @@ -312,10 +306,10 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans) #endif __flatten -static bool bch2_btree_iter_relock(struct btree_iter *iter) +static bool bch2_btree_iter_relock(struct btree_iter *iter, bool trace) { return iter->uptodate >= BTREE_ITER_NEED_RELOCK - ? btree_iter_get_locks(iter, false) + ? btree_iter_get_locks(iter, false, trace) : true; } @@ -328,7 +322,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, iter->locks_want = new_locks_want; - if (btree_iter_get_locks(iter, true)) + if (btree_iter_get_locks(iter, true, true)) return true; /* @@ -341,7 +335,7 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, linked->btree_id == iter->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_iter_get_locks(linked, true); + btree_iter_get_locks(linked, true, false); } return false; @@ -416,7 +410,8 @@ bool bch2_trans_relock(struct btree_trans *trans) bool ret = true; trans_for_each_iter(trans, iter) - ret &= bch2_btree_iter_relock(iter); + if (iter->uptodate == BTREE_ITER_NEED_RELOCK) + ret &= bch2_btree_iter_relock(iter, true); return ret; } @@ -1061,7 +1056,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) if (unlikely(iter->level >= BTREE_MAX_DEPTH)) return 0; - if (bch2_btree_iter_relock(iter)) + if (bch2_btree_iter_relock(iter, false)) return 0; /* @@ -1672,11 +1667,13 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans, return ret; } -static int btree_trans_realloc_iters(struct btree_trans *trans, - unsigned new_size) +int bch2_trans_realloc_iters(struct btree_trans *trans, + unsigned new_size) { void *new_iters, *new_updates; + new_size = roundup_pow_of_two(new_size); + BUG_ON(new_size > BTREE_ITER_MAX); if (new_size <= trans->size) @@ -1727,7 +1724,7 @@ success: void bch2_trans_preload_iters(struct btree_trans *trans) { - btree_trans_realloc_iters(trans, BTREE_ITER_MAX); + bch2_trans_realloc_iters(trans, BTREE_ITER_MAX); } static int btree_trans_iter_alloc(struct btree_trans *trans) @@ -1738,7 +1735,7 @@ static int btree_trans_iter_alloc(struct btree_trans *trans) goto got_slot; if (trans->nr_iters == trans->size) { - int ret = btree_trans_realloc_iters(trans, trans->size * 2); + int ret = bch2_trans_realloc_iters(trans, trans->size * 2); if (ret) return ret; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index ee2cea2b0b44..3089aa7cf8e9 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -258,7 +258,9 @@ static inline int bkey_err(struct bkey_s_c k) /* new multiple iterator interface: */ +int bch2_trans_realloc_iters(struct btree_trans *, unsigned); void bch2_trans_preload_iters(struct btree_trans *); + int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 
aac59b8a15eb..b5a025939f51 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -415,6 +415,7 @@ __bch2_create(struct mnt_idmap *idmap, mutex_lock(&dir->ei_update_lock); bch2_trans_init(&trans, c); + bch2_trans_realloc_iters(&trans, 8); retry: bch2_trans_begin(&trans); -- cgit v1.2.3 From 20bceecb3159bbe06a26fc6747457d9de02ec227 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 May 2019 10:54:43 -0400 Subject: bcachefs: More work to avoid transaction restarts Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 4 +- fs/bcachefs/alloc_background.c | 8 ++-- fs/bcachefs/btree_cache.c | 3 +- fs/bcachefs/btree_gc.c | 6 +-- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/btree_iter.c | 53 +++++++++++++--------- fs/bcachefs/btree_iter.h | 5 +-- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/btree_update_leaf.c | 31 ++++++++----- fs/bcachefs/debug.c | 6 +-- fs/bcachefs/dirent.c | 4 +- fs/bcachefs/ec.c | 13 +++--- fs/bcachefs/extents.c | 2 +- fs/bcachefs/fs-io.c | 22 ++++----- fs/bcachefs/fs.c | 17 ++++--- fs/bcachefs/fsck.c | 21 +++------ fs/bcachefs/inode.c | 2 +- fs/bcachefs/io.c | 10 ++--- fs/bcachefs/journal_seq_blacklist.c | 2 +- fs/bcachefs/migrate.c | 5 +-- fs/bcachefs/move.c | 7 ++- fs/bcachefs/quota.c | 6 +-- fs/bcachefs/recovery.c | 3 +- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/tests.c | 26 +++++------ fs/bcachefs/trace.h | 90 +++++++++++++++++++++++-------------- fs/bcachefs/xattr.c | 4 +- 28 files changed, 189 insertions(+), 169 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index c7f6bcb87387..1c3343252129 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -222,7 +222,7 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -305,7 +305,7 @@ int bch2_set_acl(struct mnt_idmap *idmap, int ret; mutex_lock(&inode->ei_update_lock); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); if (type == ACL_TYPE_ACCESS && acl) { ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl); diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 61991d898d99..23b81f6615ca 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -229,7 +229,7 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) unsigned i; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret) bch2_mark_key(c, k, true, 0, NULL, 0, @@ -288,7 +288,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) if (k->k.p.offset >= ca->mi.nbuckets) return 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p, BTREE_ITER_INTENT); @@ -333,7 +333,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote) BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -1032,7 +1032,7 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) u64 journal_seq = 0; int ret = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), diff --git a/fs/bcachefs/btree_cache.c 
b/fs/bcachefs/btree_cache.c index 34a6d67a5bf1..60a7acd18603 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -732,8 +732,7 @@ retry: goto retry; trans_restart(); - trace_trans_restart_btree_node_reused(c, - iter->trans->ip); + trace_trans_restart_btree_node_reused(iter->trans->ip); return ERR_PTR(-EINTR); } } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 3dc073e5e5b6..047f30efdd7a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -217,7 +217,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, u8 max_stale; int ret = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); @@ -286,7 +286,7 @@ static int mark_journal_key(struct bch_fs *c, enum btree_id id, if (ret) return ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, id, bkey_start_pos(&insert->k), BTREE_ITER_SLOTS, k, ret) { @@ -1055,7 +1055,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) struct btree *merge[GC_MERGE_NODES]; u32 lock_seq[GC_MERGE_NODES]; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); /* * XXX: We don't have a good way of positively matching on sibling nodes diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index baffb58fd10b..d4806809fc0d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1151,7 +1151,7 @@ static void bch2_btree_node_write_error(struct bch_fs *c, struct btree_iter *iter; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p, BTREE_MAX_DEPTH, b->c.level, 0); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b058b6f3b89d..a906eb1c5f5a 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -270,8 +270,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, if (unlikely(!ret)) { trans_restart(); - trace_trans_restart_would_deadlock(iter->trans->c, - iter->trans->ip); + trace_trans_restart_would_deadlock(iter->trans->ip); return false; } @@ -1667,7 +1666,7 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans, return ret; } -int bch2_trans_realloc_iters(struct btree_trans *trans, +static int bch2_trans_realloc_iters(struct btree_trans *trans, unsigned new_size) { void *new_iters, *new_updates; @@ -1715,18 +1714,13 @@ success: if (trans->iters_live) { trans_restart(); - trace_trans_restart_iters_realloced(trans->c, trans->ip); + trace_trans_restart_iters_realloced(trans->ip, trans->size); return -EINTR; } return 0; } -void bch2_trans_preload_iters(struct btree_trans *trans) -{ - bch2_trans_realloc_iters(trans, BTREE_ITER_MAX); -} - static int btree_trans_iter_alloc(struct btree_trans *trans) { unsigned idx = __ffs64(~trans->iters_linked); @@ -1866,32 +1860,41 @@ struct btree_iter *bch2_trans_copy_iter(struct btree_trans *trans, return &trans->iters[idx]; } -void *bch2_trans_kmalloc(struct btree_trans *trans, - size_t size) +static int bch2_trans_preload_mem(struct btree_trans *trans, size_t size) { - void *ret; - - if (trans->mem_top + size > trans->mem_bytes) { + if (size > trans->mem_bytes) { size_t old_bytes = trans->mem_bytes; - size_t new_bytes = roundup_pow_of_two(trans->mem_top + size); + size_t new_bytes = roundup_pow_of_two(size); void *new_mem = krealloc(trans->mem, new_bytes, GFP_NOFS); if (!new_mem) - return ERR_PTR(-ENOMEM); + return -ENOMEM; trans->mem = new_mem; 
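As a rough model of what bch2_trans_preload_mem() and bch2_trans_kmalloc() are doing here, consider this userspace sketch (assumed shape only: it skips the power-of-two rounding and returns NULL where the kernel returns an ERR_PTR()). Allocations are bumped out of one buffer, and growing the buffer mid-use signals -EINTR so the caller restarts the transaction, since pointers handed out earlier may have moved:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct trans_mem {
	char	*mem;
	size_t	top;		/* bump pointer */
	size_t	bytes;		/* buffer capacity */
};

static int preload_mem(struct trans_mem *t, size_t size)
{
	if (size > t->bytes) {
		size_t old_bytes = t->bytes;
		char *new_mem = realloc(t->mem, size);

		if (!new_mem)
			return -ENOMEM;

		t->mem = new_mem;
		t->bytes = size;

		/* grew mid-use: previously returned pointers are stale */
		if (old_bytes)
			return -EINTR;
	}
	return 0;
}

static void *trans_kmalloc(struct trans_mem *t, size_t size)
{
	int ret = preload_mem(t, t->top + size);
	void *p;

	if (ret)
		return NULL;	/* the kernel version returns ERR_PTR(ret) */

	p = t->mem + t->top;
	t->top += size;
	return p;
}

int main(void)
{
	struct trans_mem t = { 0 };

	if (preload_mem(&t, 256))	/* sized up front: no restart later */
		return 1;

	printf("p = %p\n", trans_kmalloc(&t, 64));
	free(t.mem);
	return 0;
}

This also motivates the expected_mem_bytes argument added to bch2_trans_init() in the following patch: sizing the buffer before any allocations are live makes the realloc-triggered restart unreachable in the common case.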
trans->mem_bytes = new_bytes; if (old_bytes) { trans_restart(); - trace_trans_restart_mem_realloced(trans->c, trans->ip); - return ERR_PTR(-EINTR); + trace_trans_restart_mem_realloced(trans->ip, new_bytes); + return -EINTR; } } - ret = trans->mem + trans->mem_top; + return 0; +} + +void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) +{ + void *p; + int ret; + + ret = bch2_trans_preload_mem(trans, trans->mem_top + size); + if (ret) + return ERR_PTR(ret); + + p = trans->mem + trans->mem_top; trans->mem_top += size; - return ret; + return p; } inline void bch2_trans_unlink_iters(struct btree_trans *trans, u64 iters) @@ -1938,7 +1941,9 @@ void __bch2_trans_begin(struct btree_trans *trans) bch2_btree_iter_traverse_all(trans); } -void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c) +void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, + unsigned expected_nr_iters, + size_t expected_mem_bytes) { memset(trans, 0, offsetof(struct btree_trans, iters_onstack)); @@ -1947,6 +1952,12 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c) trans->size = ARRAY_SIZE(trans->iters_onstack); trans->iters = trans->iters_onstack; trans->updates = trans->updates_onstack; + + if (expected_nr_iters > trans->size) + bch2_trans_realloc_iters(trans, expected_nr_iters); + + if (expected_mem_bytes) + bch2_trans_preload_mem(trans, expected_mem_bytes); } int bch2_trans_exit(struct btree_trans *trans) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 3089aa7cf8e9..e8c31852d5fd 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -258,9 +258,6 @@ static inline int bkey_err(struct bkey_s_c k) /* new multiple iterator interface: */ -int bch2_trans_realloc_iters(struct btree_trans *, unsigned); -void bch2_trans_preload_iters(struct btree_trans *); - int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); int bch2_trans_iter_free_on_commit(struct btree_trans *, struct btree_iter *); @@ -303,7 +300,7 @@ static inline void bch2_trans_begin_updates(struct btree_trans *trans) } void *bch2_trans_kmalloc(struct btree_trans *, size_t); -void bch2_trans_init(struct btree_trans *, struct bch_fs *); +void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); int bch2_trans_exit(struct btree_trans *); #ifdef TRACE_TRANSACTION_RESTARTS diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index a967f196c87a..c25e7a752cc9 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -128,7 +128,7 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *, struct btree_trans trans; \ int _ret; \ \ - bch2_trans_init(&trans, (_c)); \ + bch2_trans_init(&trans, (_c), 0, 0); \ \ do { \ bch2_trans_begin(&trans); \ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d0ca08a323a1..dcfcfe97b6f4 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1586,7 +1586,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, * instead of locking/reserving all the way to the root: */ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { - trace_trans_restart_iter_upgrade(c, iter->trans->ip); + trace_trans_restart_iter_upgrade(trans->ip); ret = -EINTR; goto out; } diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 88e038c1ccef..0aca109dac06 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ 
b/fs/bcachefs/btree_update_leaf.c @@ -440,7 +440,7 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans) if (!bch2_trans_relock(trans)) { trans_restart(" (iter relock after journal preres get blocked)"); - trace_trans_restart_journal_preres_get(c, trans->ip); + trace_trans_restart_journal_preres_get(trans->ip); return -EINTR; } @@ -560,7 +560,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, ret = bch2_trans_mark_update(trans, i, &trans->fs_usage_deltas); if (ret == -EINTR) - trace_trans_restart_mark(c, trans->ip); + trace_trans_restart_mark(trans->ip); if (ret) return ret; } @@ -570,7 +570,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, if (race_fault()) { ret = -EINTR; trans_restart(" (race)"); - trace_trans_restart_fault_inject(c, trans->ip); + trace_trans_restart_fault_inject(trans->ip); goto out; } @@ -719,7 +719,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, ret == -EINTR || (flags & BTREE_INSERT_NOUNLOCK)) { trans_restart(" (split)"); - trace_trans_restart_btree_node_split(c, trans->ip); + trace_trans_restart_btree_node_split(trans->ip); ret = -EINTR; } break; @@ -739,7 +739,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; trans_restart(" (iter relock after marking replicas)"); - trace_trans_restart_mark_replicas(c, trans->ip); + trace_trans_restart_mark_replicas(trans->ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RES: @@ -753,7 +753,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; trans_restart(" (iter relock after journal res get blocked)"); - trace_trans_restart_journal_res_get(c, trans->ip); + trace_trans_restart_journal_res_get(trans->ip); ret = -EINTR; break; default: @@ -766,7 +766,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret2) { trans_restart(" (traverse)"); - trace_trans_restart_traverse(c, trans->ip); + trace_trans_restart_traverse(trans->ip); return ret2; } @@ -778,7 +778,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; trans_restart(" (atomic)"); - trace_trans_restart_atomic(c, trans->ip); + trace_trans_restart_atomic(trans->ip); } return ret; @@ -809,7 +809,7 @@ static int __bch2_trans_commit(struct btree_trans *trans, if (!bch2_btree_iter_upgrade(i->iter, 1)) { trans_restart(" (failed upgrade, locks_want %u uptodate %u)", old_locks_want, old_uptodate); - trace_trans_restart_upgrade(c, trans->ip); + trace_trans_restart_upgrade(trans->ip); ret = -EINTR; goto err; } @@ -975,7 +975,9 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct btree_iter *iter; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); @@ -983,6 +985,8 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k)); ret = bch2_trans_commit(&trans, disk_res, journal_seq, flags); + if (ret == -EINTR) + goto retry; bch2_trans_exit(&trans); return ret; @@ -1071,8 +1075,11 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, struct btree_iter *iter; int ret = 0; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + /* + * XXX: whether we need mem/more iters depends on whether this btree id + * has triggers + */ + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index a11d7923ea5a..4c6fcb6f918e 
100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -221,7 +221,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) return i->ret; - bch2_trans_init(&trans, i->c); + bch2_trans_init(&trans, i->c, 0, 0); iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); k = bch2_btree_iter_peek(iter); @@ -275,7 +275,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size || !bkey_cmp(POS_MAX, i->from)) return i->ret; - bch2_trans_init(&trans, i->c); + bch2_trans_init(&trans, i->c, 0, 0); for_each_btree_node(&trans, iter, i->id, i->from, 0, b) { bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); @@ -328,7 +328,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (!i->size) return i->ret; - bch2_trans_init(&trans, i->c); + bch2_trans_init(&trans, i->c, 0, 0); iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 71971b3cc851..1442dacef0de 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -313,7 +313,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, struct bkey_s_c k; u64 inum = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_hash_lookup(&trans, bch2_dirent_hash_desc, hash_info, dir_inum, name, 0); @@ -370,7 +370,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, if (!dir_emit_dots(file, ctx)) return 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS(inode->v.i_ino, ctx->pos), 0, k, ret) { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 49cbc0bcd522..4a8aa7491fb5 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -441,7 +441,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) if (!buf) return -ENOMEM; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, stripe_idx), @@ -698,7 +698,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -765,8 +765,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, BKEY_PADDED(k) tmp; int ret = 0, dev, idx; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, bkey_start_pos(pos), @@ -1236,7 +1235,7 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote) new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL); BUG_ON(!new_key); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -1272,7 +1271,7 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys) if (ret) return ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k, ret) bch2_mark_key(c, k, true, 0, NULL, 0, 0); @@ -1299,7 +1298,7 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) size_t i, idx = 0; int ret = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 33c00db899e0..d8d128cae5b4 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1712,7 +1712,7 
@@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, end.offset += size; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, BTREE_ITER_SLOTS, k, err) { diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index bce45c87c901..9d0cca0bdfa3 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -435,8 +435,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop) BUG_ON(k->k.p.inode != inode->v.i_ino); - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -1004,7 +1003,7 @@ void bch2_readahead(struct readahead_control *ractl) ret = readpages_iter_init(&readpages_iter, ractl); BUG_ON(ret); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_SLOTS); @@ -1049,7 +1048,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; bio_add_page_contig(&rbio->bio, page); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_SLOTS); @@ -2090,8 +2089,7 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode, struct bkey_s_c k; int ret = 0; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start, BTREE_ITER_INTENT); @@ -2137,7 +2135,7 @@ static inline int range_has_data(struct bch_fs *c, struct bkey_s_c k; int ret = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) @@ -2394,8 +2392,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode, if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); /* * We need i_mutex to keep the page cache consistent with the extents @@ -2510,8 +2507,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, unsigned replicas = io_opts(c, inode).data_replicas; int ret; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); inode_lock(&inode->v); inode_dio_wait(&inode->v); @@ -2729,7 +2725,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), 0, k, ret) { @@ -2802,7 +2798,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode->v.i_ino, offset >> 9), diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b5a025939f51..defd35d04750 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -215,7 +215,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, struct bch_inode_unpacked inode_u; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -414,8 +414,7 @@ __bch2_create(struct mnt_idmap *idmap, if (!tmpfile) 
mutex_lock(&dir->ei_update_lock); - bch2_trans_init(&trans, c); - bch2_trans_realloc_iters(&trans, 8); + bch2_trans_init(&trans, c, 8, 1024); retry: bch2_trans_begin(&trans); @@ -572,7 +571,7 @@ static int __bch2_link(struct bch_fs *c, int ret; mutex_lock(&inode->ei_update_lock); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 4, 1024); retry: bch2_trans_begin(&trans); @@ -659,7 +658,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) int ret; bch2_lock_inodes(dir, inode); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 4, 1024); retry: bch2_trans_begin(&trans); @@ -870,13 +869,13 @@ static int bch2_rename2(struct mnt_idmap *idmap, return ret; } + bch2_trans_init(&trans, c, 8, 2048); + bch2_lock_inodes(i.src_dir, i.dst_dir, i.src_inode, i.dst_inode); - bch2_trans_init(&trans, c); - if (S_ISDIR(i.src_inode->v.i_mode) && inode_attrs_changing(i.dst_dir, i.src_inode)) { ret = -EXDEV; @@ -1045,7 +1044,7 @@ static int bch2_setattr_nonsize(struct mnt_idmap *idmap, if (ret) goto err; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); kfree(acl); @@ -1208,7 +1207,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(ei->v.i_ino, start >> 9), 0, k, ret) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 2dfa87edb116..e3738757b6a0 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -451,8 +451,7 @@ static int check_extents(struct bch_fs *c) u64 i_sectors; int ret = 0; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch_verbose(c, "checking extents"); @@ -547,8 +546,7 @@ static int check_dirents(struct bch_fs *c) bch_verbose(c, "checking dirents"); - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); hash_check_init(&h); @@ -704,8 +702,7 @@ static int check_xattrs(struct bch_fs *c) hash_check_init(&h); - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS(BCACHEFS_ROOT_INO, 0), 0); @@ -918,8 +915,7 @@ static int check_directory_structure(struct bch_fs *c, u64 d_inum; int ret = 0; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch_verbose(c, "checking directory structure"); @@ -1085,8 +1081,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, u64 d_inum; int ret; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); @@ -1334,8 +1329,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c, int ret = 0, ret2 = 0; u64 nlinks_pos; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(range_start, 0), 0); @@ -1459,8 +1453,7 @@ int bch2_fsck_walk_inodes_only(struct bch_fs *c) struct bkey_s_c_inode inode; int ret; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { if (k.k->type != KEY_TYPE_inode) diff --git 
a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 8e7bec8ce542..e6ad0ad51cb2 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -391,7 +391,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) if (ret) return ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index a676cc1e390d..11cdaddb1551 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -302,7 +302,7 @@ int bch2_write_index_default(struct bch_write_op *op) BUG_ON(bch2_keylist_empty(keys)); bch2_verify_keylist_sorted(keys); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, bkey_start_pos(&bch2_keylist_front(keys)->k), @@ -1271,7 +1271,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio flags &= ~BCH_READ_LAST_FRAGMENT; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, rbio->pos, BTREE_ITER_SLOTS); @@ -1319,7 +1319,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; @@ -1428,7 +1428,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) if (rbio->pick.crc.compression_type) return; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1868,7 +1868,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) BCH_READ_USER_MAPPED; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); BUG_ON(rbio->_state); BUG_ON(flags & BCH_READ_NODECODE); diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index ae64bf3248ef..787d9f7638d0 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -258,7 +258,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work) unsigned i, nr, new_nr; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for (i = 0; i < BTREE_ID_NR; i++) { struct btree_iter *iter; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 6b17d7918aa4..ad41f5e36a7c 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -42,8 +42,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) BKEY_PADDED(key) tmp; int ret = 0; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH); @@ -113,7 +112,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) if (flags & BCH_FORCE_IF_METADATA_LOST) return -EINVAL; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); closure_init_stack(&cl); for (id = 0; id < BTREE_ID_NR; id++) { diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 64ac8244e1e0..96f9f5950438 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -61,8 +61,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct keylist *keys = &op->insert_keys; int ret = 0; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, 
bkey_start_pos(&bch2_keylist_front(keys)->k), @@ -499,7 +498,7 @@ int bch2_move_data(struct bch_fs *c, INIT_LIST_HEAD(&ctxt.reads); init_waitqueue_head(&ctxt.wait); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_USER; stats->btree_id = BTREE_ID_EXTENTS; @@ -633,7 +632,7 @@ static int bch2_move_btree(struct bch_fs *c, enum data_cmd cmd; int ret = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_BTREE; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index b78df735d94c..f0da0fac09bf 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -361,7 +361,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) struct bkey_s_c k; int ret = 0; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), BTREE_ITER_PREFETCH, k, ret) { @@ -433,7 +433,7 @@ int bch2_fs_quota_read(struct bch_fs *c) return ret; } - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { @@ -726,7 +726,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bkey_quota_init(&new_quota.k_i); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 3f0eda9f5d0c..59f678596a64 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -214,8 +214,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) bool split_compressed = false; int ret; - bch2_trans_init(&trans, c); - bch2_trans_preload_iters(&trans); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); retry: bch2_trans_begin(&trans); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 3139161fbe88..b0f09a31c41e 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -263,7 +263,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) if (k.k->type == KEY_TYPE_extent) { diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 0f5a3ed13f3e..92843bd09b04 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -35,7 +35,7 @@ static void test_delete(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p, BTREE_ITER_INTENT); @@ -67,7 +67,7 @@ static void test_delete_written(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p, BTREE_ITER_INTENT); @@ -95,7 +95,7 @@ static void test_iterate(struct bch_fs *c, u64 nr) u64 i; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); delete_test_keys(c); @@ -140,7 +140,7 @@ static void test_iterate_extents(struct bch_fs *c, u64 nr) u64 i; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); delete_test_keys(c); @@ -190,7 +190,7 @@ static void test_iterate_slots(struct bch_fs *c, u64 nr) u64 i; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, 
c, 0, 0); delete_test_keys(c); @@ -244,7 +244,7 @@ static void test_iterate_slots_extents(struct bch_fs *c, u64 nr) u64 i; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); delete_test_keys(c); @@ -305,7 +305,7 @@ static void test_peek_end(struct bch_fs *c, u64 nr) struct btree_iter *iter; struct bkey_s_c k; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN, 0); @@ -324,7 +324,7 @@ static void test_peek_end_extents(struct bch_fs *c, u64 nr) struct btree_iter *iter; struct bkey_s_c k; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0); @@ -430,7 +430,7 @@ static void rand_lookup(struct bch_fs *c, u64 nr) struct bkey_s_c k; u64 i; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for (i = 0; i < nr; i++) { iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, @@ -451,7 +451,7 @@ static void rand_mixed(struct bch_fs *c, u64 nr) int ret; u64 i; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for (i = 0; i < nr; i++) { iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, @@ -503,7 +503,7 @@ static void seq_insert(struct bch_fs *c, u64 nr) bkey_cookie_init(&insert.k_i); - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { @@ -526,7 +526,7 @@ static void seq_lookup(struct bch_fs *c, u64 nr) struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) ; @@ -540,7 +540,7 @@ static void seq_overwrite(struct bch_fs *c, u64 nr) struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, BTREE_ITER_INTENT, k, ret) { diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 86f58206365d..a9fcb5442186 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -500,16 +500,14 @@ TRACE_EVENT(copygc, ); DECLARE_EVENT_CLASS(transaction_restart, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip), + TP_PROTO(unsigned long ip), + TP_ARGS(ip), TP_STRUCT__entry( - __array(char, name, 16) __field(unsigned long, ip ) ), TP_fast_assign( - memcpy(__entry->name, c->name, 16); __entry->ip = ip; ), @@ -517,73 +515,97 @@ DECLARE_EVENT_CLASS(transaction_restart, ); DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); -DEFINE_EVENT(transaction_restart, trans_restart_iters_realloced, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) +TRACE_EVENT(trans_restart_iters_realloced, + TP_PROTO(unsigned long ip, unsigned nr), + TP_ARGS(ip, nr), + + TP_STRUCT__entry( + __field(unsigned long, ip ) + __field(unsigned, nr ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->nr = nr; + ), + + TP_printk("%pS nr %u", (void *) __entry->ip, __entry->nr) ); -DEFINE_EVENT(transaction_restart, trans_restart_mem_realloced, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) +TRACE_EVENT(trans_restart_mem_realloced, + TP_PROTO(unsigned long ip, unsigned long bytes), + 
TP_ARGS(ip, bytes), + + TP_STRUCT__entry( + __field(unsigned long, ip ) + __field(unsigned long, bytes ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->bytes = bytes; + ), + + TP_printk("%pS bytes %lu", (void *) __entry->ip, __entry->bytes) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_mark, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_upgrade, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_traverse, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DEFINE_EVENT(transaction_restart, trans_restart_atomic, - TP_PROTO(struct bch_fs *c, unsigned long ip), - TP_ARGS(c, ip) + TP_PROTO(unsigned long ip), + TP_ARGS(ip) ); DECLARE_EVENT_CLASS(node_lock_fail, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 99fb42225508..2ccf64db8147 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -126,7 +126,7 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, struct bkey_s_c_xattr xattr; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &inode->ei_str_hash, inode->v.i_ino, @@ -277,7 +277,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) u64 inum = dentry->d_inode->i_ino; int ret; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS(inum, 0), 0, k, ret) { -- cgit v1.2.3 From cdeeb75ea9e329b6e02e7956f741de7c9ddfbb3b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 21 May 2019 13:42:02 -0400 Subject: bcachefs: fix a mount error path Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index defd35d04750..afe930532224 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1597,7 +1597,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons */ c1 = bch2_path_to_fs(devs[0]); - if (!c1) + if (IS_ERR(c1)) return c; for (i = 1; i < nr_devs; i++) { -- cgit v1.2.3 From 168f4c5fb375131bd0f5996b549c5e13cc2c2bb5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 24 Jun 2019 18:24:38 -0400 
Subject: bcachefs: Improve bch2_lock_inodes() Can now be used for the two different types of locks we have so far Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 4 ++-- fs/bcachefs/fs.c | 10 ++++++---- fs/bcachefs/fs.h | 34 ++++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 4dca716217a6..0cf2621ec4fc 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -205,7 +205,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, if (ret) goto err2; - bch2_lock_inodes(src, dst); + bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); if (inode_attr_changing(src, dst, Inode_opt_project)) { ret = bch2_fs_quota_transfer(c, dst, @@ -218,7 +218,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); err3: - bch2_unlock_inodes(src, dst); + bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); /* return true if we did work */ if (ret >= 0) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index afe930532224..c806ebad9cde 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -657,7 +657,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct btree_trans trans; int ret; - bch2_lock_inodes(dir, inode); + bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); retry: bch2_trans_begin(&trans); @@ -690,7 +690,7 @@ retry: ATTR_MTIME); err: bch2_trans_exit(&trans); - bch2_unlock_inodes(dir, inode); + bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); return ret; } @@ -871,7 +871,8 @@ static int bch2_rename2(struct mnt_idmap *idmap, bch2_trans_init(&trans, c, 8, 2048); - bch2_lock_inodes(i.src_dir, + bch2_lock_inodes(INODE_UPDATE_LOCK, + i.src_dir, i.dst_dir, i.src_inode, i.dst_inode); @@ -969,7 +970,8 @@ err: 1 << QTYP_PRJ, KEY_TYPE_QUOTA_NOCHECK); - bch2_unlock_inodes(i.src_dir, + bch2_unlock_inodes(INODE_UPDATE_LOCK, + i.src_dir, i.dst_dir, i.src_inode, i.dst_inode); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index e72d6a58b322..de07f0f1dd51 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -57,24 +57,42 @@ static inline int ptrcmp(void *l, void *r) return cmp_int(l, r); } -#define __bch2_lock_inodes(_lock, ...) \ +enum bch_inode_lock_op { + INODE_LOCK = (1U << 0), + INODE_UPDATE_LOCK = (1U << 1), +}; + +#define bch2_lock_inodes(_locks, ...) \ do { \ struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ unsigned i; \ \ - bubble_sort(&a[1], ARRAY_SIZE(a) - 1 , ptrcmp); \ + bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ \ - for (i = ARRAY_SIZE(a) - 1; a[i]; --i) \ + for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if (_lock) \ + if (_locks & INODE_LOCK) \ + down_write_nested(&a[i]->v.i_rwsem, i); \ + if (_locks & INODE_UPDATE_LOCK) \ mutex_lock_nested(&a[i]->ei_update_lock, i);\ - else \ - mutex_unlock(&a[i]->ei_update_lock); \ } \ } while (0) -#define bch2_lock_inodes(...) __bch2_lock_inodes(true, __VA_ARGS__) -#define bch2_unlock_inodes(...) __bch2_lock_inodes(false, __VA_ARGS__) +#define bch2_unlock_inodes(_locks, ...) 
\ +do { \ + struct bch_inode_info *a[] = { NULL, __VA_ARGS__ }; \ + unsigned i; \ + \ + bubble_sort(&a[1], ARRAY_SIZE(a) - 1, ptrcmp); \ + \ + for (i = 1; i < ARRAY_SIZE(a); i++) \ + if (a[i] != a[i - 1]) { \ + if (_locks & INODE_LOCK) \ + up_write(&a[i]->v.i_rwsem); \ + if (_locks & INODE_UPDATE_LOCK) \ + mutex_unlock(&a[i]->ei_update_lock); \ + } \ +} while (0) static inline struct bch_inode_info *file_bch_inode(struct file *file) { -- cgit v1.2.3 From e1036a2a718f2cbd082568c881f677cf5fd9f442 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 2 Jul 2019 14:59:15 -0400 Subject: bcachefs: Always touch page state with page locked This will mean we don't have to use cmpxchg for modifying page state, which will simplify a fair amount of code Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 93 ++++++++++++++++++++++++++++++----------------------- fs/bcachefs/fs-io.h | 2 -- fs/bcachefs/fs.c | 2 +- 3 files changed, 54 insertions(+), 43 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 54b071b9ca2c..bf03048252ec 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -500,11 +500,6 @@ static inline struct bch_io_opts io_opts(struct bch_fs *c, struct bch_inode_info /* stored in page->private: */ -/* - * bch_page_state has to (unfortunately) be manipulated with cmpxchg - we could - * almost protected it with the page lock, except that bch2_writepage_io_done has - * to update the sector counts (and from interrupt/bottom half context). - */ struct bch_page_state { union { struct { /* existing data: */ @@ -550,6 +545,7 @@ static inline struct bch_page_state *page_state(struct page *page) { struct bch_page_state *s = (void *) &page->private; + EBUG_ON(!PageLocked(page)); BUILD_BUG_ON(sizeof(*s) > sizeof(page->private)); if (!PagePrivate(page)) @@ -589,15 +585,20 @@ static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *i __bch2_put_page_reservation(c, inode, s); } +static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) +{ + /* XXX: this should not be open coded */ + return inode->ei_inode.bi_data_replicas + ? inode->ei_inode.bi_data_replicas - 1 + : c->opts.data_replicas; +} + static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode, struct page *page, bool check_enospc) { struct bch_page_state *s = page_state(page), new; - /* XXX: this should not be open coded */ - unsigned nr_replicas = inode->ei_inode.bi_data_replicas - ? 
inode->ei_inode.bi_data_replicas - 1 - : c->opts.data_replicas; + unsigned nr_replicas = inode_nr_replicas(c, inode); struct disk_reservation disk_res; struct quota_res quota_res = { 0 }; int ret; @@ -655,7 +656,7 @@ static void bch2_clear_page_bits(struct page *page) __bch2_put_page_reservation(c, inode, s); } -bool bch2_dirty_folio(struct address_space *mapping, struct folio *folio) +static void __bch2_set_page_dirty(struct address_space *mapping, struct folio *folio) { struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; @@ -673,8 +674,14 @@ bool bch2_dirty_folio(struct address_space *mapping, struct folio *folio) i_sectors_acct(c, inode, "a_res, new.dirty_sectors - old.dirty_sectors); bch2_quota_reservation_put(c, inode, "a_res); +} + +static void bch2_set_page_dirty(struct address_space *mapping, struct page *page) +{ + struct folio *folio = page_folio(page); - return filemap_dirty_folio(mapping, folio); + __bch2_set_page_dirty(mapping, folio); + filemap_dirty_folio(mapping, folio); } vm_fault_t bch2_page_fault(struct vm_fault *vmf) @@ -725,7 +732,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) } if (!PageDirty(page)) - set_page_dirty(page); + bch2_set_page_dirty(mapping, page); wait_for_stable_page(page); out: bch2_pagecache_add_put(&inode->ei_pagecache_lock); @@ -1210,10 +1217,12 @@ static int __bch2_writepage(struct folio *folio, struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_writepage_state *w = data; - struct bch_page_state new, old; + struct bch_page_state *s; unsigned offset, nr_replicas_this_write; + unsigned dirty_sectors, replicas_reserved; loff_t i_size = i_size_read(&inode->v); pgoff_t end_index = i_size >> PAGE_SHIFT; + int ret; EBUG_ON(!PageUptodate(page)); @@ -1237,33 +1246,37 @@ static int __bch2_writepage(struct folio *folio, */ zero_user_segment(page, offset, PAGE_SIZE); do_io: - EBUG_ON(!PageLocked(page)); + s = page_state(page); - /* Before unlocking the page, transfer reservation to w->io: */ - old = page_state_cmpxchg(page_state(page), new, { - /* - * If we didn't get a reservation, we can only write out the - * number of (fully allocated) replicas that currently exist, - * and only if the entire page has been written: - */ - nr_replicas_this_write = - max_t(unsigned, - new.replicas_reserved, - (new.sectors == PAGE_SECTORS - ? new.nr_replicas : 0)); + ret = bch2_get_page_reservation(c, inode, page, true); + if (ret) { + SetPageError(page); + mapping_set_error(page->mapping, ret); + unlock_page(page); + return 0; + } - BUG_ON(!nr_replicas_this_write); + __bch2_set_page_dirty(page->mapping, page_folio(page)); - new.nr_replicas = w->opts.compression - ? 0 - : nr_replicas_this_write; + nr_replicas_this_write = + max_t(unsigned, + s->replicas_reserved, + (s->sectors == PAGE_SECTORS + ? s->nr_replicas : 0)); - new.replicas_reserved = 0; + s->nr_replicas = w->opts.compression + ? 
0 : nr_replicas_this_write; - new.replicas_reserved = 0; - new.sectors += new.dirty_sectors; - BUG_ON(new.sectors != PAGE_SECTORS); - new.dirty_sectors = 0; - }); + /* Before unlocking the page, transfer reservation to w->io: */ + replicas_reserved = s->replicas_reserved; + s->replicas_reserved = 0; + + dirty_sectors = s->dirty_sectors; + s->dirty_sectors = 0; + + s->sectors += dirty_sectors; + BUG_ON(s->sectors != PAGE_SECTORS); BUG_ON(PageWriteback(page)); set_page_writeback(page); @@ -1278,12 +1291,12 @@ do_io: bch2_writepage_io_alloc(c, w, inode, page, nr_replicas_this_write); - w->io->new_sectors += new.sectors - old.sectors; + w->io->new_sectors += dirty_sectors; BUG_ON(inode != w->io->op.inode); BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page)); - w->io->op.op.res.sectors += old.replicas_reserved * PAGE_SECTORS; + w->io->op.op.res.sectors += replicas_reserved * PAGE_SECTORS; w->io->op.new_i_size = i_size; if (wbc->sync_mode == WB_SYNC_ALL) @@ -1421,7 +1434,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) SetPageUptodate(page); if (!PageDirty(page)) - set_page_dirty(page); + bch2_set_page_dirty(mapping, page); inode->ei_last_dirtied = (unsigned long) current; } else { @@ -1538,7 +1551,7 @@ out: if (!PageUptodate(pages[i])) SetPageUptodate(pages[i]); if (!PageDirty(pages[i])) - set_page_dirty(pages[i]); + bch2_set_page_dirty(mapping, pages[i]); unlock_page(pages[i]); put_page(pages[i]); } @@ -2212,7 +2225,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, zero_user_segment(page, 0, end_offset); if (!PageDirty(page)) - set_page_dirty(page); + bch2_set_page_dirty(mapping, page); unlock: unlock_page(page); put_page(page); diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index 2e4bfee877d9..e263b515e901 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -9,8 +9,6 @@ #include -bool bch2_dirty_folio(struct address_space *, struct folio *); - int bch2_writepage(struct page *, struct writeback_control *); int bch2_read_folio(struct file *, struct folio *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c806ebad9cde..f69b535b1b82 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1349,7 +1349,7 @@ static const struct address_space_operations bch_address_space_operations = { .read_folio = bch2_read_folio, .writepages = bch2_writepages, .readahead = bch2_readahead, - .dirty_folio = bch2_dirty_folio, + .dirty_folio = filemap_dirty_folio, .write_begin = bch2_write_begin, .write_end = bch2_write_end, .invalidate_folio = bch2_invalidate_folio, -- cgit v1.2.3 From 99aaf57000b4091d2471ed30387d96e15f2fc38b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Jul 2019 13:52:14 -0400 Subject: bcachefs: Refactor various code to not be extent specific With reflink, various code now has to handle both KEY_TYPE_extent and KEY_TYPE_reflink_v - so, convert it to be generic across all keys with pointers.
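As a sketch of the pattern this enables: the helper below is hypothetical (not added by this patch), written only against the generic bch2_bkey_ptrs_c()/bkey_for_each_ptr() interface from this series.

/*
 * Hypothetical example, not part of this patch: with the generic
 * bkey_ptrs accessors, a helper can walk the pointers of any key
 * type that has them (extents, btree pointers, reflink_v) without
 * switching on the key type:
 */
static unsigned bch2_bkey_nr_cached_ptrs(struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const struct bch_extent_ptr *ptr;
	unsigned nr = 0;

	bkey_for_each_ptr(ptrs, ptr)
		nr += ptr->cached;

	return nr;
}

Written against bkey_s_c_extent and extent_for_each_ptr(), the same helper would have silently skipped KEY_TYPE_reflink_v keys.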
Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey.h | 2 +- fs/bcachefs/ec.c | 46 ++++++++++---------- fs/bcachefs/extents.c | 109 ++++++++++++++++++++++++++++-------------------- fs/bcachefs/extents.h | 95 ++++++++++++++++------------------------- fs/bcachefs/fs-io.c | 4 +- fs/bcachefs/fs.c | 44 ++++++++++--------- fs/bcachefs/io.c | 50 +++++++--------------- fs/bcachefs/migrate.c | 3 +- fs/bcachefs/move.c | 34 +++++++-------- fs/bcachefs/movinggc.c | 27 +++++------- fs/bcachefs/rebalance.c | 6 +-- 11 files changed, 195 insertions(+), 225 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index ba08d95aae6f..b3a08e52e6b3 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -58,7 +58,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); } -#define bkey_val_end(_k) vstruct_idx((_k).v, bkey_val_u64s((_k).k)) +#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) #define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index de31ea6c20de..77a5c3613ff7 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -162,19 +162,20 @@ static int extent_matches_stripe(struct bch_fs *c, struct bch_stripe *v, struct bkey_s_c k) { - struct bkey_s_c_extent e; - const struct bch_extent_ptr *ptr; - int idx; - if (!bkey_extent_is_data(k.k)) - return -1; - - e = bkey_s_c_to_extent(k); + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const struct bch_extent_ptr *ptr; + int idx; - extent_for_each_ptr(e, ptr) { - idx = ptr_matches_stripe(c, v, ptr); - if (idx >= 0) - return idx; + extent_for_each_ptr(e, ptr) { + idx = ptr_matches_stripe(c, v, ptr); + if (idx >= 0) + return idx; + } + break; + } } return -1; @@ -182,19 +183,20 @@ static int extent_matches_stripe(struct bch_fs *c, static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) { - struct bkey_s_c_extent e; - const union bch_extent_entry *entry; + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; - if (!bkey_extent_is_data(k.k)) - return false; + extent_for_each_entry(e, entry) + if (extent_entry_type(entry) == + BCH_EXTENT_ENTRY_stripe_ptr && + entry->stripe_ptr.idx == idx) + return true; - e = bkey_s_c_to_extent(k); - - extent_for_each_entry(e, entry) - if (extent_entry_type(entry) == - BCH_EXTENT_ENTRY_stripe_ptr && - entry->stripe_ptr.idx == idx) - return true; + break; + } + } return false; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 60fe50368d21..9f17780b8bc0 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -249,6 +249,33 @@ void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); } +const struct bch_extent_ptr * +bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + bkey_for_each_ptr(ptrs, ptr) + if (ptr->dev == dev) + return ptr; + + return NULL; +} + +bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + bkey_for_each_ptr(ptrs, ptr) + if (bch2_dev_in_target(c, ptr->dev, target) && + (!ptr->cached || + !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) + return true; + + return false; +} + /* extent specific 
utility code */ const struct bch_extent_ptr * @@ -279,20 +306,6 @@ bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group return NULL; } -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) - if (bch2_dev_in_target(c, ptr->dev, target) && - (!ptr->cached || - !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) - return ptr; - - return NULL; -} - unsigned bch2_extent_is_compressed(struct bkey_s_c k) { unsigned ret = 0; @@ -313,16 +326,17 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k) return ret; } -bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e, - struct bch_extent_ptr m, u64 offset) +bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, + struct bch_extent_ptr m, u64 offset) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - extent_for_each_ptr_decode(e, p, entry) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (p.ptr.dev == m.dev && p.ptr.gen == m.gen && - (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) == + (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(k.k) == (s64) m.offset - offset) return true; @@ -389,16 +403,17 @@ static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u, bch2_csum_type_is_encryption(n.csum_type); } -bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent e, +bool bch2_can_narrow_extent_crcs(struct bkey_s_c k, struct bch_extent_crc_unpacked n) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct bch_extent_crc_unpacked crc; const union bch_extent_entry *i; if (!n.csum_type) return false; - extent_for_each_crc(e, crc, i) + bkey_for_each_crc(k.k, ptrs, crc, i) if (can_narrow_crc(crc, n)) return true; @@ -414,9 +429,9 @@ bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent e, * currently live (so that readers won't have to bounce) while we've got the * checksum we need: */ -bool bch2_extent_narrow_crcs(struct bkey_i_extent *e, - struct bch_extent_crc_unpacked n) +bool bch2_bkey_narrow_crcs(struct bkey_i *k, struct bch_extent_crc_unpacked n) { + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); struct bch_extent_crc_unpacked u; struct extent_ptr_decoded p; union bch_extent_entry *i; @@ -424,7 +439,7 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e, /* Find a checksum entry that covers only live data: */ if (!n.csum_type) { - extent_for_each_crc(extent_i_to_s(e), u, i) + bkey_for_each_crc(&k->k, ptrs, u, i) if (!u.compression_type && u.csum_type && u.live_size == u.uncompressed_size) { @@ -436,15 +451,15 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e, found: BUG_ON(n.compression_type); BUG_ON(n.offset); - BUG_ON(n.live_size != e->k.size); + BUG_ON(n.live_size != k->k.size); restart_narrow_pointers: - extent_for_each_ptr_decode(extent_i_to_s(e), p, i) + bkey_for_each_ptr_decode(&k->k, ptrs, p, i) if (can_narrow_crc(p.crc, n)) { - bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr); + bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr); p.ptr.offset += p.crc.offset; p.crc = n; - bch2_extent_ptr_decoded_append(e, &p); + bch2_extent_ptr_decoded_append(k, &p); ret = true; goto restart_narrow_pointers; } @@ -1397,9 +1412,12 @@ static void bch2_extent_crc_pack(union bch_extent_crc *dst, #undef set_common_fields } -static void bch2_extent_crc_init(union bch_extent_crc *crc, - struct bch_extent_crc_unpacked new) +void bch2_extent_crc_append(struct bkey_i *k, + 
struct bch_extent_crc_unpacked new) { + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); + union bch_extent_crc *crc = (void *) ptrs.end; + if (bch_crc_bytes[new.csum_type] <= 4 && new.uncompressed_size - 1 <= CRC32_SIZE_MAX && new.nonce <= CRC32_NONCE_MAX) @@ -1416,54 +1434,53 @@ static void bch2_extent_crc_init(union bch_extent_crc *crc, BUG(); bch2_extent_crc_pack(crc, new); -} -void bch2_extent_crc_append(struct bkey_i_extent *e, - struct bch_extent_crc_unpacked new) -{ - bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new); - __extent_entry_push(e); + k->k.u64s += extent_entry_u64s(ptrs.end); + + EBUG_ON(bkey_val_u64s(&k->k) > BKEY_EXTENT_VAL_U64s_MAX); } -static inline void __extent_entry_insert(struct bkey_i_extent *e, +static inline void __extent_entry_insert(struct bkey_i *k, union bch_extent_entry *dst, union bch_extent_entry *new) { - union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e)); + union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k)); memmove_u64s_up((u64 *) dst + extent_entry_u64s(new), dst, (u64 *) end - (u64 *) dst); - e->k.u64s += extent_entry_u64s(new); + k->k.u64s += extent_entry_u64s(new); memcpy_u64s_small(dst, new, extent_entry_u64s(new)); } -void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e, +void bch2_extent_ptr_decoded_append(struct bkey_i *k, struct extent_ptr_decoded *p) { - struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(&e->k, NULL); + struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k)); + struct bch_extent_crc_unpacked crc = + bch2_extent_crc_unpack(&k->k, NULL); union bch_extent_entry *pos; unsigned i; if (!bch2_crc_unpacked_cmp(crc, p->crc)) { - pos = e->v.start; + pos = ptrs.start; goto found; } - extent_for_each_crc(extent_i_to_s(e), crc, pos) + bkey_for_each_crc(&k->k, ptrs, crc, pos) if (!bch2_crc_unpacked_cmp(crc, p->crc)) { pos = extent_entry_next(pos); goto found; } - bch2_extent_crc_append(e, p->crc); - pos = extent_entry_last(extent_i_to_s(e)); + bch2_extent_crc_append(k, p->crc); + pos = bkey_val_end(bkey_i_to_s(k)); found: p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; - __extent_entry_insert(e, pos, to_entry(&p->ptr)); + __extent_entry_insert(k, pos, to_entry(&p->ptr)); for (i = 0; i < p->ec_nr; i++) { p->ec[i].type = 1 << BCH_EXTENT_ENTRY_stripe_ptr; - __extent_entry_insert(e, pos, to_entry(&p->ec[i])); + __extent_entry_insert(k, pos, to_entry(&p->ec[i])); } } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 549188c864ae..035d15bbca39 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -12,7 +12,8 @@ struct btree_insert_entry; /* extent entries: */ -#define extent_entry_last(_e) bkey_val_end(_e) +#define extent_entry_last(_e) \ + ((typeof(&(_e).v->start[0])) bkey_val_end(_e)) #define entry_to_ptr(_entry) \ ({ \ @@ -258,6 +259,27 @@ out: \ __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ _ptr, _entry) +#define bkey_crc_next(_k, _start, _end, _crc, _iter) \ +({ \ + __bkey_extent_entry_for_each_from(_iter, _end, _iter) \ + if (extent_entry_is_crc(_iter)) { \ + (_crc) = bch2_extent_crc_unpack(_k, \ + entry_to_crc(_iter)); \ + break; \ + } \ + \ + (_iter) < (_end); \ +}) + +#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \ + for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \ + (_iter) = (_start); \ + bkey_crc_next(_k, _start, _end, _crc, _iter); \ + (_iter) = extent_entry_next(_iter)) + +#define bkey_for_each_crc(_k, _p, _crc, _iter) \ + __bkey_for_each_crc(_k, (_p).start, (_p).end, _crc, _iter) + /* utility code common to all keys with 
pointers: */ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) @@ -267,7 +289,7 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); return (struct bkey_ptrs_c) { to_entry(&e.v->start[0]), - to_entry(bkey_val_end(e)) + to_entry(extent_entry_last(e)) }; } case KEY_TYPE_extent: { @@ -337,18 +359,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) return ret; } -static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) -{ - struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; - - bkey_for_each_ptr(p, ptr) - if (ptr->dev == dev) - return ptr; - - return NULL; -} - unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c); unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); @@ -359,6 +369,11 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, struct bch_io_failures *, struct extent_ptr_decoded *); +void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); +void bch2_bkey_drop_device(struct bkey_s, unsigned); +const struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s_c, unsigned); +bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); + void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c); @@ -424,15 +439,11 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, const struct bch_extent_ptr * bch2_extent_has_device(struct bkey_s_c_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); unsigned bch2_extent_is_compressed(struct bkey_s_c); -bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, - struct bch_extent_ptr, u64); +bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c, + struct bch_extent_ptr, u64); static inline bool bkey_extent_is_data(const struct bkey *k) { @@ -456,15 +467,6 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k) } } -static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) -{ - return bkey_extent_is_allocation(k.k) && - !bch2_extent_is_compressed(k); -} - -void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); -void bch2_bkey_drop_device(struct bkey_s, unsigned); - /* Extent entry iteration: */ #define extent_for_each_entry_from(_e, _entry, _start) \ @@ -480,45 +482,18 @@ void bch2_bkey_drop_device(struct bkey_s, unsigned); #define extent_for_each_ptr(_e, _ptr) \ __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) -#define extent_crc_next(_e, _crc, _iter) \ -({ \ - extent_for_each_entry_from(_e, _iter, _iter) \ - if (extent_entry_is_crc(_iter)) { \ - (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ - break; \ - } \ - \ - (_iter) < extent_entry_last(_e); \ -}) - -#define extent_for_each_crc(_e, _crc, _iter) \ - for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ - (_iter) = (_e).v->start; \ - extent_crc_next(_e, _crc, _iter); \ - (_iter) = extent_entry_next(_iter)) - #define extent_for_each_ptr_decode(_e, _ptr, _entry) \ __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ extent_entry_last(_e), _ptr, _entry) -void bch2_extent_crc_append(struct bkey_i_extent *, +void bch2_extent_crc_append(struct bkey_i *, struct 
bch_extent_crc_unpacked); -void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, +void bch2_extent_ptr_decoded_append(struct bkey_i *, struct extent_ptr_decoded *); -static inline void __extent_entry_push(struct bkey_i_extent *e) -{ - union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); - - EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > - BKEY_EXTENT_VAL_U64s_MAX); - - e->k.u64s += extent_entry_u64s(entry); -} - -bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent, +bool bch2_can_narrow_extent_crcs(struct bkey_s_c, struct bch_extent_crc_unpacked); -bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked); +bool bch2_bkey_narrow_crcs(struct bkey_i *, struct bch_extent_crc_unpacked); union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 55fee053337f..474535aa3fc2 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1041,11 +1041,11 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, bool want_full_extent = false; if (bkey_extent_is_data(k.k)) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *i; struct extent_ptr_decoded p; - extent_for_each_ptr_decode(e, p, i) + bkey_for_each_ptr_decode(k.k, ptrs, p, i) want_full_extent |= ((p.crc.csum_type != 0) | (p.crc.compression_type != 0)); } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f69b535b1b82..1b3898eae8b8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1148,15 +1148,15 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, } static int bch2_fill_extent(struct fiemap_extent_info *info, - const struct bkey_i *k, unsigned flags) + struct bkey_s_c k, unsigned flags) { - if (bkey_extent_is_data(&k->k)) { - struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); + if (bkey_extent_is_data(k.k)) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; int ret; - extent_for_each_ptr_decode(e, p, entry) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { int flags2 = 0; u64 offset = p.ptr.offset; @@ -1166,22 +1166,22 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, offset += p.crc.offset; if ((offset & (PAGE_SECTORS - 1)) || - (e.k->size & (PAGE_SECTORS - 1))) + (k.k->size & (PAGE_SECTORS - 1))) flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; ret = fiemap_fill_next_extent(info, - bkey_start_offset(e.k) << 9, + bkey_start_offset(k.k) << 9, offset << 9, - e.k->size << 9, flags|flags2); + k.k->size << 9, flags|flags2); if (ret) return ret; } return 0; - } else if (k->k.type == KEY_TYPE_reservation) { + } else if (k.k->type == KEY_TYPE_reservation) { return fiemap_fill_next_extent(info, - bkey_start_offset(&k->k) << 9, - 0, k->k.size << 9, + bkey_start_offset(k.k) << 9, + 0, k.k->size << 9, flags| FIEMAP_EXTENT_DELALLOC| FIEMAP_EXTENT_UNWRITTEN); @@ -1198,7 +1198,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(k) tmp; + BKEY_PADDED(k) cur, prev; bool have_extent = false; int ret = 0; @@ -1212,25 +1212,31 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, - POS(ei->v.i_ino, start >> 9), 0, k, ret) + POS(ei->v.i_ino, start >> 9), 0, k, ret) { + if (bkey_cmp(bkey_start_pos(k.k), + 
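/*
 * Aside on the bch2_fiemap() rework beginning here (a sketch of the
 * pattern, with hypothetical next_extent()/emit() helpers):
 * fiemap_fill_next_extent() needs the final extent flagged
 * FIEMAP_EXTENT_LAST, but an extent is only known to be last once the
 * iterator is exhausted, so the loop reports one key behind, keeping
 * the current key in the new cur/prev scratch buffers:
 *
 *	have_extent = false;
 *	while ((k = next_extent(iter)).k) {
 *		if (have_extent)
 *			emit(prev, 0);
 *		prev = k;
 *		have_extent = true;
 *	}
 *	if (have_extent)
 *		emit(prev, FIEMAP_EXTENT_LAST);
 */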
POS(ei->v.i_ino, (start + len) >> 9)) >= 0) + break; + + bkey_reassemble(&cur.k, k); + k = bkey_i_to_s_c(&cur.k); + if (bkey_extent_is_data(k.k) || k.k->type == KEY_TYPE_reservation) { - if (bkey_cmp(bkey_start_pos(k.k), - POS(ei->v.i_ino, (start + len) >> 9)) >= 0) - break; - if (have_extent) { - ret = bch2_fill_extent(info, &tmp.k, 0); + ret = bch2_fill_extent(info, + bkey_i_to_s_c(&prev.k), 0); if (ret) break; } - bkey_reassemble(&tmp.k, k); + bkey_copy(&prev.k, &cur.k); have_extent = true; } + } if (!ret && have_extent) - ret = bch2_fill_extent(info, &tmp.k, FIEMAP_EXTENT_LAST); + ret = bch2_fill_extent(info, bkey_i_to_s_c(&prev.k), + FIEMAP_EXTENT_LAST); ret = bch2_trans_exit(&trans) ?: ret; return ret < 0 ? ret : 0; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index a539719661b8..fd1aceea3553 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -431,7 +431,7 @@ static void init_append_extent(struct bch_write_op *op, if (crc.csum_type || crc.compression_type || crc.nonce) - bch2_extent_crc_append(e, crc); + bch2_extent_crc_append(&e->k_i, crc); bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i, crc.compressed_size); @@ -962,17 +962,13 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts opts, unsigned flags) { - if (!bkey_extent_is_data(k.k)) - return false; - if (!(flags & BCH_READ_MAY_PROMOTE)) return false; if (!opts.promote_target) return false; - if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), - opts.promote_target)) + if (bch2_bkey_has_target(c, k, opts.promote_target)) return false; if (bch2_target_congested(c, opts.promote_target)) { @@ -1230,11 +1226,10 @@ retry: k = bkey_i_to_s_c(&tmp.k); bch2_trans_unlock(&trans); - if (!bkey_extent_is_data(k.k) || - !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k), - rbio->pick.ptr, - rbio->pos.offset - - rbio->pick.crc.offset)) { + if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k), + rbio->pick.ptr, + rbio->pos.offset - + rbio->pick.crc.offset)) { /* extent we wanted to read no longer exists: */ rbio->hole = true; goto out; @@ -1370,7 +1365,6 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_i_extent *e; BKEY_PADDED(k) new; struct bch_extent_crc_unpacked new_crc; u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; @@ -1389,34 +1383,30 @@ retry: if (IS_ERR_OR_NULL(k.k)) goto out; - if (!bkey_extent_is_data(k.k)) - goto out; - bkey_reassemble(&new.k, k); - e = bkey_i_to_extent(&new.k); + k = bkey_i_to_s_c(&new.k); - if (!bch2_extent_matches_ptr(c, extent_i_to_s_c(e), - rbio->pick.ptr, data_offset) || - bversion_cmp(e->k.version, rbio->version)) + if (bversion_cmp(k.k->version, rbio->version) || + !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) goto out; /* Extent was merged? 
*/ - if (bkey_start_offset(&e->k) < data_offset || - e->k.p.offset > data_offset + rbio->pick.crc.uncompressed_size) + if (bkey_start_offset(k.k) < data_offset || + k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) goto out; if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, rbio->pick.crc, NULL, &new_crc, - bkey_start_offset(&e->k) - data_offset, e->k.size, + bkey_start_offset(k.k) - data_offset, k.k->size, rbio->pick.crc.csum_type)) { bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); goto out; } - if (!bch2_extent_narrow_crcs(e, new_crc)) + if (!bch2_bkey_narrow_crcs(&new.k, new_crc)) goto out; - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &e->k_i)); + bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new.k)); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| @@ -1427,15 +1417,6 @@ out: bch2_trans_exit(&trans); } -static bool should_narrow_crcs(struct bkey_s_c k, - struct extent_ptr_decoded *pick, - unsigned flags) -{ - return !(flags & BCH_READ_IN_RETRY) && - bkey_extent_is_data(k.k) && - bch2_can_narrow_extent_crcs(bkey_s_c_to_extent(k), pick->crc); -} - /* Inner part that may run in process context */ static void __bch2_read_endio(struct work_struct *work) { @@ -1622,7 +1603,8 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, bio_flagged(&orig->bio, BIO_CHAIN)) flags |= BCH_READ_MUST_CLONE; - narrow_crcs = should_narrow_crcs(k, &pick, flags); + narrow_crcs = !(flags & BCH_READ_IN_RETRY) && + bch2_can_narrow_extent_crcs(k, pick.crc); if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) flags |= BCH_READ_MUST_BOUNCE; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index ad41f5e36a7c..301cb72bd3e4 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -49,8 +49,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { - if (!bkey_extent_is_data(k.k) || - !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) { + if (!bch2_bkey_has_device(k, dev_idx)) { ret = bch2_mark_bkey_replicas(c, k); if (ret) break; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 27835e4f13fd..ffa0c2bbe290 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -82,9 +82,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) break; if (bversion_cmp(k.k->version, new->k.version) || - !bkey_extent_is_data(k.k) || - !bch2_extent_matches_ptr(c, bkey_s_c_to_extent(k), - m->ptr, m->offset)) + !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) goto nomatch; if (m->data_cmd == DATA_REWRITE && @@ -116,14 +114,14 @@ static int bch2_migrate_index_update(struct bch_write_op *op) continue; } - bch2_extent_ptr_decoded_append(insert, &p); + bch2_extent_ptr_decoded_append(&insert->k_i, &p); did_work = true; } if (!did_work) goto nomatch; - bch2_extent_narrow_crcs(insert, + bch2_bkey_narrow_crcs(&insert->k_i, (struct bch_extent_crc_unpacked) { 0 }); bch2_extent_normalize(c, extent_i_to_s(insert).s); bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert), @@ -393,14 +391,15 @@ static int bch2_move_extent(struct bch_fs *c, struct moving_context *ctxt, struct write_point_specifier wp, struct bch_io_opts io_opts, - struct bkey_s_c_extent e, + struct bkey_s_c k, enum data_cmd data_cmd, struct data_opts data_opts) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct moving_io *io; const union bch_extent_entry *entry; struct extent_ptr_decoded p; - unsigned sectors 
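/*
 * Background note on the crc narrowing rework above (explanatory, not
 * from the patch text): after a partial overwrite, an extent's checksum
 * still covers the original, larger region, so every later read must
 * fetch and checksum dead bytes just to verify the live ones. Once a
 * read has verified the data anyway, the checksum can be "narrowed" to
 * cover only what the key still references:
 *
 *	before:	crc covers 128 sectors, key references 32 of them
 *	after:	crc covers exactly those 32 sectors
 *
 * which is why bch2_rbio_narrow_crcs() recomputes new_crc with
 * bch2_rechecksum_bio() and then installs it via bch2_bkey_narrow_crcs()
 * inside a BTREE_INSERT_ATOMIC transaction.
 */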
= e.k->size, pages; + unsigned sectors = k.k->size, pages; int ret = -ENOMEM; move_ctxt_wait_event(ctxt, @@ -412,7 +411,7 @@ static int bch2_move_extent(struct bch_fs *c, SECTORS_IN_FLIGHT_PER_DEVICE); /* write path might have to decompress data: */ - extent_for_each_ptr_decode(e, p, entry) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) sectors = max_t(unsigned, sectors, p.crc.uncompressed_size); pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); @@ -422,8 +421,8 @@ static int bch2_move_extent(struct bch_fs *c, goto err; io->write.ctxt = ctxt; - io->read_sectors = e.k->size; - io->write_sectors = e.k->size; + io->read_sectors = k.k->size; + io->write_sectors = k.k->size; bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0); bio_set_prio(&io->write.op.wbio.bio, @@ -440,18 +439,18 @@ static int bch2_move_extent(struct bch_fs *c, io->rbio.bio.bi_iter.bi_size = sectors << 9; io->rbio.bio.bi_opf = REQ_OP_READ; - io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(e.k); + io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); io->rbio.bio.bi_end_io = move_read_endio; ret = bch2_migrate_write_init(c, &io->write, wp, io_opts, - data_cmd, data_opts, e.s_c); + data_cmd, data_opts, k); if (ret) goto err_free_pages; atomic64_inc(&ctxt->stats->keys_moved); - atomic64_add(e.k->size, &ctxt->stats->sectors_moved); + atomic64_add(k.k->size, &ctxt->stats->sectors_moved); - trace_move_extent(e.k); + trace_move_extent(k.k); atomic_add(io->read_sectors, &ctxt->read_sectors); list_add_tail(&io->list, &ctxt->reads); @@ -461,7 +460,7 @@ static int bch2_move_extent(struct bch_fs *c, * ctxt when doing wakeup */ closure_get(&ctxt->cl); - bch2_read_extent(c, &io->rbio, e.s_c, 0, + bch2_read_extent(c, &io->rbio, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; @@ -470,7 +469,7 @@ err_free_pages: err_free: kfree(io); err: - trace_move_alloc_fail(e.k); + trace_move_alloc_fail(k.k); return ret; } @@ -580,8 +579,7 @@ peek: k = bkey_i_to_s_c(&tmp.k); bch2_trans_unlock(&trans); - ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, - bkey_s_c_to_extent(k), + ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, k, data_cmd, data_opts); if (ret2) { if (ret2 == -ENOMEM) { diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 44e235ef3de0..84971fbfc722 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -69,26 +69,19 @@ static bool __copygc_pred(struct bch_dev *ca, struct bkey_s_c k) { copygc_heap *h = &ca->copygc_heap; + const struct bch_extent_ptr *ptr = + bch2_bkey_has_device(k, ca->dev_idx); - switch (k.k->type) { - case KEY_TYPE_extent: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const struct bch_extent_ptr *ptr = - bch2_extent_has_device(e, ca->dev_idx); + if (ptr) { + struct copygc_heap_entry search = { .offset = ptr->offset }; - if (ptr) { - struct copygc_heap_entry search = { .offset = ptr->offset }; + ssize_t i = eytzinger0_find_le(h->data, h->used, + sizeof(h->data[0]), + bucket_offset_cmp, &search); - ssize_t i = eytzinger0_find_le(h->data, h->used, - sizeof(h->data[0]), - bucket_offset_cmp, &search); - - return (i >= 0 && - ptr->offset < h->data[i].offset + ca->mi.bucket_size && - ptr->gen == h->data[i].gen); - } - break; - } + return (i >= 0 && + ptr->offset < h->data[i].offset + ca->mi.bucket_size && + ptr->gen == h->data[i].gen); } return false; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index fe4a9af92a76..0997c0621b7c 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -38,9 +38,9 @@ void bch2_rebalance_add_key(struct 
bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - struct bkey_s_c_extent e; if (!bkey_extent_is_data(k.k)) return; @@ -49,9 +49,7 @@ void bch2_rebalance_add_key(struct bch_fs *c, !io_opts->background_compression) return; - e = bkey_s_c_to_extent(k); - - extent_for_each_ptr_decode(e, p, entry) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (rebalance_ptr_pred(c, p, io_opts)) { struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); -- cgit v1.2.3 From 5b6d40e2b670efc2feff4da9dd065053f5be31a7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Jun 2019 17:59:21 -0400 Subject: bcachefs: Export correct blocksize to vfs Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 1b3898eae8b8..54e555fb4d5d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1147,7 +1147,8 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } -static int bch2_fill_extent(struct fiemap_extent_info *info, +static int bch2_fill_extent(struct bch_fs *c, + struct fiemap_extent_info *info, struct bkey_s_c k, unsigned flags) { if (bkey_extent_is_data(k.k)) { @@ -1165,8 +1166,8 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, else offset += p.crc.offset; - if ((offset & (PAGE_SECTORS - 1)) || - (k.k->size & (PAGE_SECTORS - 1))) + if ((offset & (c->opts.block_size - 1)) || + (k.k->size & (c->opts.block_size - 1))) flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; ret = fiemap_fill_next_extent(info, @@ -1223,7 +1224,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (bkey_extent_is_data(k.k) || k.k->type == KEY_TYPE_reservation) { if (have_extent) { - ret = bch2_fill_extent(info, + ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), 0); if (ret) break; @@ -1235,7 +1236,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, } if (!ret && have_extent) - ret = bch2_fill_extent(info, bkey_i_to_s_c(&prev.k), + ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), FIEMAP_EXTENT_LAST); ret = bch2_trans_exit(&trans) ?: ret; @@ -1803,9 +1804,8 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, goto out; } - /* XXX: blocksize */ - sb->s_blocksize = PAGE_SIZE; - sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_blocksize = block_bytes(c); + sb->s_blocksize_bits = ilog2(block_bytes(c)); sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &bch_super_operations; sb->s_export_op = &bch_export_ops; -- cgit v1.2.3 From 76426098e419c1732efc3f88166f3f3592c215c9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 16 Aug 2019 09:59:56 -0400 Subject: bcachefs: Reflink Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/bcachefs.h | 4 + fs/bcachefs/bcachefs_format.h | 26 +++- fs/bcachefs/bkey.h | 2 + fs/bcachefs/bkey_methods.c | 1 + fs/bcachefs/btree_types.h | 9 +- fs/bcachefs/btree_update_leaf.c | 3 +- fs/bcachefs/buckets.c | 100 +++++++++++++- fs/bcachefs/extents.c | 50 +++++-- fs/bcachefs/extents.h | 19 ++- fs/bcachefs/fs-io.c | 218 ++++++++++++++++++++++------- fs/bcachefs/fs-io.h | 19 +++ fs/bcachefs/fs.c | 42 +++++- fs/bcachefs/fs.h | 15 +- fs/bcachefs/io.c | 127 +++++++++++++---- fs/bcachefs/io.h | 3 + fs/bcachefs/migrate.c | 13 +- fs/bcachefs/move.c | 98 ++++++++----- fs/bcachefs/move.h | 3 +- 
fs/bcachefs/recovery.c | 18 +-- fs/bcachefs/reflink.c | 300 ++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/reflink.h | 32 +++++ fs/bcachefs/replicas.c | 1 + 23 files changed, 945 insertions(+), 159 deletions(-) create mode 100644 fs/bcachefs/reflink.c create mode 100644 fs/bcachefs/reflink.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index c29ccdb45965..4c2608409144 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -44,6 +44,7 @@ bcachefs-y := \ quota.o \ rebalance.o \ recovery.o \ + reflink.o \ replicas.o \ siphash.o \ six.o \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 68e2d3b1a9a6..410fce3ed8d4 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -361,6 +361,7 @@ enum gc_phase { GC_PHASE_BTREE_XATTRS, GC_PHASE_BTREE_ALLOC, GC_PHASE_BTREE_QUOTAS, + GC_PHASE_BTREE_REFLINK, GC_PHASE_PENDING_DELETE, GC_PHASE_ALLOC, @@ -750,6 +751,9 @@ struct bch_fs { struct work_struct ec_stripe_delete_work; struct llist_head ec_stripe_delete_list; + /* REFLINK */ + u64 reflink_hint; + /* VFS IO PATH - fs-io.c */ struct bio_set writepage_bioset; struct bio_set dio_write_bioset; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index b8aafd2e283a..62afea1e7ec3 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -340,7 +340,9 @@ static inline void bkey_init(struct bkey *k) x(xattr, 11) \ x(alloc, 12) \ x(quota, 13) \ - x(stripe, 14) + x(stripe, 14) \ + x(reflink_p, 15) \ + x(reflink_v, 16) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -895,6 +897,24 @@ struct bch_stripe { struct bch_extent_ptr ptrs[0]; } __attribute__((packed, aligned(8))); +/* Reflink: */ + +struct bch_reflink_p { + struct bch_val v; + __le64 idx; + + __le32 reservation_generation; + __u8 nr_replicas; + __u8 pad[3]; +}; + +struct bch_reflink_v { + struct bch_val v; + __le64 refcount; + union bch_extent_entry start[0]; + __u64 _data[0]; +}; + /* Optional/variable size superblock sections: */ struct bch_sb_field { @@ -1297,6 +1317,7 @@ enum bch_sb_features { BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */ BCH_FEATURE_EC = 4, BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5, + BCH_FEATURE_REFLINK = 6, BCH_FEATURE_NR, }; @@ -1487,7 +1508,8 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); x(XATTRS, 3, "xattrs") \ x(ALLOC, 4, "alloc") \ x(QUOTAS, 5, "quotas") \ - x(EC, 6, "erasure_coding") + x(EC, 6, "erasure_coding") \ + x(REFLINK, 7, "reflink") enum btree_id { #define x(kwd, val, name) BTREE_ID_##kwd = val, diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index b3a08e52e6b3..321fe6fe0b55 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -560,6 +560,8 @@ BKEY_VAL_ACCESSORS(xattr); BKEY_VAL_ACCESSORS(alloc); BKEY_VAL_ACCESSORS(quota); BKEY_VAL_ACCESSORS(stripe); +BKEY_VAL_ACCESSORS(reflink_p); +BKEY_VAL_ACCESSORS(reflink_v); /* byte order helpers */ diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 8af16ca994e0..6fa6ac1fadc1 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -10,6 +10,7 @@ #include "extents.h" #include "inode.h" #include "quota.h" +#include "reflink.h" #include "xattr.h" const char * const bch2_bkey_types[] = { diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index ec14e2deecb7..621cbfa22fc9 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -464,7 +464,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b) 
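/*
 * Sketch of how the two key types added to bcachefs_format.h above fit
 * together (the helper below is hypothetical, for illustration): a
 * KEY_TYPE_reflink_p in the extents btree carries no data pointers of
 * its own, only an index into the new BTREE_ID_REFLINK btree, where a
 * refcounted KEY_TYPE_reflink_v holds the actual extent entries. Every
 * sharer of the data points at the same reflink_v:
 */
static u64 reflink_p_resolve(struct bkey_s_c_reflink_p p,
			     unsigned offset_into_extent)
{
	/* offset in BTREE_ID_REFLINK of the sector being read: */
	return le64_to_cpu(p.v->idx) + offset_into_extent;
}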
static inline bool btree_node_type_is_extents(enum btree_node_type type) { - return type == BKEY_TYPE_EXTENTS; + switch (type) { + case BKEY_TYPE_EXTENTS: + case BKEY_TYPE_REFLINK: + return true; + default: + return false; + } } static inline bool btree_node_is_extents(struct btree *b) @@ -480,6 +486,7 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type) case BKEY_TYPE_EXTENTS: case BKEY_TYPE_INODES: case BKEY_TYPE_EC: + case BKEY_TYPE_REFLINK: return true; default: return false; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 5f94b6e9cf28..443ffb5c709d 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -521,7 +521,8 @@ static inline bool update_triggers_transactional(struct btree_trans *trans, { return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) && (i->iter->btree_id == BTREE_ID_EXTENTS || - i->iter->btree_id == BTREE_ID_INODES); + i->iter->btree_id == BTREE_ID_INODES || + i->iter->btree_id == BTREE_ID_REFLINK); } static inline bool update_has_triggers(struct btree_trans *trans, diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index baf9642d21ca..3d243f2d1095 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -972,7 +972,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c, spin_unlock(&c->ec_stripes_heap_lock); bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", (u64) p.idx); - return -1; + return -EIO; } BUG_ON(m->r.e.data_type != data_type); @@ -1144,6 +1144,7 @@ int bch2_mark_key_locked(struct bch_fs *c, fs_usage, journal_seq, flags); break; case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER, fs_usage, journal_seq, flags); break; @@ -1304,7 +1305,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, xchg(&warned_disk_usage, 1)) return; - pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors); + bch_err(c, "disk usage increased more than %llu sectors reserved", + disk_res_sectors); trans_for_each_update_iter(trans, i) { struct btree_iter *iter = i->iter; @@ -1319,7 +1321,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, node_iter = iter->l[0].iter; while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, - KEY_TYPE_discard))) { + KEY_TYPE_discard))) { struct bkey unpacked; struct bkey_s_c k; @@ -1471,6 +1473,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, struct bch_extent_stripe_ptr p, s64 sectors, enum bch_data_type data_type) { + struct bch_fs *c = trans->c; struct bch_replicas_padded r; struct btree_iter *iter; struct bkey_i *new_k; @@ -1487,10 +1490,10 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, return ret; if (k.k->type != KEY_TYPE_stripe) { - bch_err_ratelimited(trans->c, - "pointer to nonexistent stripe %llu", - (u64) p.idx); - ret = -1; + bch2_fs_inconsistent(c, + "pointer to nonexistent stripe %llu", + (u64) p.idx); + ret = -EIO; goto out; } @@ -1578,6 +1581,84 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, return 0; } +static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + u64 idx, unsigned sectors, + unsigned flags) +{ + struct bch_fs *c = trans->c; + struct btree_iter *iter; + struct bkey_i *new_k; + struct bkey_s_c k; + struct bkey_i_reflink_v *r_v; + s64 ret; + + ret = trans_get_key(trans, BTREE_ID_REFLINK, + POS(0, idx), &iter, &k); + if (ret) + return ret; + + if (k.k->type != KEY_TYPE_reflink_v) { + bch2_fs_inconsistent(c, 
+ "%llu:%llu len %u points to nonexistent indirect extent %llu", + p.k->p.inode, p.k->p.offset, p.k->size, idx); + ret = -EIO; + goto err; + } + + if ((flags & BCH_BUCKET_MARK_OVERWRITE) && + (bkey_start_offset(k.k) < idx || + k.k->p.offset > idx + sectors)) + goto out; + + bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); + BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); + + new_k = trans_update_key(trans, iter, k.k->u64s); + ret = PTR_ERR_OR_ZERO(new_k); + if (ret) + goto err; + + bkey_reassemble(new_k, k); + r_v = bkey_i_to_reflink_v(new_k); + + le64_add_cpu(&r_v->v.refcount, + !(flags & BCH_BUCKET_MARK_OVERWRITE) ? 1 : -1); + + if (!r_v->v.refcount) { + r_v->k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&r_v->k, 0); + } +out: + ret = k.k->p.offset - idx; +err: + bch2_trans_iter_put(trans, iter); + return ret; +} + +static int bch2_trans_mark_reflink_p(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, unsigned offset, + s64 sectors, unsigned flags) +{ + u64 idx = le64_to_cpu(p.v->idx) + offset; + s64 ret = 0; + + sectors = abs(sectors); + BUG_ON(offset + sectors > p.k->size); + + while (sectors) { + ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags); + if (ret < 0) + break; + + idx += ret; + sectors = max_t(s64, 0LL, sectors - ret); + ret = 0; + } + + return ret; +} + int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, unsigned offset, s64 sectors, unsigned flags) { @@ -1593,6 +1674,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, return bch2_trans_mark_extent(trans, k, offset, sectors, flags, BCH_DATA_BTREE); case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: return bch2_trans_mark_extent(trans, k, offset, sectors, flags, BCH_DATA_USER); case KEY_TYPE_inode: @@ -1616,6 +1698,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, d->fs_usage.persistent_reserved[replicas - 1] += sectors; return 0; } + case KEY_TYPE_reflink_p: + return bch2_trans_mark_reflink_p(trans, + bkey_s_c_to_reflink_p(k), + offset, sectors, flags); default: return 0; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 11defa3d99a5..81ec55526ce9 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -744,7 +744,8 @@ void __bch2_cut_front(struct bpos where, struct bkey_s k) case KEY_TYPE_error: case KEY_TYPE_cookie: break; - case KEY_TYPE_extent: { + case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: { struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *entry; bool seen_crc = false; @@ -774,6 +775,12 @@ void __bch2_cut_front(struct bpos where, struct bkey_s k) break; } + case KEY_TYPE_reflink_p: { + struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k); + + le64_add_cpu(&p.v->idx, sub); + break; + } case KEY_TYPE_reservation: break; default: @@ -968,6 +975,33 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans, } break; + case KEY_TYPE_reflink_p: { + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + u64 idx = le64_to_cpu(p.v->idx); + unsigned sectors = end->offset - bkey_start_offset(p.k); + struct btree_iter *iter; + struct bkey_s_c r_k; + + for_each_btree_key(trans, iter, + BTREE_ID_REFLINK, POS(0, idx + offset), + BTREE_ITER_SLOTS, r_k, ret) { + if (bkey_cmp(bkey_start_pos(r_k.k), + POS(0, idx + sectors)) >= 0) + break; + + *nr_iters += 1; + if (*nr_iters >= max_iters) { + struct bpos pos = bkey_start_pos(k.k); + pos.offset += r_k.k->p.offset - idx; + + *end = bpos_min(*end, pos); + break; + } + } + + bch2_trans_iter_put(trans, iter); + break; + } } return ret; @@ -1561,17 
+1595,17 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) return false; } -void bch2_extent_mark_replicas_cached(struct bch_fs *c, - struct bkey_s_extent e, - unsigned target, - unsigned nr_desired_replicas) +void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k, + unsigned target, + unsigned nr_desired_replicas) { + struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *entry; struct extent_ptr_decoded p; - int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas; + int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas; if (target && extra > 0) - extent_for_each_ptr_decode(e, p, entry) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { int n = bch2_extent_ptr_durability(c, p); if (n && n <= extra && @@ -1582,7 +1616,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, } if (extra > 0) - extent_for_each_ptr_decode(e, p, entry) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { int n = bch2_extent_ptr_durability(c, p); if (n && n <= extra) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 156d8e37045a..cef93af25858 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -306,6 +306,14 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) to_entry(&s.v->ptrs[s.v->nr_blocks]), }; } + case KEY_TYPE_reflink_v: { + struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); + + return (struct bkey_ptrs_c) { + r.v->start, + bkey_val_end(r), + }; + } default: return (struct bkey_ptrs_c) { NULL, NULL }; } @@ -436,8 +444,8 @@ bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *, void bch2_insert_fixup_extent(struct btree_trans *, struct btree_insert_entry *); -void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, - unsigned, unsigned); +void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, + unsigned, unsigned); const struct bch_extent_ptr * bch2_extent_has_device(struct bkey_s_c_extent, unsigned); @@ -452,17 +460,24 @@ static inline bool bkey_extent_is_data(const struct bkey *k) switch (k->type) { case KEY_TYPE_btree_ptr: case KEY_TYPE_extent: + case KEY_TYPE_reflink_p: + case KEY_TYPE_reflink_v: return true; default: return false; } } +/* + * Should extent be counted under inode->i_sectors? + */ static inline bool bkey_extent_is_allocation(const struct bkey *k) { switch (k->type) { case KEY_TYPE_extent: case KEY_TYPE_reservation: + case KEY_TYPE_reflink_p: + case KEY_TYPE_reflink_v: return true; default: return false; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index ef94aecaa7cb..771fb111550d 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -16,6 +16,7 @@ #include "io.h" #include "keylist.h" #include "quota.h" +#include "reflink.h" #include "trace.h" #include @@ -201,9 +202,9 @@ static int inode_set_size(struct bch_inode_info *inode, return 0; } -static int __must_check bch2_write_inode_size(struct bch_fs *c, - struct bch_inode_info *inode, - loff_t new_size, unsigned fields) +int __must_check bch2_write_inode_size(struct bch_fs *c, + struct bch_inode_info *inode, + loff_t new_size, unsigned fields) { struct inode_new_size s = { .new_size = new_size, @@ -936,15 +937,12 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) { struct bvec_iter iter; struct bio_vec bv; - unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k); + unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v + ? 0 : bch2_bkey_nr_ptrs_allocated(k); unsigned state = k.k->type == KEY_TYPE_reservation ? 
SECTOR_RESERVED : SECTOR_ALLOCATED; - BUG_ON(bio->bi_iter.bi_sector < bkey_start_offset(k.k)); - BUG_ON(bio_end_sector(bio) > k.k->p.offset); - - bio_for_each_segment(bv, bio, iter) { struct bch_page_state *s = bch2_page_state(bv.bv_page); unsigned i; @@ -959,10 +957,11 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) } static void readpage_bio_extend(struct readpages_iter *iter, - struct bio *bio, u64 offset, + struct bio *bio, + unsigned sectors_this_extent, bool get_more) { - while (bio_end_sector(bio) < offset && + while (bio_sectors(bio) < sectors_this_extent && bio->bi_vcnt < bio->bi_max_vecs) { pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT; struct page *page = readpage_iter_next(iter); @@ -1012,35 +1011,39 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, struct bch_fs *c = trans->c; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; + int ret = 0; rbio->c = c; rbio->start_time = local_clock(); - +retry: while (1) { BKEY_PADDED(k) tmp; struct bkey_s_c k; - unsigned bytes, offset_into_extent; + unsigned bytes, sectors, offset_into_extent; bch2_btree_iter_set_pos(iter, POS(inum, rbio->bio.bi_iter.bi_sector)); k = bch2_btree_iter_peek_slot(iter); - BUG_ON(!k.k); - - if (IS_ERR(k.k)) { - int ret = btree_iter_err(iter); - BUG_ON(!ret); - bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); - bio_endio(&rbio->bio); - return; - } + ret = bkey_err(k); + if (ret) + break; bkey_reassemble(&tmp.k, k); - bch2_trans_unlock(trans); k = bkey_i_to_s_c(&tmp.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); + sectors = k.k->size - offset_into_extent; + + ret = bch2_read_indirect_extent(trans, iter, + &offset_into_extent, &tmp.k); + if (ret) + break; + + sectors = min(sectors, k.k->size - offset_into_extent); + + bch2_trans_unlock(trans); if (readpages_iter) { bool want_full_extent = false; @@ -1055,13 +1058,11 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, (p.crc.compression_type != 0)); } - readpage_bio_extend(readpages_iter, - &rbio->bio, k.k->p.offset, - want_full_extent); + readpage_bio_extend(readpages_iter, &rbio->bio, + sectors, want_full_extent); } - bytes = min_t(unsigned, bio_sectors(&rbio->bio), - (k.k->size - offset_into_extent)) << 9; + bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; swap(rbio->bio.bi_iter.bi_size, bytes); if (rbio->bio.bi_iter.bi_size == bytes) @@ -1078,6 +1079,12 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); } + + if (ret == -EINTR) + goto retry; + + bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); + bio_endio(&rbio->bio); } void bch2_readahead(struct readahead_control *ractl) @@ -2256,29 +2263,25 @@ out: /* truncate: */ -static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode, - u64 start_offset, u64 end_offset, u64 *journal_seq) +int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, + struct bpos end, struct bch_inode_info *inode, + u64 new_i_size) { - struct bpos start = POS(inode->v.i_ino, start_offset); - struct bpos end = POS(inode->v.i_ino, end_offset); + struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); - struct btree_trans trans; - struct btree_iter *iter; struct bkey_s_c k; - int ret = 0; - - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start, - BTREE_ITER_INTENT); + int ret = 0, 
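/*
 * Note on the bchfs_read() loop above (explanatory comment, not patch
 * text): the sector count is clamped twice because resolving a
 * reflink_p swaps the key for the indirect extent it points to, which
 * can be smaller than the reflink_p itself:
 *
 *	sectors = k.k->size - offset_into_extent;	// from the reflink_p
 *	bch2_read_indirect_extent(...);			// k is now the reflink_v
 *	sectors = min(sectors, k.k->size - offset_into_extent);
 *
 * so the read is bounded by both the original and the indirect extent.
 */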
ret2 = 0; while ((k = bch2_btree_iter_peek(iter)).k && - !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; + ret = bkey_err(k); + if (ret) + goto btree_err; + bkey_init(&delete.k); delete.k.p = iter->pos; @@ -2286,23 +2289,51 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode, bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end, &delete.k); - bch2_trans_begin_updates(&trans); + bch2_trans_begin_updates(trans); - ret = bch2_extent_update(&trans, inode, + ret = bch2_extent_update(trans, inode, &disk_res, NULL, iter, &delete, - 0, true, true, NULL); + new_i_size, false, true, NULL); bch2_disk_reservation_put(c, &disk_res); - - if (ret == -EINTR) +btree_err: + if (ret == -EINTR) { + ret2 = ret; ret = 0; + } if (ret) break; + } - bch2_trans_cond_resched(&trans); + if (bkey_cmp(iter->pos, end) > 0) { + bch2_btree_iter_set_pos(iter, end); + ret = bch2_btree_iter_traverse(iter); } + return ret ?: ret2; +} + +static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode, + u64 start_offset, u64 end_offset) +{ + struct btree_trans trans; + struct btree_iter *iter; + int ret = 0; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + POS(inode->v.i_ino, start_offset), + BTREE_ITER_INTENT); + + ret = bch2_fpunch_at(&trans, iter, + POS(inode->v.i_ino, end_offset), + inode, 0); + bch2_trans_exit(&trans); + if (ret == -EINTR) + ret = 0; + return ret; } @@ -2510,7 +2541,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) ret = __bch2_fpunch(c, inode, round_up(iattr->ia_size, block_bytes(c)) >> 9, - U64_MAX, &inode->ei_journal_seq); + U64_MAX); if (unlikely(ret)) goto err; @@ -2557,8 +2588,7 @@ static long bch2_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) truncate_pagecache_range(&inode->v, offset, offset + len - 1); if (discard_start < discard_end) - ret = __bch2_fpunch(c, inode, discard_start, discard_end, - &inode->ei_journal_seq); + ret = __bch2_fpunch(c, inode, discard_start, discard_end); err: bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); @@ -2670,7 +2700,7 @@ bkey_err: ret = __bch2_fpunch(c, inode, round_up(new_size, block_bytes(c)) >> 9, - U64_MAX, &inode->ei_journal_seq); + U64_MAX); if (ret) goto err; @@ -2853,6 +2883,94 @@ long bch2_fallocate_dispatch(struct file *file, int mode, return -EOPNOTSUPP; } +static void mark_range_unallocated(struct bch_inode_info *inode, + loff_t start, loff_t end) +{ + pgoff_t index = start >> PAGE_SHIFT; + pgoff_t end_index = (end - 1) >> PAGE_SHIFT; + struct folio_batch fbatch; + unsigned i, j; + + folio_batch_init(&fbatch); + + while (filemap_get_folios(inode->v.i_mapping, + &index, end_index, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + struct bch_page_state *s; + + folio_lock(folio); + s = bch2_page_state(&folio->page); + + if (s) + for (j = 0; j < PAGE_SECTORS; j++) + s->s[j].nr_replicas = 0; + + folio_unlock(folio); + } + folio_batch_release(&fbatch); + cond_resched(); + } +} + +loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, + struct file *file_dst, loff_t pos_dst, + loff_t len, unsigned remap_flags) +{ + struct bch_inode_info *src = file_bch_inode(file_src); + struct bch_inode_info *dst = file_bch_inode(file_dst); + struct bch_fs *c = src->v.i_sb->s_fs_info; + loff_t ret = 0; + loff_t aligned_len; + + if 
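/*
 * For context on bch2_remap_file_range() (this userspace sketch is an
 * illustration added here, not part of the patch; error handling
 * elided): the VFS ->remap_file_range hook is what FICLONE/FICLONERANGE
 * and copy_file_range() ultimately call into:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	struct file_clone_range fcr = {
 *		.src_fd		= src_fd,
 *		.src_offset	= 0,
 *		.src_length	= len,
 *		.dest_offset	= 0,
 *	};
 *
 *	ioctl(dst_fd, FICLONERANGE, &fcr);
 *
 * Offsets must be block aligned and, within a single file, the two
 * ranges must not overlap, matching the -EINVAL checks below.
 */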
(remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) + return -EOPNOTSUPP; + + if ((pos_src & (block_bytes(c) - 1)) || + (pos_dst & (block_bytes(c) - 1))) + return -EINVAL; + + if (src == dst && + abs(pos_src - pos_dst) < len) + return -EINVAL; + + bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); + + inode_dio_wait(&src->v); + inode_dio_wait(&dst->v); + + ret = generic_remap_file_range_prep(file_src, pos_src, + file_dst, pos_dst, + &len, remap_flags); + if (ret < 0 || len == 0) + goto out_unlock; + + aligned_len = round_up(len, block_bytes(c)); + + ret = write_invalidate_inode_pages_range(dst->v.i_mapping, + pos_dst, pos_dst + aligned_len); + if (ret) + goto out_unlock; + + mark_range_unallocated(src, pos_src, pos_src + aligned_len); + + ret = bch2_remap_range(c, dst, + POS(dst->v.i_ino, pos_dst >> 9), + POS(src->v.i_ino, pos_src >> 9), + aligned_len >> 9, + pos_dst + len); + if (ret > 0) + ret = min(ret << 9, len); + +out_unlock: + bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); + + return ret; +} + /* fseek: */ static int folio_data_offset(struct folio *folio, unsigned offset) diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index e263b515e901..861ec25ab9ef 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -9,6 +9,22 @@ #include +struct quota_res; + +int bch2_extent_update(struct btree_trans *, + struct bch_inode_info *, + struct disk_reservation *, + struct quota_res *, + struct btree_iter *, + struct bkey_i *, + u64, bool, bool, s64 *); +int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, + struct bpos, struct bch_inode_info *, u64); + +int __must_check bch2_write_inode_size(struct bch_fs *, + struct bch_inode_info *, + loff_t, unsigned); + int bch2_writepage(struct page *, struct writeback_control *); int bch2_read_folio(struct file *, struct folio *); @@ -28,6 +44,9 @@ int bch2_fsync(struct file *, loff_t, loff_t, int); int bch2_truncate(struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); +loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, + loff_t, loff_t, unsigned); + loff_t bch2_llseek(struct file *, loff_t, int); vm_fault_t bch2_page_fault(struct vm_fault *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 54e555fb4d5d..fad019d3c3f5 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1157,6 +1157,9 @@ static int bch2_fill_extent(struct bch_fs *c, struct extent_ptr_decoded p; int ret; + if (k.k->type == KEY_TYPE_reflink_v) + flags |= FIEMAP_EXTENT_SHARED; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { int flags2 = 0; u64 offset = p.ptr.offset; @@ -1200,6 +1203,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_iter *iter; struct bkey_s_c k; BKEY_PADDED(k) cur, prev; + unsigned offset_into_extent, sectors; bool have_extent = false; int ret = 0; @@ -1212,15 +1216,36 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, - POS(ei->v.i_ino, start >> 9), 0, k, ret) { - if (bkey_cmp(bkey_start_pos(k.k), - POS(ei->v.i_ino, (start + len) >> 9)) >= 0) - break; + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + POS(ei->v.i_ino, start >> 9), + BTREE_ITER_SLOTS); + + while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) { + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; 
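/*
 * Explanatory note for the fiemap changes that follow (comment added
 * for illustration): with reflink, the key under the iterator may be a
 * reflink_p that resolves to an indirect extent with a different start
 * and size, so the reported extent has to be synthesized: trim the
 * front to the resolved window with bch2_cut_front(), size it with
 * bch2_key_resize(), and re-anchor its end past iter->pos so the walk
 * advances by exactly the sectors just reported.
 */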
bkey_reassemble(&cur.k, k); k = bkey_i_to_s_c(&cur.k); + offset_into_extent = iter->pos.offset - + bkey_start_offset(k.k); + sectors = k.k->size - offset_into_extent; + + ret = bch2_read_indirect_extent(&trans, iter, + &offset_into_extent, &cur.k); + if (ret) + break; + + sectors = min(sectors, k.k->size - offset_into_extent); + + bch2_cut_front(POS(k.k->p.inode, + bkey_start_offset(k.k) + offset_into_extent), + &cur.k); + bch2_key_resize(&cur.k.k, sectors); + cur.k.k.p.offset = iter->pos.offset + cur.k.k.size; + if (bkey_extent_is_data(k.k) || k.k->type == KEY_TYPE_reservation) { if (have_extent) { @@ -1233,12 +1258,16 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bkey_copy(&prev.k, &cur.k); have_extent = true; } + + bch2_btree_iter_set_pos(iter, + POS(iter->pos.inode, + iter->pos.offset + sectors)); } if (!ret && have_extent) ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), FIEMAP_EXTENT_LAST); - +err: ret = bch2_trans_exit(&trans) ?: ret; return ret < 0 ? ret : 0; } @@ -1286,6 +1315,7 @@ static const struct file_operations bch_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = bch2_compat_fs_ioctl, #endif + .remap_file_range = bch2_remap_file_range, }; static const struct inode_operations bch_file_inode_operations = { diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index de07f0f1dd51..6edf5dd803f0 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -59,7 +59,8 @@ static inline int ptrcmp(void *l, void *r) enum bch_inode_lock_op { INODE_LOCK = (1U << 0), - INODE_UPDATE_LOCK = (1U << 1), + INODE_PAGECACHE_BLOCK = (1U << 1), + INODE_UPDATE_LOCK = (1U << 2), }; #define bch2_lock_inodes(_locks, ...) \ @@ -71,9 +72,11 @@ do { \ \ for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if (_locks & INODE_LOCK) \ + if ((_locks) & INODE_LOCK) \ down_write_nested(&a[i]->v.i_rwsem, i); \ - if (_locks & INODE_UPDATE_LOCK) \ + if ((_locks) & INODE_PAGECACHE_BLOCK) \ + bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\ + if ((_locks) & INODE_UPDATE_LOCK) \ mutex_lock_nested(&a[i]->ei_update_lock, i);\ } \ } while (0) @@ -87,9 +90,11 @@ do { \ \ for (i = 1; i < ARRAY_SIZE(a); i++) \ if (a[i] != a[i - 1]) { \ - if (_locks & INODE_LOCK) \ + if ((_locks) & INODE_LOCK) \ up_write(&a[i]->v.i_rwsem); \ - if (_locks & INODE_UPDATE_LOCK) \ + if ((_locks) & INODE_PAGECACHE_BLOCK) \ + bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\ + if ((_locks) & INODE_UPDATE_LOCK) \ mutex_unlock(&a[i]->ei_update_lock); \ } \ } while (0) diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index ed84572a9e67..4d359931edb3 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1041,6 +1041,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) noinline static struct promote_op *__promote_alloc(struct bch_fs *c, + enum btree_id btree_id, struct bpos pos, struct extent_ptr_decoded *pick, struct bch_io_opts opts, @@ -1097,6 +1098,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c, (struct data_opts) { .target = opts.promote_target }, + btree_id, bkey_s_c_null); BUG_ON(ret); @@ -1134,7 +1136,11 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c, if (!should_promote(c, k, pos, opts, flags)) return NULL; - promote = __promote_alloc(c, pos, pick, opts, sectors, rbio); + promote = __promote_alloc(c, + k.k->type == KEY_TYPE_reflink_v + ? 
BTREE_ID_REFLINK + : BTREE_ID_EXTENTS, + pos, pick, opts, sectors, rbio); if (!promote) return NULL; @@ -1278,18 +1284,25 @@ retry: POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k, ret) { BKEY_PADDED(k) tmp; - unsigned bytes, offset_into_extent; + unsigned bytes, sectors, offset_into_extent; bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_trans_unlock(&trans); - offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); + sectors = k.k->size - offset_into_extent; + + ret = bch2_read_indirect_extent(&trans, iter, + &offset_into_extent, &tmp.k); + if (ret) + break; - bytes = min_t(unsigned, bvec_iter_sectors(bvec_iter), - (k.k->size - offset_into_extent)) << 9; + sectors = min(sectors, k.k->size - offset_into_extent); + + bch2_trans_unlock(&trans); + + bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; swap(bvec_iter.bi_size, bytes); ret = __bch2_read_extent(c, rbio, bvec_iter, k, @@ -1569,6 +1582,48 @@ static void bch2_read_endio(struct bio *bio) bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); } +int bch2_read_indirect_extent(struct btree_trans *trans, + struct btree_iter *extent_iter, + unsigned *offset_into_extent, + struct bkey_i *orig_k) +{ + struct btree_iter *iter; + struct bkey_s_c k; + u64 reflink_offset; + int ret; + + if (orig_k->k.type != KEY_TYPE_reflink_p) + return 0; + + reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) + + *offset_into_extent; + + iter = __bch2_trans_get_iter(trans, BTREE_ID_REFLINK, + POS(0, reflink_offset), + BTREE_ITER_SLOTS, 1); + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + return ret; + + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_reflink_v) { + __bcache_io_error(trans->c, + "pointer to nonexistent indirect extent"); + ret = -EIO; + goto err; + } + + *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); + bkey_reassemble(orig_k, k); +err: + bch2_trans_iter_put(trans, iter); + return ret; +} + int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, struct bvec_iter iter, struct bkey_s_c k, unsigned offset_into_extent, @@ -1644,6 +1699,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, pos.offset += offset_into_extent; pick.ptr.offset += pick.crc.offset + offset_into_extent; + offset_into_extent = 0; pick.crc.compressed_size = bvec_iter_sectors(iter); pick.crc.uncompressed_size = bvec_iter_sectors(iter); pick.crc.offset = 0; @@ -1829,25 +1885,47 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, - POS(inode, rbio->bio.bi_iter.bi_sector), - BTREE_ITER_SLOTS, k, ret) { + iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + POS(inode, rbio->bio.bi_iter.bi_sector), + BTREE_ITER_SLOTS); + + while (1) { BKEY_PADDED(k) tmp; - unsigned bytes, offset_into_extent; + unsigned bytes, sectors, offset_into_extent; + + bch2_btree_iter_set_pos(iter, + POS(inode, rbio->bio.bi_iter.bi_sector)); + + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ bkey_reassemble(&tmp.k, k); k = bkey_i_to_s_c(&tmp.k); - bch2_trans_unlock(&trans); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); + sectors = k.k->size - offset_into_extent; + + ret = bch2_read_indirect_extent(&trans, iter, + &offset_into_extent, &tmp.k); + if (ret) + goto err; + + /* + * With 
indirect extents, the amount of data to read is the min + * of the original extent and the indirect extent: + */ + sectors = min(sectors, k.k->size - offset_into_extent); + + /* + * Unlock the iterator while the btree node's lock is still in + * cache, before doing the IO: + */ + bch2_trans_unlock(&trans); - bytes = min_t(unsigned, bio_sectors(&rbio->bio), - (k.k->size - offset_into_extent)) << 9; + bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; swap(rbio->bio.bi_iter.bi_size, bytes); if (rbio->bio.bi_iter.bi_size == bytes) @@ -1856,21 +1934,18 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) bch2_read_extent(c, rbio, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) - return; + break; swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); } - - /* - * If we get here, it better have been because there was an error - * reading a btree node - */ - BUG_ON(!ret); - bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); - +out: bch2_trans_exit(&trans); + return; +err: + bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret); bch2_rbio_done(rbio); + goto out; } void bch2_fs_io_exit(struct bch_fs *c) diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index aa437cb05fe7..a768ccc90f1f 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -99,6 +99,9 @@ struct bch_devs_mask; struct cache_promote_op; struct extent_ptr_decoded; +int bch2_read_indirect_extent(struct btree_trans *, struct btree_iter *, + unsigned *, struct bkey_i *); + enum bch_read_flags { BCH_READ_RETRY_IF_STALE = 1 << 0, BCH_READ_MAY_PROMOTE = 1 << 1, diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 301cb72bd3e4..dc3b03d6e627 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -34,7 +34,8 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, return 0; } -static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) +static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags, + enum btree_id btree_id) { struct btree_trans trans; struct btree_iter *iter; @@ -44,8 +45,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, - POS_MIN, BTREE_ITER_PREFETCH); + iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, + BTREE_ITER_PREFETCH); while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { @@ -98,6 +99,12 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) return ret; } +static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) +{ + return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?: + __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK); +} + static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct btree_trans trans; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index ffa0c2bbe290..05bb74a36230 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -63,13 +63,14 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, m->btree_id, bkey_start_pos(&bch2_keylist_front(keys)->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); while (1) { struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); - struct bkey_i_extent *insert, *new = + struct bkey_i *insert; + struct bkey_i_extent *new = 
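/*
 * Context for bch2_migrate_index_update() here (sketch of the shown
 * logic): the move path is optimistic. Data was read at some key
 * version; by the time the background write finishes, foreground
 * writes may have replaced the extent. The index update therefore
 * re-checks the live key and only splices in the new pointers when it
 * still matches what was read:
 *
 *	if (bversion_cmp(k.k->version, new->k.version) ||
 *	    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
 *		goto nomatch;
 *
 * otherwise the slice is skipped and the next one is tried.
 */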
bkey_i_to_extent(bch2_keylist_front(keys)); BKEY_PADDED(k) _new, _insert; const union bch_extent_entry *entry; @@ -86,26 +87,25 @@ static int bch2_migrate_index_update(struct bch_write_op *op) goto nomatch; if (m->data_cmd == DATA_REWRITE && - !bch2_extent_has_device(bkey_s_c_to_extent(k), - m->data_opts.rewrite_dev)) + !bch2_bkey_has_device(k, m->data_opts.rewrite_dev)) goto nomatch; bkey_reassemble(&_insert.k, k); - insert = bkey_i_to_extent(&_insert.k); + insert = &_insert.k; bkey_copy(&_new.k, bch2_keylist_front(keys)); new = bkey_i_to_extent(&_new.k); - bch2_cut_front(iter->pos, &insert->k_i); + bch2_cut_front(iter->pos, insert); bch2_cut_back(new->k.p, &insert->k); bch2_cut_back(insert->k.p, &new->k); if (m->data_cmd == DATA_REWRITE) - bch2_bkey_drop_device(extent_i_to_s(insert).s, + bch2_bkey_drop_device(bkey_i_to_s(insert), m->data_opts.rewrite_dev); extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { - if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) { + if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { /* * raced with another move op? extent already * has a pointer to the device we just wrote @@ -114,25 +114,25 @@ static int bch2_migrate_index_update(struct bch_write_op *op) continue; } - bch2_extent_ptr_decoded_append(&insert->k_i, &p); + bch2_extent_ptr_decoded_append(insert, &p); did_work = true; } if (!did_work) goto nomatch; - bch2_bkey_narrow_crcs(&insert->k_i, + bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 }); - bch2_extent_normalize(c, extent_i_to_s(insert).s); - bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert), - op->opts.background_target, - op->opts.data_replicas); + bch2_extent_normalize(c, bkey_i_to_s(insert)); + bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), + op->opts.background_target, + op->opts.data_replicas); /* * If we're not fully overwriting @k, and it's compressed, we * need a reservation for all the pointers in @insert */ - nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - + nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(insert)) - m->nr_ptrs_reserved; if (insert->k.size < k.k->size && @@ -148,7 +148,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) } bch2_trans_update(&trans, - BTREE_INSERT_ENTRY(iter, &insert->k_i)); + BTREE_INSERT_ENTRY(iter, insert)); ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op), @@ -213,10 +213,12 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, struct bch_io_opts io_opts, enum data_cmd data_cmd, struct data_opts data_opts, + enum btree_id btree_id, struct bkey_s_c k) { int ret; + m->btree_id = btree_id; m->data_cmd = data_cmd; m->data_opts = data_opts; m->nr_ptrs_reserved = 0; @@ -264,11 +266,12 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, break; } case DATA_REWRITE: { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned compressed_sectors = 0; - extent_for_each_ptr_decode(bkey_s_c_to_extent(k), p, entry) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (!p.ptr.cached && p.crc.compression_type != BCH_COMPRESSION_NONE && bch2_dev_in_target(c, p.ptr.dev, data_opts.target)) @@ -391,6 +394,7 @@ static int bch2_move_extent(struct bch_fs *c, struct moving_context *ctxt, struct write_point_specifier wp, struct bch_io_opts io_opts, + enum btree_id btree_id, struct bkey_s_c k, enum data_cmd data_cmd, struct data_opts data_opts) @@ -443,7 +447,7 @@ static int bch2_move_extent(struct bch_fs *c, 
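/*
 * Hypothetical example of the move_pred_fn hook declared in move.h just
 * below (mine, not from the patch; DATA_SKIP/DATA_REWRITE and the
 * data_opts fields are as used by the callers shown in this series): a
 * predicate that rewrites everything still living on one device,
 * roughly what device evacuate does:
 */
static enum data_cmd evacuate_pred(struct bch_fs *c, void *arg,
				   struct bkey_s_c k,
				   struct bch_io_opts *io_opts,
				   struct data_opts *data_opts)
{
	unsigned dev_idx = *(unsigned *) arg;

	if (!bch2_bkey_has_device(k, dev_idx))
		return DATA_SKIP;

	data_opts->target	= 0;
	data_opts->rewrite_dev	= dev_idx;
	return DATA_REWRITE;
}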
io->rbio.bio.bi_end_io = move_read_endio; ret = bch2_migrate_write_init(c, &io->write, wp, io_opts, - data_cmd, data_opts, k); + data_cmd, data_opts, btree_id, k); if (ret) goto err_free_pages; @@ -473,16 +477,17 @@ err: return ret; } -int bch2_move_data(struct bch_fs *c, - struct bch_ratelimit *rate, - struct write_point_specifier wp, - struct bpos start, - struct bpos end, - move_pred_fn pred, void *arg, - struct bch_move_stats *stats) +static int __bch2_move_data(struct bch_fs *c, + struct moving_context *ctxt, + struct bch_ratelimit *rate, + struct write_point_specifier wp, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + struct bch_move_stats *stats, + enum btree_id btree_id) { bool kthread = (current->flags & PF_KTHREAD) != 0; - struct moving_context ctxt = { .stats = stats }; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); BKEY_PADDED(k) tmp; struct btree_trans trans; @@ -493,17 +498,13 @@ int bch2_move_data(struct bch_fs *c, u64 delay, cur_inum = U64_MAX; int ret = 0, ret2; - closure_init_stack(&ctxt.cl); - INIT_LIST_HEAD(&ctxt.reads); - init_waitqueue_head(&ctxt.wait); - bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_USER; - stats->btree_id = BTREE_ID_EXTENTS; + stats->btree_id = btree_id; stats->pos = POS_MIN; - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start, + iter = bch2_trans_get_iter(&trans, btree_id, start, BTREE_ITER_PREFETCH); if (rate) @@ -528,7 +529,7 @@ int bch2_move_data(struct bch_fs *c, if (unlikely(freezing(current))) { bch2_trans_unlock(&trans); - move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); try_to_freeze(); } } while (delay); @@ -579,12 +580,12 @@ peek: k = bkey_i_to_s_c(&tmp.k); bch2_trans_unlock(&trans); - ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, k, + ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k, data_cmd, data_opts); if (ret2) { if (ret2 == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(&ctxt); + bch2_move_ctxt_wait_for_io(ctxt); continue; } @@ -602,7 +603,32 @@ next_nondata: bch2_trans_cond_resched(&trans); } out: - bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; + + return ret; +} + +int bch2_move_data(struct bch_fs *c, + struct bch_ratelimit *rate, + struct write_point_specifier wp, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + struct bch_move_stats *stats) +{ + struct moving_context ctxt = { .stats = stats }; + int ret; + + closure_init_stack(&ctxt.cl); + INIT_LIST_HEAD(&ctxt.reads); + init_waitqueue_head(&ctxt.wait); + + stats->data_type = BCH_DATA_USER; + + ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, + pred, arg, stats, BTREE_ID_EXTENTS) ?: + __bch2_move_data(c, &ctxt, rate, wp, start, end, + pred, arg, stats, BTREE_ID_REFLINK); move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); closure_sync(&ctxt.cl); diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 71b3d2b2ddb6..0acd1720d4f8 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -25,6 +25,7 @@ struct data_opts { }; struct migrate_write { + enum btree_id btree_id; enum data_cmd data_cmd; struct data_opts data_opts; @@ -44,7 +45,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, struct write_point_specifier, struct bch_io_opts, enum data_cmd, struct data_opts, - struct bkey_s_c); + enum btree_id, struct bkey_s_c); typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, diff --git 
a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 3742b241807c..f2899ba9ad43 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -236,7 +236,8 @@ static void replay_now_at(struct journal *j, u64 seq) bch2_journal_pin_put(j, j->replay_journal_seq++); } -static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) +static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id, + struct bkey_i *k) { struct btree_trans trans; struct btree_iter *iter, *split_iter; @@ -255,7 +256,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) retry: bch2_trans_begin(&trans); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, btree_id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); @@ -341,22 +342,17 @@ static int bch2_journal_replay(struct bch_fs *c, for_each_journal_key(keys, i) { replay_now_at(j, keys.journal_seq_base + i->journal_seq); - switch (i->btree_id) { - case BTREE_ID_ALLOC: + if (i->btree_id == BTREE_ID_ALLOC) ret = bch2_alloc_replay_key(c, i->k); - break; - case BTREE_ID_EXTENTS: - ret = bch2_extent_replay_key(c, i->k); - break; - default: + else if (btree_node_type_is_extents(i->btree_id)) + ret = bch2_extent_replay_key(c, i->btree_id, i->k); + else ret = bch2_btree_insert(c, i->btree_id, i->k, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_NOMARK); - break; - } if (ret) { bch_err(c, "journal replay: error %d while replaying key", diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c new file mode 100644 index 000000000000..dcca9c1d0f47 --- /dev/null +++ b/fs/bcachefs/reflink.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bcachefs.h" +#include "btree_update.h" +#include "extents.h" +#include "fs.h" +#include "fs-io.h" +#include "reflink.h" + +#include + +/* reflink pointers */ + +const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + + if (bkey_val_bytes(p.k) != sizeof(*p.v)) + return "incorrect value size"; + + return NULL; +} + +void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + + pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx)); +} + +enum merge_result bch2_reflink_p_merge(struct bch_fs *c, + struct bkey_s _l, struct bkey_s _r) +{ + struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l); + struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r); + + if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx)) + return BCH_MERGE_NOMERGE; + + if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) { + bch2_key_resize(l.k, KEY_SIZE_MAX); + __bch2_cut_front(l.k->p, _r); + return BCH_MERGE_PARTIAL; + } + + bch2_key_resize(l.k, l.k->size + r.k->size); + + return BCH_MERGE_MERGE; +} + +/* indirect extents */ + +const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); + + if (bkey_val_bytes(r.k) < sizeof(*r.v)) + return "incorrect value size"; + + return bch2_bkey_ptrs_invalid(c, k); +} + +void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k); + + pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount)); + + bch2_bkey_ptrs_to_text(out, c, k); +} + +/* + * bch2_remap_range() depends on bch2_extent_update(), which depends on various + * things tied to the linux vfs for inode 
updates, for now: + */ +#ifndef NO_BCACHEFS_FS + +static int bch2_make_extent_indirect(struct btree_trans *trans, + struct btree_iter *extent_iter, + struct bkey_i_extent *e) +{ + struct bch_fs *c = trans->c; + struct btree_iter *reflink_iter; + struct bkey_s_c k; + struct bkey_i_reflink_v *r_v; + struct bkey_i_reflink_p *r_p; + int ret; + + for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK, + POS(0, c->reflink_hint), + BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { + if (reflink_iter->pos.inode) { + bch2_btree_iter_set_pos(reflink_iter, POS_MIN); + continue; + } + + if (bkey_deleted(k.k) && e->k.size <= k.k->size) + break; + } + + if (ret) + goto err; + + /* rewind iter to start of hole, if necessary: */ + bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k)); + + r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k)); + ret = PTR_ERR_OR_ZERO(r_v); + if (ret) + goto err; + + bkey_reflink_v_init(&r_v->k_i); + r_v->k.p = reflink_iter->pos; + bch2_key_resize(&r_v->k, e->k.size); + r_v->k.version = e->k.version; + + set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) + + bkey_val_u64s(&e->k)); + r_v->v.refcount = 0; + memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k)); + + bch2_trans_update(trans, BTREE_INSERT_ENTRY(reflink_iter, &r_v->k_i)); + + r_p = bch2_trans_kmalloc(trans, sizeof(*r_p)); + if (IS_ERR(r_p)) + return PTR_ERR(r_p); + + e->k.type = KEY_TYPE_reflink_p; + r_p = bkey_i_to_reflink_p(&e->k_i); + set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); + r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); + + bch2_trans_update(trans, BTREE_INSERT_ENTRY(extent_iter, &r_p->k_i)); +err: + if (!IS_ERR(reflink_iter)) { + c->reflink_hint = reflink_iter->pos.offset; + bch2_trans_iter_put(trans, reflink_iter); + } + + return ret; +} + +static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) +{ + struct bkey_s_c k = bch2_btree_iter_peek(iter); + + while (1) { + if (bkey_err(k)) + return k; + + if (bkey_cmp(iter->pos, end) >= 0) + return bkey_s_c_null; + + if (k.k->type == KEY_TYPE_extent || + k.k->type == KEY_TYPE_reflink_p) + return k; + + k = bch2_btree_iter_next(iter); + } +} + +s64 bch2_remap_range(struct bch_fs *c, + struct bch_inode_info *dst_inode, + struct bpos dst_start, struct bpos src_start, + u64 remap_sectors, u64 new_i_size) +{ + struct btree_trans trans; + struct btree_iter *dst_iter, *src_iter; + struct bkey_s_c src_k; + BKEY_PADDED(k) new_dst, new_src; + struct bpos dst_end = dst_start, src_end = src_start; + struct bpos dst_want, src_want; + u64 src_done, dst_done; + int ret = 0; + + if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) { + mutex_lock(&c->sb_lock); + if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) { + c->disk_sb.sb->features[0] |= + cpu_to_le64(1ULL << BCH_FEATURE_REFLINK); + + bch2_write_super(c); + } + mutex_unlock(&c->sb_lock); + } + + dst_end.offset += remap_sectors; + src_end.offset += remap_sectors; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); + + src_iter = __bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, + BTREE_ITER_INTENT, 1); + dst_iter = __bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start, + BTREE_ITER_INTENT, 2); + + while (1) { + bch2_trans_begin_updates(&trans); + trans.mem_top = 0; + + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto err; + } + + src_k = get_next_src(src_iter, src_end); + ret = bkey_err(src_k); + if (ret) + goto btree_err; + + src_done = bpos_min(src_iter->pos, src_end).offset - + src_start.offset; + dst_want = POS(dst_start.inode, 
dst_start.offset + src_done); + + if (bkey_cmp(dst_iter->pos, dst_want) < 0) { + ret = bch2_fpunch_at(&trans, dst_iter, dst_want, + dst_inode, new_i_size); + if (ret) + goto btree_err; + continue; + } + + BUG_ON(bkey_cmp(dst_iter->pos, dst_want)); + + if (!bkey_cmp(dst_iter->pos, dst_end)) + break; + + if (src_k.k->type == KEY_TYPE_extent) { + bkey_reassemble(&new_src.k, src_k); + src_k = bkey_i_to_s_c(&new_src.k); + + bch2_cut_front(src_iter->pos, &new_src.k); + bch2_cut_back(src_end, &new_src.k.k); + + ret = bch2_make_extent_indirect(&trans, src_iter, + bkey_i_to_extent(&new_src.k)); + if (ret) + goto btree_err; + + BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); + } + + if (src_k.k->type == KEY_TYPE_reflink_p) { + struct bkey_s_c_reflink_p src_p = + bkey_s_c_to_reflink_p(src_k); + struct bkey_i_reflink_p *dst_p = + bkey_reflink_p_init(&new_dst.k); + + u64 offset = le64_to_cpu(src_p.v->idx) + + (src_iter->pos.offset - + bkey_start_offset(src_k.k)); + + dst_p->v.idx = cpu_to_le64(offset); + } else { + BUG(); + } + + new_dst.k.k.p = dst_iter->pos; + bch2_key_resize(&new_dst.k.k, + min(src_k.k->p.offset - src_iter->pos.offset, + dst_end.offset - dst_iter->pos.offset)); + + ret = bch2_extent_update(&trans, dst_inode, NULL, NULL, + dst_iter, &new_dst.k, + new_i_size, false, true, NULL); + if (ret) + goto btree_err; + + dst_done = dst_iter->pos.offset - dst_start.offset; + src_want = POS(src_start.inode, src_start.offset + dst_done); + bch2_btree_iter_set_pos(src_iter, src_want); +btree_err: + if (ret == -EINTR) + ret = 0; + if (ret) + goto err; + } + + BUG_ON(bkey_cmp(dst_iter->pos, dst_end)); +err: + BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); + + dst_done = dst_iter->pos.offset - dst_start.offset; + new_i_size = min(dst_iter->pos.offset << 9, new_i_size); + + ret = bch2_trans_exit(&trans) ?: ret; + + mutex_lock(&dst_inode->ei_update_lock); + if (dst_inode->v.i_size < new_i_size) { + i_size_write(&dst_inode->v, new_i_size); + ret = bch2_write_inode_size(c, dst_inode, new_i_size, + ATTR_MTIME|ATTR_CTIME); + } + mutex_unlock(&dst_inode->ei_update_lock); + + return dst_done ?: ret; +} + +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h new file mode 100644 index 000000000000..327618c36d33 --- /dev/null +++ b/fs/bcachefs/reflink.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_REFLINK_H +#define _BCACHEFS_REFLINK_H + +const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); +enum merge_result bch2_reflink_p_merge(struct bch_fs *, + struct bkey_s, struct bkey_s); + +#define bch2_bkey_ops_reflink_p (struct bkey_ops) { \ + .key_invalid = bch2_reflink_p_invalid, \ + .val_to_text = bch2_reflink_p_to_text, \ + .key_merge = bch2_reflink_p_merge, \ +} + +const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); + + +#define bch2_bkey_ops_reflink_v (struct bkey_ops) { \ + .key_invalid = bch2_reflink_v_invalid, \ + .val_to_text = bch2_reflink_v_to_text, \ +} + +#ifndef NO_BCACHEFS_FS +s64 bch2_remap_range(struct bch_fs *, struct bch_inode_info *, + struct bpos, struct bpos, u64, u64); +#endif /* NO_BCACHEFS_FS */ + +#endif /* _BCACHEFS_REFLINK_H */ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 7a9a7ec26c93..4fb142f3d39c 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -113,6 +113,7 @@ void 
bch2_bkey_to_replicas(struct bch_replicas_entry *e, extent_to_replicas(k, e); break; case KEY_TYPE_extent: + case KEY_TYPE_reflink_v: e->data_type = BCH_DATA_USER; extent_to_replicas(k, e); break; -- cgit v1.2.3 From 416f6852523d8599713b756b2d2027d2e9f90b3f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2019 16:30:55 -0400 Subject: bcachefs: Don't flush journal from bch2_vfs_write_inode() It's only updating timestamps, so this doubly doesn't make sense. fsync will flush the journal, if necessary. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index fad019d3c3f5..c4ef172400e1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1533,12 +1533,6 @@ static int bch2_vfs_write_inode(struct inode *vinode, ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - if (c->opts.journal_flush_disabled) - return ret; - - if (!ret && wbc->sync_mode == WB_SYNC_ALL) - ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq); - return ret; } @@ -1595,6 +1589,9 @@ static int bch2_sync_fs(struct super_block *sb, int wait) { struct bch_fs *c = sb->s_fs_info; + if (c->opts.journal_flush_disabled) + return 0; + if (!wait) { bch2_journal_flush_async(&c->journal, NULL); return 0; -- cgit v1.2.3 From 3fb5ebcdd4b0599ba8d20a322d322f3a1aaea381 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2019 16:07:37 -0400 Subject: bcachefs: Inline some fast paths Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 13 ------------- fs/bcachefs/extents.h | 13 ++++++++++++- fs/bcachefs/fs-io.c | 2 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/io.c | 14 +++++--------- fs/bcachefs/io.h | 13 +++++++++++-- 6 files changed, 30 insertions(+), 27 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 61b5e22f66c8..63afbf24a101 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -806,19 +806,6 @@ bool bch2_cut_back(struct bpos where, struct bkey *k) return true; } -/** - * bch_key_resize - adjust size of @k - * - * bkey_start_offset(k) will be preserved, modifies where the extent ends - */ -void bch2_key_resize(struct bkey *k, - unsigned new_size) -{ - k->p.offset -= k->size; - k->p.offset += new_size; - k->size = new_size; -} - static bool extent_i_save(struct btree *b, struct bkey_packed *dst, struct bkey_i *src) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index cef93af25858..4c4a7945a751 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -540,7 +540,18 @@ static inline void bch2_cut_front(struct bpos where, struct bkey_i *k) } bool bch2_cut_back(struct bpos, struct bkey *); -void bch2_key_resize(struct bkey *, unsigned); + +/** + * bch_key_resize - adjust size of @k + * + * bkey_start_offset(k) will be preserved, modifies where the extent ends + */ +static inline void bch2_key_resize(struct bkey *k, unsigned new_size) +{ + k->p.offset -= k->size; + k->p.offset += new_size; + k->size = new_size; +} /* * In extent_sort_fix_overlapping(), insert_fixup_extent(), diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 1873bbb9afda..0dfe822cecbf 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1036,7 +1036,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(trans, iter, + ret = bch2_read_indirect_extent(trans, &offset_into_extent, &tmp.k); if (ret) break; 
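The call sites in this patch all pass offset_into_extent through to the indirect lookup. As a hedged sketch of what that lookup computes — simplified, hypothetical types, mirroring the reflink_offset line in the io.c hunk below, not the in-kernel code — a reflink pointer stores idx, the start of an indirect extent in the reflink btree, and a read adds its offset within the pointer to idx:

#include <stdint.h>

/* sketch only: a hypothetical stand-in for struct bch_reflink_p */
struct reflink_p_sketch {
	uint64_t idx;	/* start of the indirect extent in the reflink btree */
};

/* the position an indirect read resolves to in the reflink btree */
static uint64_t reflink_read_pos(const struct reflink_p_sketch *p,
				 unsigned offset_into_extent)
{
	return p->idx + offset_into_extent;
}
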
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c4ef172400e1..dcaf1da656d1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1233,7 +1233,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(&trans, iter, + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &cur.k); if (ret) break; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 117d1faa99f2..844ae46cd7eb 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1305,7 +1305,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(&trans, iter, + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &tmp.k); if (ret) break; @@ -1594,19 +1594,15 @@ static void bch2_read_endio(struct bio *bio) bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); } -int bch2_read_indirect_extent(struct btree_trans *trans, - struct btree_iter *extent_iter, - unsigned *offset_into_extent, - struct bkey_i *orig_k) +int __bch2_read_indirect_extent(struct btree_trans *trans, + unsigned *offset_into_extent, + struct bkey_i *orig_k) { struct btree_iter *iter; struct bkey_s_c k; u64 reflink_offset; int ret; - if (orig_k->k.type != KEY_TYPE_reflink_p) - return 0; - reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) + *offset_into_extent; @@ -1920,7 +1916,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - ret = bch2_read_indirect_extent(&trans, iter, + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &tmp.k); if (ret) goto err; diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index a768ccc90f1f..c6f5ae717cf3 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -99,8 +99,17 @@ struct bch_devs_mask; struct cache_promote_op; struct extent_ptr_decoded; -int bch2_read_indirect_extent(struct btree_trans *, struct btree_iter *, - unsigned *, struct bkey_i *); +int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, + struct bkey_i *); + +static inline int bch2_read_indirect_extent(struct btree_trans *trans, + unsigned *offset_into_extent, + struct bkey_i *k) +{ + return k->k.type == KEY_TYPE_reflink_p + ? __bch2_read_indirect_extent(trans, offset_into_extent, k) + : 0; +} enum bch_read_flags { BCH_READ_RETRY_IF_STALE = 1 << 0, -- cgit v1.2.3 From 7d5224fcdc057a42fcd2d19bbc4d9f1c4808a83b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Aug 2019 16:12:28 -0400 Subject: bcachefs: Optimize fiemap Reflink caused fiemap performance to regress badly - this gets us back to where we were. 
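A hedged sketch of the new loop shape, using hypothetical helper names rather than the kernel API (the real change is the fs.c hunk below): the regression came from visiting every slot in the range with peek_slot(), which made holes as expensive as extents; the fix walks only the keys that exist and skips the ones carrying no data.

#include <stdint.h>

/* sketch only: hypothetical stand-ins for the btree iterator API */
struct key;
extern struct key *iter_peek(void *iter);	/* next existing key */
extern void iter_advance(void *iter);
extern int key_before(const struct key *, uint64_t end);
extern int key_is_data(const struct key *);
extern int key_is_reservation(const struct key *);
extern int emit_extent(const struct key *);

static int fiemap_walk(void *iter, uint64_t end)
{
	struct key *k;

	while ((k = iter_peek(iter)) && key_before(k, end)) {
		if (!key_is_data(k) && !key_is_reservation(k)) {
			iter_advance(iter);	/* holes now cost one skip */
			continue;
		}
		if (emit_extent(k))		/* report the extent */
			return -1;
		iter_advance(iter);
	}
	return 0;
}
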
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 58 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 26 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index dcaf1da656d1..ffd9b386a14d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1203,6 +1203,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_iter *iter; struct bkey_s_c k; BKEY_PADDED(k) cur, prev; + struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; int ret = 0; @@ -1217,14 +1218,16 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, - POS(ei->v.i_ino, start >> 9), - BTREE_ITER_SLOTS); - - while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) { - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - goto err; + POS(ei->v.i_ino, start >> 9), 0); +retry: + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k)) && + bkey_cmp(iter->pos, end) < 0) { + if (!bkey_extent_is_data(k.k) && + k.k->type != KEY_TYPE_reservation) { + bch2_btree_iter_next(iter); + continue; + } bkey_reassemble(&cur.k, k); k = bkey_i_to_s_c(&cur.k); @@ -1240,34 +1243,37 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, sectors = min(sectors, k.k->size - offset_into_extent); - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + offset_into_extent), - &cur.k); + if (offset_into_extent) + bch2_cut_front(POS(k.k->p.inode, + bkey_start_offset(k.k) + + offset_into_extent), + &cur.k); bch2_key_resize(&cur.k.k, sectors); cur.k.k.p.offset = iter->pos.offset + cur.k.k.size; - if (bkey_extent_is_data(k.k) || - k.k->type == KEY_TYPE_reservation) { - if (have_extent) { - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(&prev.k), 0); - if (ret) - break; - } - - bkey_copy(&prev.k, &cur.k); - have_extent = true; + if (have_extent) { + ret = bch2_fill_extent(c, info, + bkey_i_to_s_c(&prev.k), 0); + if (ret) + break; } - bch2_btree_iter_set_pos(iter, - POS(iter->pos.inode, - iter->pos.offset + sectors)); + bkey_copy(&prev.k, &cur.k); + have_extent = true; + + if (k.k->type == KEY_TYPE_reflink_v) + bch2_btree_iter_set_pos(iter, k.k->p); + else + bch2_btree_iter_next(iter); } + if (ret == -EINTR) + goto retry; + if (!ret && have_extent) ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), FIEMAP_EXTENT_LAST); -err: + ret = bch2_trans_exit(&trans) ?: ret; return ret < 0 ? 
ret : 0; } -- cgit v1.2.3 From 05cf02b5a10ae9b60aad4b1fe4049eb4e7603b4f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 28 Aug 2019 12:41:45 -0400 Subject: bcachefs: Fix fiemap (again) when iterating over reflink pointers, we use the key we just emitted to set the iterator position - which means we have to be setting the key's inode field as well Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ffd9b386a14d..0ba498505b07 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1249,7 +1249,8 @@ retry: offset_into_extent), &cur.k); bch2_key_resize(&cur.k.k, sectors); - cur.k.k.p.offset = iter->pos.offset + cur.k.k.size; + cur.k.k.p = iter->pos; + cur.k.k.p.offset += cur.k.k.size; if (have_extent) { ret = bch2_fill_extent(c, info, -- cgit v1.2.3 From b43a0f60a61e8e0adea6b1b9adc9a97600fc2f00 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Sep 2019 16:19:52 -0400 Subject: bcachefs: Cleanup i_nlink handling Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 22 ++++------------------ fs/bcachefs/fs.h | 5 ----- fs/bcachefs/fsck.c | 13 ++----------- fs/bcachefs/inode.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 34 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0ba498505b07..b9a20bb19b58 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -131,9 +131,7 @@ void bch2_inode_update_after_write(struct bch_fs *c, struct bch_inode_unpacked *bi, unsigned fields) { - set_nlink(&inode->v, bi->bi_flags & BCH_INODE_UNLINKED - ? 0 - : bi->bi_nlink + nlink_bias(inode->v.i_mode)); + set_nlink(&inode->v, bch2_inode_nlink_get(bi)); i_uid_write(&inode->v, bi->bi_uid); i_gid_write(&inode->v, bi->bi_gid); inode->v.i_mode = bi->bi_mode; @@ -552,12 +550,7 @@ static int inode_update_for_link_fn(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; bi->bi_ctime = bch2_current_time(c); - - if (bi->bi_flags & BCH_INODE_UNLINKED) - bi->bi_flags &= ~BCH_INODE_UNLINKED; - else - bi->bi_nlink++; - + bch2_inode_nlink_inc(bi); return 0; } @@ -640,11 +633,7 @@ static int inode_update_for_unlink_fn(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; bi->bi_ctime = bch2_current_time(c); - if (bi->bi_nlink) - bi->bi_nlink--; - else - bi->bi_flags |= BCH_INODE_UNLINKED; - + bch2_inode_nlink_dec(bi); return 0; } @@ -815,10 +804,7 @@ static int inode_update_for_rename_fn(struct bch_inode_info *inode, BUG_ON(bi->bi_nlink && S_ISDIR(info->dst_inode->v.i_mode)); - if (bi->bi_nlink) - bi->bi_nlink--; - else - bi->bi_flags |= BCH_INODE_UNLINKED; + bch2_inode_nlink_dec(bi); } if (inode == info->src_dir || diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 6edf5dd803f0..04ac5b4129a4 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -109,11 +109,6 @@ static inline u8 mode_to_type(umode_t mode) return (mode >> 12) & 15; } -static inline unsigned nlink_bias(umode_t mode) -{ - return S_ISDIR(mode) ? 2 : 1; -} - static inline bool inode_attr_changing(struct bch_inode_info *dir, struct bch_inode_info *inode, enum inode_opt_id id) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 50a7d8c1faba..162563b809fb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1116,9 +1116,7 @@ static int check_inode_nlink(struct bch_fs *c, struct nlink *link, bool *do_update) { - u32 i_nlink = u->bi_flags & BCH_INODE_UNLINKED - ? 
0 - : u->bi_nlink + nlink_bias(u->bi_mode); + u32 i_nlink = bch2_inode_nlink_get(u); u32 real_i_nlink = link->count * nlink_bias(u->bi_mode) + link->dir_count; @@ -1197,14 +1195,7 @@ static int check_inode_nlink(struct bch_fs *c, u->bi_inum, i_nlink, real_i_nlink); set_i_nlink: if (i_nlink != real_i_nlink) { - if (real_i_nlink) { - u->bi_nlink = real_i_nlink - nlink_bias(u->bi_mode); - u->bi_flags &= ~BCH_INODE_UNLINKED; - } else { - u->bi_nlink = 0; - u->bi_flags |= BCH_INODE_UNLINKED; - } - + bch2_inode_nlink_set(u, real_i_nlink); *do_update = true; } fsck_err: diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index af0c355f2f04..e88ec78071bd 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -103,6 +103,49 @@ static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode, } } +/* i_nlink: */ + +static inline unsigned nlink_bias(umode_t mode) +{ + return S_ISDIR(mode) ? 2 : 1; +} + +static inline void bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) +{ + if (bi->bi_flags & BCH_INODE_UNLINKED) + bi->bi_flags &= ~BCH_INODE_UNLINKED; + else + bi->bi_nlink++; +} + +static inline void bch2_inode_nlink_dec(struct bch_inode_unpacked *bi) +{ + BUG_ON(bi->bi_flags & BCH_INODE_UNLINKED); + if (bi->bi_nlink) + bi->bi_nlink--; + else + bi->bi_flags |= BCH_INODE_UNLINKED; +} + +static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi) +{ + return bi->bi_flags & BCH_INODE_UNLINKED + ? 0 + : bi->bi_nlink + nlink_bias(bi->bi_mode); +} + +static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi, + unsigned nlink) +{ + if (nlink) { + bi->bi_nlink = nlink - nlink_bias(bi->bi_mode); + bi->bi_flags &= ~BCH_INODE_UNLINKED; + } else { + bi->bi_nlink = 0; + bi->bi_flags |= BCH_INODE_UNLINKED; + } +} + #ifdef CONFIG_BCACHEFS_DEBUG void bch2_inode_pack_test(void); #else -- cgit v1.2.3 From a7199432c3cbcd42141cfd5c047bf8828c2390d8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Sep 2019 18:49:16 -0400 Subject: bcachefs: Kill deferred btree updates Will be replaced by cached btree iterators Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 2 +- fs/bcachefs/alloc_background.c | 4 +- fs/bcachefs/btree_types.h | 20 ----- fs/bcachefs/btree_update.h | 43 ++-------- fs/bcachefs/btree_update_leaf.c | 178 ++++++---------------------------------- fs/bcachefs/buckets.c | 8 +- fs/bcachefs/dirent.c | 9 +- fs/bcachefs/ec.c | 6 +- fs/bcachefs/fs-io.c | 78 +++++------------- fs/bcachefs/fs.c | 42 +++------- fs/bcachefs/fs.h | 1 - fs/bcachefs/fsck.c | 7 +- fs/bcachefs/inode.c | 6 +- fs/bcachefs/io.c | 5 +- fs/bcachefs/migrate.c | 5 +- fs/bcachefs/move.c | 3 +- fs/bcachefs/opts.h | 8 +- fs/bcachefs/quota.c | 2 +- fs/bcachefs/recovery.c | 2 +- fs/bcachefs/reflink.c | 4 +- fs/bcachefs/str_hash.h | 4 +- fs/bcachefs/tests.c | 10 +-- 22 files changed, 99 insertions(+), 348 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 1c3343252129..5a4263806610 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -378,7 +378,7 @@ int bch2_acl_chmod(struct btree_trans *trans, } new->k.p = iter->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new->k_i)); + bch2_trans_update(trans, iter, &new->k_i); *new_acl = acl; acl = NULL; err: diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 85795b580892..81418d534d70 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -311,7 +311,7 @@ retry: a->k.p = iter->pos; bch2_alloc_pack(a, new_u); - 
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); + bch2_trans_update(trans, iter, &a->k_i); ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| @@ -899,7 +899,7 @@ retry: a->k.p = iter->pos; bch2_alloc_pack(a, u); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i)); + bch2_trans_update(trans, iter, &a->k_i); /* * XXX: diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 299d1173df62..c128ff393f0c 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -246,29 +246,9 @@ static inline enum btree_iter_type btree_iter_type(struct btree_iter *iter) return iter->flags & BTREE_ITER_TYPE; } -struct deferred_update { - struct journal_preres res; - struct journal_entry_pin journal; - - spinlock_t lock; - unsigned dirty:1; - - u8 allocated_u64s; - enum btree_id btree_id; - - /* must be last: */ - struct bkey_i k; -}; - struct btree_insert_entry { struct bkey_i *k; - - union { struct btree_iter *iter; - struct deferred_update *d; - }; - - bool deferred; }; #define BTREE_ITER_MAX 64 diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 36e34b3d9213..0e985c1f0100 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -15,24 +15,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, void bch2_btree_journal_key(struct btree_trans *, struct btree_iter *, struct bkey_i *); -void bch2_deferred_update_free(struct bch_fs *, - struct deferred_update *); -struct deferred_update * -bch2_deferred_update_alloc(struct bch_fs *, enum btree_id, unsigned); - -#define BTREE_INSERT_ENTRY(_iter, _k) \ - ((struct btree_insert_entry) { \ - .iter = (_iter), \ - .k = (_k), \ - }) - -#define BTREE_INSERT_DEFERRED(_d, _k) \ - ((struct btree_insert_entry) { \ - .k = (_k), \ - .d = (_d), \ - .deferred = true, \ - }) - enum { __BTREE_INSERT_ATOMIC, __BTREE_INSERT_NOUNLOCK, @@ -120,11 +102,14 @@ int bch2_trans_commit(struct btree_trans *, u64 *, unsigned); static inline void bch2_trans_update(struct btree_trans *trans, - struct btree_insert_entry entry) + struct btree_iter *iter, + struct bkey_i *k) { EBUG_ON(trans->nr_updates >= trans->nr_iters + 4); - trans->updates[trans->nr_updates++] = entry; + trans->updates[trans->nr_updates++] = (struct btree_insert_entry) { + .iter = iter, .k = k + }; } #define bch2_trans_do(_c, _journal_seq, _flags, _do) \ @@ -145,23 +130,9 @@ static inline void bch2_trans_update(struct btree_trans *trans, _ret; \ }) -#define __trans_next_update(_trans, _i, _filter) \ -({ \ - while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\ - (_i)++; \ - \ - (_i) < (_trans)->updates + (_trans->nr_updates); \ -}) - -#define __trans_for_each_update(_trans, _i, _filter) \ +#define trans_for_each_update(_trans, _i) \ for ((_i) = (_trans)->updates; \ - __trans_next_update(_trans, _i, _filter); \ + (_i) < (_trans)->updates + (_trans)->nr_updates; \ (_i)++) -#define trans_for_each_update(trans, i) \ - __trans_for_each_update(trans, i, true) - -#define trans_for_each_update_iter(trans, i) \ - __trans_for_each_update(trans, i, !(i)->deferred) - #endif /* _BCACHEFS_BTREE_UPDATE_H */ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a0a59cd496a3..2e9271759447 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -28,8 +28,7 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans, ? 
trans->updates + trans->updates_sorted[sorted_idx - 1] : NULL; - return !i->deferred && - prev && + return prev && i->iter->l[0].b == prev->iter->l[0].b; } @@ -73,13 +72,6 @@ static void btree_trans_lock_write(struct btree_trans *trans, bool lock) } } -static inline int btree_trans_cmp(struct btree_insert_entry l, - struct btree_insert_entry r) -{ - return cmp_int(l.deferred, r.deferred) ?: - btree_iter_cmp(l.iter, r.iter); -} - static inline void btree_trans_sort_updates(struct btree_trans *trans) { struct btree_insert_entry *l, *r; @@ -89,7 +81,7 @@ static inline void btree_trans_sort_updates(struct btree_trans *trans) for (pos = 0; pos < nr; pos++) { r = trans->updates + trans->updates_sorted[pos]; - if (btree_trans_cmp(*l, *r) <= 0) + if (btree_iter_cmp(l->iter, r->iter) <= 0) break; } @@ -312,143 +304,23 @@ static void btree_insert_key_leaf(struct btree_trans *trans, trace_btree_insert_key(c, b, insert->k); } -/* Deferred btree updates: */ - -static void deferred_update_flush(struct journal *j, - struct journal_entry_pin *pin, - u64 seq) -{ - struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct deferred_update *d = - container_of(pin, struct deferred_update, journal); - struct journal_preres res = { 0 }; - u64 tmp[32]; - struct bkey_i *k = (void *) tmp; - int ret; - - if (d->allocated_u64s > ARRAY_SIZE(tmp)) { - k = kmalloc(d->allocated_u64s * sizeof(u64), GFP_NOFS); - - BUG_ON(!k); /* XXX */ - } - - spin_lock(&d->lock); - if (d->dirty) { - BUG_ON(jset_u64s(d->k.k.u64s) > d->res.u64s); - - swap(res, d->res); - - BUG_ON(d->k.k.u64s > d->allocated_u64s); - - bkey_copy(k, &d->k); - d->dirty = false; - spin_unlock(&d->lock); - - ret = bch2_btree_insert(c, d->btree_id, k, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_JOURNAL_RESERVED); - bch2_fs_fatal_err_on(ret && !bch2_journal_error(j), - c, "error flushing deferred btree update: %i", ret); - - spin_lock(&d->lock); - } - - if (!d->dirty) - bch2_journal_pin_drop(j, &d->journal); - spin_unlock(&d->lock); - - bch2_journal_preres_put(j, &res); - if (k != (void *) tmp) - kfree(k); -} - -static void btree_insert_key_deferred(struct btree_trans *trans, - struct btree_insert_entry *insert) -{ - struct bch_fs *c = trans->c; - struct journal *j = &c->journal; - struct deferred_update *d = insert->d; - int difference; - - BUG_ON(trans->flags & BTREE_INSERT_JOURNAL_REPLAY); - BUG_ON(insert->k->u64s > d->allocated_u64s); - - __btree_journal_key(trans, d->btree_id, insert->k); - - spin_lock(&d->lock); - BUG_ON(jset_u64s(insert->k->u64s) > - trans->journal_preres.u64s); - - difference = jset_u64s(insert->k->u64s) - d->res.u64s; - if (difference > 0) { - trans->journal_preres.u64s -= difference; - d->res.u64s += difference; - } - - bkey_copy(&d->k, insert->k); - d->dirty = true; - - bch2_journal_pin_update(j, trans->journal_res.seq, &d->journal, - deferred_update_flush); - spin_unlock(&d->lock); -} - -void bch2_deferred_update_free(struct bch_fs *c, - struct deferred_update *d) -{ - deferred_update_flush(&c->journal, &d->journal, 0); - - BUG_ON(journal_pin_active(&d->journal)); - - bch2_journal_pin_flush(&c->journal, &d->journal); - kfree(d); -} - -struct deferred_update * -bch2_deferred_update_alloc(struct bch_fs *c, - enum btree_id btree_id, - unsigned u64s) -{ - struct deferred_update *d; - - BUG_ON(u64s > U8_MAX); - - d = kmalloc(offsetof(struct deferred_update, k) + - u64s * sizeof(u64), GFP_NOFS); - BUG_ON(!d); - - memset(d, 0, offsetof(struct deferred_update, k)); - - spin_lock_init(&d->lock); - 
d->allocated_u64s = u64s; - d->btree_id = btree_id; - - return d; -} - /* Normal update interface: */ static inline void btree_insert_entry_checks(struct btree_trans *trans, struct btree_insert_entry *i) { struct bch_fs *c = trans->c; - enum btree_id btree_id = !i->deferred - ? i->iter->btree_id - : i->d->btree_id; - - if (!i->deferred) { - BUG_ON(i->iter->level); - BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); - EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && - bkey_cmp(i->k->k.p, i->iter->l[0].b->key.k.p) > 0); - EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && - !(trans->flags & BTREE_INSERT_ATOMIC)); - } + + BUG_ON(i->iter->level); + BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); + EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && + bkey_cmp(i->k->k.p, i->iter->l[0].b->key.k.p) > 0); + EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && + !(trans->flags & BTREE_INSERT_ATOMIC)); BUG_ON(debug_check_bkeys(c) && !bkey_deleted(&i->k->k) && - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id)); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->iter->btree_id)); } static int bch2_trans_journal_preres_get(struct btree_trans *trans) @@ -459,7 +331,7 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans) int ret; trans_for_each_update(trans, i) - if (i->deferred) + if (0) u64s += jset_u64s(i->k->k.u64s); if (!u64s) @@ -551,10 +423,7 @@ static int btree_trans_check_can_insert(struct btree_trans *trans, static inline void do_btree_insert_one(struct btree_trans *trans, struct btree_insert_entry *insert) { - if (likely(!insert->deferred)) - btree_insert_key_leaf(trans, insert); - else - btree_insert_key_deferred(trans, insert); + btree_insert_key_leaf(trans, insert); } static inline bool update_triggers_transactional(struct btree_trans *trans, @@ -570,7 +439,6 @@ static inline bool update_has_triggers(struct btree_trans *trans, struct btree_insert_entry *i) { return likely(!(trans->flags & BTREE_INSERT_NOMARK)) && - !i->deferred && btree_node_type_needs_gc(i->iter->btree_id); } @@ -588,14 +456,14 @@ static inline int do_btree_insert_at(struct btree_trans *trans, : 0; int ret; - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK); /* * note: running triggers will append more updates to the list of * updates as we're walking it: */ - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) if (update_has_triggers(trans, i) && update_triggers_transactional(trans, i)) { ret = bch2_trans_mark_update(trans, i->iter, i->k); @@ -633,7 +501,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, if (ret) goto out; - trans_for_each_update_iter(trans, i) { + trans_for_each_update(trans, i) { if (!btree_node_type_needs_gc(i->iter->btree_id)) continue; @@ -673,7 +541,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, i->k->k.version = MAX_VERSION; } - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) if (update_has_triggers(trans, i) && !update_triggers_transactional(trans, i)) bch2_mark_update(trans, i, &fs_usage->u, mark_flags); @@ -687,7 +555,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) && unlikely(c->gc_pos.phase)) - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b))) bch2_mark_update(trans, i, NULL, mark_flags| @@ -772,7 +640,7 @@ int bch2_trans_commit_error(struct btree_trans 
*trans, case BTREE_INSERT_NEED_MARK_REPLICAS: bch2_trans_unlock(trans); - trans_for_each_update_iter(trans, i) { + trans_for_each_update(trans, i) { ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)); if (ret) return ret; @@ -842,7 +710,7 @@ static int __bch2_trans_commit(struct btree_trans *trans, unsigned iter; int ret; - trans_for_each_update_iter(trans, i) { + trans_for_each_update(trans, i) { if (!bch2_btree_iter_upgrade(i->iter, 1)) { trace_trans_restart_upgrade(trans->ip); ret = -EINTR; @@ -868,7 +736,7 @@ static int __bch2_trans_commit(struct btree_trans *trans, trans->nounlock = false; - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) bch2_btree_iter_downgrade(i->iter); err: /* make sure we didn't drop or screw up locks: */ @@ -995,7 +863,7 @@ retry: iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k)); + bch2_trans_update(&trans, iter, k); ret = bch2_trans_commit(&trans, disk_res, journal_seq, flags); if (ret == -EINTR) @@ -1045,7 +913,7 @@ retry: break; } - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &delete)); + bch2_trans_update(trans, iter, &delete); ret = bch2_trans_commit(trans, NULL, journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); @@ -1072,7 +940,7 @@ int bch2_btree_delete_at(struct btree_trans *trans, bkey_init(&k.k); k.k.p = iter->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &k)); + bch2_trans_update(trans, iter, &k); return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE|flags); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 637a9e909f82..9c97a1522d9d 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1316,7 +1316,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, bch_err(c, "disk usage increased more than %llu sectors reserved", disk_res_sectors); - trans_for_each_update_iter(trans, i) { + trans_for_each_update(trans, i) { struct btree_iter *iter = i->iter; struct btree *b = iter->l[0].b; struct btree_node_iter node_iter = iter->l[0].iter; @@ -1358,7 +1358,7 @@ static int trans_get_key(struct btree_trans *trans, struct btree_insert_entry *i; int ret; - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) if (i->iter->btree_id == btree_id && (btree_node_type_is_extents(btree_id) ? 
bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && @@ -1397,13 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans, bkey_init(&new_k->k); new_k->k.p = iter->pos; - trans_for_each_update_iter(trans, i) + trans_for_each_update(trans, i) if (i->iter == iter) { i->k = new_k; return new_k; } - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k)); + bch2_trans_update(trans, iter, new_k); return new_k; } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 1442dacef0de..38dd96808e90 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -255,9 +255,8 @@ int bch2_dirent_rename(struct btree_trans *trans, * new_dst at the src position: */ new_dst->k.p = src_iter->pos; - bch2_trans_update(trans, - BTREE_INSERT_ENTRY(src_iter, - &new_dst->k_i)); + bch2_trans_update(trans, src_iter, + &new_dst->k_i); return 0; } else { /* If we're overwriting, we can't insert new_dst @@ -280,8 +279,8 @@ int bch2_dirent_rename(struct btree_trans *trans, } } - bch2_trans_update(trans, BTREE_INSERT_ENTRY(src_iter, &new_src->k_i)); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(dst_iter, &new_dst->k_i)); + bch2_trans_update(trans, src_iter, &new_src->k_i); + bch2_trans_update(trans, dst_iter, &new_dst->k_i); return 0; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 5b61e9cb1ac3..155e7c9bd89f 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -738,7 +738,7 @@ found_slot: stripe->k.p = iter->pos; - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i)); + bch2_trans_update(&trans, iter, &stripe->k_i); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| @@ -819,7 +819,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ptr, idx); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.k)); + bch2_trans_update(&trans, iter, &tmp.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| @@ -1231,7 +1231,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans, spin_unlock(&c->ec_stripes_heap_lock); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new_key->k_i)); + bch2_trans_update(trans, iter, &new_key->k_i); return bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL|flags); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 18356cbe0794..da4976344d49 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -324,69 +324,36 @@ int bch2_extent_update(struct btree_trans *trans, if (!may_allocate && allocating) return -ENOSPC; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(extent_iter, k)); + bch2_trans_update(trans, extent_iter, k); new_i_size = min(k->k.p.offset << 9, new_i_size); /* XXX: inode->i_size locking */ if (i_sectors_delta || new_i_size > inode->ei_inode.bi_size) { - if (c->opts.new_inode_updates) { - bch2_trans_unlock(trans); - mutex_lock(&inode->ei_update_lock); - - if (!bch2_trans_relock(trans)) { - mutex_unlock(&inode->ei_update_lock); - return -EINTR; - } - - inode_locked = true; - - if (!inode->ei_inode_update) - inode->ei_inode_update = - bch2_deferred_update_alloc(c, - BTREE_ID_INODES, 64); - - inode_u = inode->ei_inode; - inode_u.bi_sectors += i_sectors_delta; - - /* XXX: this is slightly suspect */ - if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - new_i_size > inode_u.bi_size) { - inode_u.bi_size = new_i_size; - extended = true; - } - - bch2_inode_pack(&inode_p, &inode_u); - bch2_trans_update(trans, - BTREE_INSERT_DEFERRED(inode->ei_inode_update, - &inode_p.inode.k_i)); - } else { - inode_iter = bch2_trans_get_iter(trans, - BTREE_ID_INODES, - 
POS(k->k.p.inode, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(inode_iter)) - return PTR_ERR(inode_iter); - - ret = bch2_btree_iter_traverse(inode_iter); - if (ret) - goto err; + inode_iter = bch2_trans_get_iter(trans, + BTREE_ID_INODES, + POS(k->k.p.inode, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(inode_iter)) + return PTR_ERR(inode_iter); - inode_u = inode->ei_inode; - inode_u.bi_sectors += i_sectors_delta; + ret = bch2_btree_iter_traverse(inode_iter); + if (ret) + goto err; - /* XXX: this is slightly suspect */ - if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - new_i_size > inode_u.bi_size) { - inode_u.bi_size = new_i_size; - extended = true; - } + inode_u = inode->ei_inode; + inode_u.bi_sectors += i_sectors_delta; - bch2_inode_pack(&inode_p, &inode_u); - bch2_trans_update(trans, - BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i)); + /* XXX: this is slightly suspect */ + if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > inode_u.bi_size) { + inode_u.bi_size = new_i_size; + extended = true; } + + bch2_inode_pack(&inode_p, &inode_u); + bch2_trans_update(trans, inode_iter, &inode_p.inode.k_i); } ret = bch2_trans_commit(trans, disk_res, @@ -2793,9 +2760,8 @@ reassemble: bkey_start_pos(&delete.k)); } - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(dst, ©.k)); - bch2_trans_update(&trans, - BTREE_INSERT_ENTRY(del ?: src, &delete)); + bch2_trans_update(&trans, dst, ©.k); + bch2_trans_update(&trans, del ?: src, &delete); if (copy.k.k.size == k.k->size) { /* diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b9a20bb19b58..166d94e5e59d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -154,30 +154,22 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, inode_set_fn set, void *p) { - struct bch_fs *c = trans->c; struct btree_iter *iter = NULL; struct bkey_inode_buf *inode_p; int ret; lockdep_assert_held(&inode->ei_update_lock); - if (c->opts.new_inode_updates) { - /* XXX: Don't do this with btree locks held */ - if (!inode->ei_inode_update) - inode->ei_inode_update = - bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64); - } else { - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, - POS(inode->v.i_ino, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter); - - /* The btree node lock is our lock on the inode: */ - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; - } + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, + POS(inode->v.i_ino, 0), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); + + /* The btree node lock is our lock on the inode: */ + ret = bch2_btree_iter_traverse(iter); + if (ret) + return ret; *inode_u = inode->ei_inode; @@ -192,14 +184,7 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode_u); - - if (!inode->ei_inode_update) - bch2_trans_update(trans, - BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); - else - bch2_trans_update(trans, - BTREE_INSERT_DEFERRED(inode->ei_inode_update, - &inode_p->inode.k_i)); + bch2_trans_update(trans, iter, &inode_p->inode.k_i); return 0; } @@ -1482,7 +1467,6 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) mutex_init(&inode->ei_update_lock); pagecache_lock_init(&inode->ei_pagecache_lock); mutex_init(&inode->ei_quota_lock); - inode->ei_inode_update = NULL; inode->ei_journal_seq = 0; return &inode->v; @@ -1540,10 +1524,6 @@ static void bch2_evict_inode(struct inode *vinode) BUG_ON(!is_bad_inode(&inode->v) && 
inode->ei_quota_reserved); - if (inode->ei_inode_update) - bch2_deferred_update_free(c, inode->ei_inode_update); - inode->ei_inode_update = NULL; - if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), KEY_TYPE_QUOTA_WARN); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 04ac5b4129a4..c3ee9c17064f 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -34,7 +34,6 @@ struct bch_inode_info { struct inode v; struct mutex ei_update_lock; - struct deferred_update *ei_inode_update; u64 ei_journal_seq; u64 ei_quota_reserved; unsigned long ei_last_dirtied; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index b806284c0517..c5540536f47c 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -393,7 +393,7 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", buf, strlen(buf), d->v.d_name, len)) { - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i)); + bch2_trans_update(trans, iter, &d->k_i); ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| @@ -663,8 +663,7 @@ retry: bkey_reassemble(&n->k_i, d.s_c); n->v.d_type = mode_to_type(target.bi_mode); - bch2_trans_update(&trans, - BTREE_INSERT_ENTRY(iter, &n->k_i)); + bch2_trans_update(&trans, iter, &n->k_i); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| @@ -1293,7 +1292,7 @@ static int check_inode(struct btree_trans *trans, struct bkey_inode_buf p; bch2_inode_pack(&p, &u); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i)); + bch2_trans_update(trans, iter, &p.inode.k_i); ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 0fb08a396d62..f192536558c1 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -345,8 +345,7 @@ again: inode_u->bi_generation = bkey_generation(k); bch2_inode_pack(inode_p, inode_u); - bch2_trans_update(trans, - BTREE_INSERT_ENTRY(iter, &inode_p->inode.k_i)); + bch2_trans_update(trans, iter, &inode_p->inode.k_i); return 0; } } @@ -435,8 +434,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) delete.v.bi_generation = cpu_to_le32(bi_generation); } - bch2_trans_update(&trans, - BTREE_INSERT_ENTRY(iter, &delete.k_i)); + bch2_trans_update(&trans, iter, &delete.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 07fe6b5cd517..690f9b2dbb98 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -290,8 +290,7 @@ retry: if (ret) break; - bch2_trans_update(&trans, - BTREE_INSERT_ENTRY(iter, &split.k)); + bch2_trans_update(&trans, iter, &split.k); ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op), BTREE_INSERT_NOFAIL| @@ -1445,7 +1444,7 @@ retry: if (!bch2_bkey_narrow_crcs(&new.k, new_crc)) goto out; - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new.k)); + bch2_trans_update(&trans, iter, &new.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index dc3b03d6e627..de8522f754e2 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -72,10 +72,9 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags */ bch2_extent_normalize(c, bkey_i_to_s(&tmp.key)); - /* XXX not sketchy at all */ - iter->pos = bkey_start_pos(&tmp.key.k); + bch2_btree_iter_set_pos(iter, bkey_start_pos(&tmp.key.k)); - 
bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.key)); + bch2_trans_update(&trans, iter, &tmp.key); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 8855dd19f7f2..2f0bdfbfcd61 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -147,8 +147,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) goto next; } - bch2_trans_update(&trans, - BTREE_INSERT_ENTRY(iter, insert)); + bch2_trans_update(&trans, iter, insert); ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op), diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index d44bfe90c0d5..d9325d4bc024 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -295,13 +295,7 @@ enum opt_type { OPT_UINT(0, BCH_REPLICAS_MAX), \ NO_SB_OPT, 1, \ "n", "Data written to this device will be considered\n"\ - "to have already been replicated n times") \ - x(new_inode_updates, u8, \ - OPT_MOUNT, \ - OPT_BOOL(), \ - NO_SB_OPT, false, \ - NULL, "Enable new btree write-cache for inode updates") - + "to have already been replicated n times") struct bch_opts { #define x(_name, _bits, ...) unsigned _name##_defined:1; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index f0da0fac09bf..0fa6f33c049b 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -752,7 +752,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, if (qdq->d_fieldmask & QC_INO_HARD) new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new_quota.k_i)); + bch2_trans_update(&trans, iter, &new_quota.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, 0); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 98d9a1432e50..2e880955a07c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -301,7 +301,7 @@ retry: bch2_cut_front(split_iter->pos, split); bch2_cut_back(atomic_end, &split->k); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(split_iter, split)); + bch2_trans_update(&trans, split_iter, split); bch2_btree_iter_set_pos(iter, split->k.p); } while (bkey_cmp(iter->pos, k->k.p) < 0); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index dcca9c1d0f47..c08b57634abd 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -120,7 +120,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, r_v->v.refcount = 0; memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k)); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(reflink_iter, &r_v->k_i)); + bch2_trans_update(trans, reflink_iter, &r_v->k_i); r_p = bch2_trans_kmalloc(trans, sizeof(*r_p)); if (IS_ERR(r_p)) @@ -131,7 +131,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k)); - bch2_trans_update(trans, BTREE_INSERT_ENTRY(extent_iter, &r_p->k_i)); + bch2_trans_update(trans, extent_iter, &r_p->k_i); err: if (!IS_ERR(reflink_iter)) { c->reflink_hint = reflink_iter->pos.offset; diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 31b278e71051..886f1bc8aa14 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -267,7 +267,7 @@ not_found: } insert->k.p = iter->pos; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, insert)); + bch2_trans_update(trans, iter, insert); bch2_trans_iter_free_on_commit(trans, iter); } @@ -295,7 +295,7 @@ int bch2_hash_delete_at(struct btree_trans *trans, delete->k.p = iter->pos; delete->k.type = ret ? 
KEY_TYPE_whiteout : KEY_TYPE_deleted; - bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete)); + bch2_trans_update(trans, iter, delete); return 0; } diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 92843bd09b04..a2092bb99095 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -43,7 +43,7 @@ static void test_delete(struct bch_fs *c, u64 nr) ret = bch2_btree_iter_traverse(iter); BUG_ON(ret); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i)); + bch2_trans_update(&trans, iter, &k.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, 0); BUG_ON(ret); @@ -75,7 +75,7 @@ static void test_delete_written(struct bch_fs *c, u64 nr) ret = bch2_btree_iter_traverse(iter); BUG_ON(ret); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i)); + bch2_trans_update(&trans, iter, &k.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, 0); BUG_ON(ret); @@ -465,7 +465,7 @@ static void rand_mixed(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); k.k.p = iter->pos; - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i)); + bch2_trans_update(&trans, iter, &k.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, 0); BUG_ON(ret); } @@ -509,7 +509,7 @@ static void seq_insert(struct bch_fs *c, u64 nr) BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { insert.k.p = iter->pos; - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &insert.k_i)); + bch2_trans_update(&trans, iter, &insert.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, 0); BUG_ON(ret); @@ -548,7 +548,7 @@ static void seq_overwrite(struct bch_fs *c, u64 nr) bkey_reassemble(&u.k_i, k); - bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &u.k_i)); + bch2_trans_update(&trans, iter, &u.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, 0); BUG_ON(ret); } -- cgit v1.2.3 From 6988e85be525b874745824622bae4209c265dc5a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 25 Sep 2019 23:11:41 -0400 Subject: bcachefs: Trust inode in btree over bch_inode_info This is the start of some refactoring work to make less code depend on the linux VFS - here the inode cache - to make e.g. the fuse port easier. 
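A minimal sketch of the new pattern, under hypothetical names (the real hunk follows): read and unpack the inode from the btree under the btree node lock rather than copying the VFS-cached ei_inode, returning -EIO if the key is missing or is not an inode.

#include <errno.h>

/* sketch only: hypothetical stand-ins for the btree and key APIs */
struct key_sketch { int type; int err; };
struct inode_unpacked_sketch;
extern struct key_sketch btree_peek_slot(void *iter);
extern int inode_unpack(struct key_sketch k,
			struct inode_unpacked_sketch *u);
#define KEY_TYPE_inode_sketch 1

static int read_inode_for_update(void *iter,
				 struct inode_unpacked_sketch *u)
{
	struct key_sketch k = btree_peek_slot(iter);

	if (k.err)
		return k.err;
	if (k.type != KEY_TYPE_inode_sketch)
		return -EIO;		/* inode missing from the btree */
	return inode_unpack(k, u);	/* btree copy, not the VFS cache */
}
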
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 166d94e5e59d..0a83d5f61a6b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -156,9 +156,8 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, { struct btree_iter *iter = NULL; struct bkey_inode_buf *inode_p; - int ret; - - lockdep_assert_held(&inode->ei_update_lock); + struct bkey_s_c k; + int ret = 0; iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inode->v.i_ino, 0), @@ -166,12 +165,17 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, if (IS_ERR(iter)) return PTR_ERR(iter); - /* The btree node lock is our lock on the inode: */ - ret = bch2_btree_iter_traverse(iter); + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); if (ret) return ret; - *inode_u = inode->ei_inode; + if (k.k->type != KEY_TYPE_inode) + return -EIO; + + ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode_u); + if (ret) + return ret; if (set) { ret = set(inode, inode_u, p); @@ -185,7 +189,6 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, bch2_inode_pack(inode_p, inode_u); bch2_trans_update(trans, iter, &inode_p->inode.k_i); - return 0; } -- cgit v1.2.3 From 58677a1d40df8fe3375e9badd7387cf1a2946a3a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2019 16:51:57 -0400 Subject: bcachefs: bch2_inode_peek()/bch2_inode_write() Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 51 +++++++------- fs/bcachefs/fs-io.c | 87 +++++++++-------------- fs/bcachefs/fs.c | 200 +++++++++++++++++++++++----------------------------- fs/bcachefs/inode.c | 47 ++++++++++++ fs/bcachefs/inode.h | 5 ++ 5 files changed, 198 insertions(+), 192 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 5a4263806610..4e631e04cf0c 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -281,51 +281,54 @@ int bch2_set_acl_trans(struct btree_trans *trans, return ret == -ENOENT ? 
0 : ret; } -static int inode_update_for_set_acl_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - umode_t mode = (unsigned long) p; - - bi->bi_ctime = bch2_current_time(c); - bi->bi_mode = mode; - return 0; -} - int bch2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, - struct posix_acl *acl, int type) + struct posix_acl *_acl, int type) { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; + struct btree_iter *inode_iter; struct bch_inode_unpacked inode_u; - umode_t mode = inode->v.i_mode; + struct posix_acl *acl; + umode_t mode; int ret; mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + acl = _acl; - if (type == ACL_TYPE_ACCESS && acl) { + inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(inode_iter); + if (ret) + goto btree_err; + + mode = inode_u.bi_mode; + + if (type == ACL_TYPE_ACCESS) { ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl); if (ret) goto err; } -retry: - bch2_trans_begin(&trans); - ret = bch2_set_acl_trans(&trans, - &inode->ei_inode, - &inode->ei_str_hash, - acl, type) ?: - bch2_write_inode_trans(&trans, inode, &inode_u, - inode_update_for_set_acl_fn, - (void *)(unsigned long) mode) ?: + ret = bch2_set_acl_trans(&trans, &inode_u, + &inode->ei_str_hash, + acl, type); + if (ret) + goto btree_err; + + inode_u.bi_ctime = bch2_current_time(c); + inode_u.bi_mode = mode; + + ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); +btree_err: if (ret == -EINTR) goto retry; if (unlikely(ret)) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index be121b755fc7..49c0343da462 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -241,11 +241,13 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, static int sum_sector_overwrites(struct btree_trans *trans, struct btree_iter *extent_iter, - struct bkey_i *new, bool *allocating, + struct bkey_i *new, + bool may_allocate, s64 *delta) { struct btree_iter *iter; struct bkey_s_c old; + int ret = 0; *delta = 0; @@ -253,21 +255,13 @@ static int sum_sector_overwrites(struct btree_trans *trans, if (IS_ERR(iter)) return PTR_ERR(iter); - old = bch2_btree_iter_peek_slot(iter); - - while (1) { - /* - * should not be possible to get an error here, since we're - * carefully not advancing past @new and thus whatever leaf node - * @_iter currently points to: - */ - BUG_ON(bkey_err(old)); - - if (allocating && - !*allocating && + for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { + if (!may_allocate && bch2_bkey_nr_ptrs_allocated(old) < - bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new))) - *allocating = true; + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new))) { + ret = -ENOSPC; + break; + } *delta += (min(new->k.p.offset, old.k->p.offset) - @@ -278,12 +272,10 @@ static int sum_sector_overwrites(struct btree_trans *trans, if (bkey_cmp(old.k->p, new->k.p) >= 0) break; - - old = bch2_btree_iter_next_slot(iter); } bch2_trans_iter_put(trans, iter); - return 0; + return ret; } int bch2_extent_update(struct btree_trans *trans, @@ -301,9 +293,7 @@ int bch2_extent_update(struct btree_trans *trans, struct btree_iter *inode_iter = NULL; struct bch_inode_unpacked inode_u; struct bkey_inode_buf inode_p; - bool allocating = false; bool 
extended = false; - bool inode_locked = false; s64 i_sectors_delta; int ret; @@ -315,15 +305,11 @@ int bch2_extent_update(struct btree_trans *trans, if (ret) return ret; - ret = sum_sector_overwrites(trans, extent_iter, - k, &allocating, - &i_sectors_delta); + ret = sum_sector_overwrites(trans, extent_iter, k, + may_allocate, &i_sectors_delta); if (ret) return ret; - if (!may_allocate && allocating) - return -ENOSPC; - bch2_trans_update(trans, extent_iter, k); new_i_size = min(k->k.p.offset << 9, new_i_size); @@ -331,29 +317,28 @@ int bch2_extent_update(struct btree_trans *trans, /* XXX: inode->i_size locking */ if (i_sectors_delta || new_i_size > inode->ei_inode.bi_size) { - inode_iter = bch2_trans_get_iter(trans, - BTREE_ID_INODES, - POS(k->k.p.inode, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + inode_iter = bch2_inode_peek(trans, &inode_u, + k->k.p.inode, BTREE_ITER_INTENT); if (IS_ERR(inode_iter)) return PTR_ERR(inode_iter); - ret = bch2_btree_iter_traverse(inode_iter); - if (ret) - goto err; - - inode_u = inode->ei_inode; inode_u.bi_sectors += i_sectors_delta; - /* XXX: this is slightly suspect */ + /* + * XXX: can BCH_INODE_I_SIZE_DIRTY be true here? i.e. can we + * race with truncate? + */ if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && new_i_size > inode_u.bi_size) { inode_u.bi_size = new_i_size; extended = true; } - bch2_inode_pack(&inode_p, &inode_u); - bch2_trans_update(trans, inode_iter, &inode_p.inode.k_i); + if (i_sectors_delta || extended) { + bch2_inode_pack(&inode_p, &inode_u); + bch2_trans_update(trans, inode_iter, + &inode_p.inode.k_i); + } } ret = bch2_trans_commit(trans, disk_res, @@ -365,33 +350,25 @@ int bch2_extent_update(struct btree_trans *trans, if (ret) goto err; - inode->ei_inode.bi_sectors += i_sectors_delta; - - EBUG_ON(i_sectors_delta && - inode->ei_inode.bi_sectors != inode_u.bi_sectors); - - if (extended) { - inode->ei_inode.bi_size = new_i_size; - - if (direct) { - spin_lock(&inode->v.i_lock); - if (new_i_size > inode->v.i_size) - i_size_write(&inode->v, new_i_size); - spin_unlock(&inode->v.i_lock); - } + if (i_sectors_delta || extended) { + inode->ei_inode.bi_sectors = inode_u.bi_sectors; + inode->ei_inode.bi_size = inode_u.bi_size; } if (direct) i_sectors_acct(c, inode, quota_res, i_sectors_delta); + if (direct && extended) { + spin_lock(&inode->v.i_lock); + if (new_i_size > inode->v.i_size) + i_size_write(&inode->v, new_i_size); + spin_unlock(&inode->v.i_lock); + } if (total_delta) *total_delta += i_sectors_delta; err: if (!IS_ERR_OR_NULL(inode_iter)) bch2_trans_iter_put(trans, inode_iter); - if (inode_locked) - mutex_unlock(&inode->ei_update_lock); - return ret; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0a83d5f61a6b..cbe1b90e80c2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -155,41 +155,19 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans, void *p) { struct btree_iter *iter = NULL; - struct bkey_inode_buf *inode_p; - struct bkey_s_c k; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, - POS(inode->v.i_ino, 0), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter); - - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); + iter = bch2_inode_peek(trans, inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(iter); if (ret) return ret; - if (k.k->type != KEY_TYPE_inode) - return -EIO; - - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode_u); + ret = set ? 
set(inode, inode_u, p) : 0; if (ret) return ret; - if (set) { - ret = set(inode, inode_u, p); - if (ret) - return ret; - } - - inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); - if (IS_ERR(inode_p)) - return PTR_ERR(inode_p); - - bch2_inode_pack(inode_p, inode_u); - bch2_trans_update(trans, iter, &inode_p->inode.k_i); - return 0; + return bch2_inode_write(trans, iter, inode_u); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -531,23 +509,13 @@ static int bch2_create(struct mnt_idmap *idmap, return 0; } -static int inode_update_for_link_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - - bi->bi_ctime = bch2_current_time(c); - bch2_inode_nlink_inc(bi); - return 0; -} - static int __bch2_link(struct bch_fs *c, struct bch_inode_info *inode, struct bch_inode_info *dir, struct dentry *dentry) { struct btree_trans trans; + struct btree_iter *inode_iter; struct bch_inode_unpacked inode_u; int ret; @@ -555,21 +523,30 @@ static int __bch2_link(struct bch_fs *c, bch2_trans_init(&trans, c, 4, 1024); retry: bch2_trans_begin(&trans); - ret = __bch2_dirent_create(&trans, dir->v.i_ino, &dir->ei_str_hash, mode_to_type(inode->v.i_mode), &dentry->d_name, inode->v.i_ino, - BCH_HASH_SET_MUST_CREATE) ?: - bch2_write_inode_trans(&trans, inode, &inode_u, - inode_update_for_link_fn, - NULL) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOUNLOCK); + BCH_HASH_SET_MUST_CREATE); + if (ret) + goto err; + + inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(inode_iter); + if (ret) + goto err; + + inode_u.bi_ctime = bch2_current_time(c); + bch2_inode_nlink_inc(&inode_u); + ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: + bch2_trans_commit(&trans, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); +err: if (ret == -EINTR) goto retry; @@ -600,36 +577,12 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, return 0; } -static int inode_update_dir_for_unlink_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_inode_info *unlink_inode = p; - - bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); - - bi->bi_nlink -= S_ISDIR(unlink_inode->v.i_mode); - - return 0; -} - -static int inode_update_for_unlink_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - - bi->bi_ctime = bch2_current_time(c); - bch2_inode_nlink_dec(bi); - return 0; -} - static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct btree_iter *dir_iter, *inode_iter; struct bch_inode_unpacked dir_u, inode_u; struct btree_trans trans; int ret; @@ -641,25 +594,42 @@ retry: ret = __bch2_dirent_delete(&trans, dir->v.i_ino, &dir->ei_str_hash, - &dentry->d_name) ?: - bch2_write_inode_trans(&trans, dir, &dir_u, - inode_update_dir_for_unlink_fn, - inode) ?: - bch2_write_inode_trans(&trans, inode, &inode_u, - inode_update_for_unlink_fn, - NULL) ?: + &dentry->d_name); + if (ret) + goto btree_err; + + dir_iter = bch2_inode_peek(&trans, &dir_u, dir->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(dir_iter); + if (ret) + goto btree_err; + + inode_iter = bch2_inode_peek(&trans, &inode_u, 
inode->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(inode_iter); + if (ret) + goto btree_err; + + dir_u.bi_mtime = dir_u.bi_ctime = inode_u.bi_ctime = + bch2_current_time(c); + + dir_u.bi_nlink -= S_ISDIR(inode_u.bi_mode); + bch2_inode_nlink_dec(&inode_u); + + ret = bch2_inode_write(&trans, dir_iter, &dir_u) ?: + bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &dir->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); +btree_err: if (ret == -EINTR) goto retry; if (ret) goto err; - if (dir->ei_journal_seq > inode->ei_journal_seq) - inode->ei_journal_seq = dir->ei_journal_seq; + journal_seq_copy(inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); @@ -953,67 +923,60 @@ err: return ret; } -struct inode_write_setattr { - struct iattr *attr; - struct mnt_idmap *idmap; -}; - -static int inode_update_for_setattr_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) +static void bch2_setattr_copy(struct mnt_idmap *idmap, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + struct iattr *attr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct inode_write_setattr *s = p; - unsigned int ia_valid = s->attr->ia_valid; + unsigned int ia_valid = attr->ia_valid; if (ia_valid & ATTR_UID) - bi->bi_uid = from_kuid(i_user_ns(&inode->v), s->attr->ia_uid); + bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid); if (ia_valid & ATTR_GID) - bi->bi_gid = from_kgid(i_user_ns(&inode->v), s->attr->ia_gid); + bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid); if (ia_valid & ATTR_ATIME) - bi->bi_atime = timespec_to_bch2_time(c, s->attr->ia_atime); + bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); if (ia_valid & ATTR_MTIME) - bi->bi_mtime = timespec_to_bch2_time(c, s->attr->ia_mtime); + bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime); if (ia_valid & ATTR_CTIME) - bi->bi_ctime = timespec_to_bch2_time(c, s->attr->ia_ctime); + bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime); if (ia_valid & ATTR_MODE) { - umode_t mode = s->attr->ia_mode; + umode_t mode = attr->ia_mode; kgid_t gid = ia_valid & ATTR_GID - ? s->attr->ia_gid + ? 
attr->ia_gid : inode->v.i_gid; if (!in_group_p(gid) && - !capable_wrt_inode_uidgid(s->idmap, &inode->v, CAP_FSETID)) + !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID)) mode &= ~S_ISGID; bi->bi_mode = mode; } - - return 0; } static int bch2_setattr_nonsize(struct mnt_idmap *idmap, struct bch_inode_info *inode, - struct iattr *iattr) + struct iattr *attr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; struct btree_trans trans; + struct btree_iter *inode_iter; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; - struct inode_write_setattr s = { iattr, idmap }; int ret; mutex_lock(&inode->ei_update_lock); qid = inode->ei_qid; - if (iattr->ia_valid & ATTR_UID) - qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), iattr->ia_uid); + if (attr->ia_valid & ATTR_UID) + qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid); - if (iattr->ia_valid & ATTR_GID) - qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), iattr->ia_gid); + if (attr->ia_valid & ATTR_GID) + qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid); ret = bch2_fs_quota_transfer(c, inode, qid, ~0, KEY_TYPE_QUOTA_PREALLOC); @@ -1026,22 +989,33 @@ retry: kfree(acl); acl = NULL; - ret = bch2_write_inode_trans(&trans, inode, &inode_u, - inode_update_for_setattr_fn, &s) ?: - (iattr->ia_valid & ATTR_MODE - ? bch2_acl_chmod(&trans, inode, iattr->ia_mode, &acl) - : 0) ?: + inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(inode_iter); + if (ret) + goto btree_err; + + bch2_setattr_copy(idmap, inode, &inode_u, attr); + + if (attr->ia_valid & ATTR_MODE) { + ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl); + if (ret) + goto btree_err; + } + + ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); +btree_err: if (ret == -EINTR) goto retry; if (unlikely(ret)) goto err_trans; - bch2_inode_update_after_write(c, inode, &inode_u, iattr->ia_valid); + bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid); if (acl) set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index f192536558c1..fc38cfb9e939 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -181,6 +181,53 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, return 0; } +struct btree_iter *bch2_inode_peek(struct btree_trans *trans, + struct bch_inode_unpacked *inode, + u64 inum, unsigned flags) +{ + struct btree_iter *iter; + struct bkey_s_c k; + int ret; + + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(inum, 0), + BTREE_ITER_SLOTS|flags); + if (IS_ERR(iter)) + return iter; + + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; + + ret = k.k->type == KEY_TYPE_inode ? 
0 : -EIO; + if (ret) + goto err; + + ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); + if (ret) + goto err; + + return iter; +err: + bch2_trans_iter_put(trans, iter); + return ERR_PTR(ret); +} + +int bch2_inode_write(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode) +{ + struct bkey_inode_buf *inode_p; + + inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); + if (IS_ERR(inode_p)) + return PTR_ERR(inode_p); + + bch2_inode_pack(inode_p, inode); + bch2_trans_update(trans, iter, &inode_p->inode.k_i); + return 0; +} + const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index e88ec78071bd..c5626c668639 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -46,6 +46,11 @@ struct bkey_inode_buf { void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); +struct btree_iter *bch2_inode_peek(struct btree_trans *, + struct bch_inode_unpacked *, u64, unsigned); +int bch2_inode_write(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *); + void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, uid_t, gid_t, umode_t, dev_t, struct bch_inode_unpacked *); -- cgit v1.2.3 From 9638574229e3ae0175a46a63431149746c777b3a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2019 18:35:36 -0400 Subject: bcachefs: Factor out fs-common.c This refactoring makes the code easier to understand by separating the bcachefs btree transactional code from the linux VFS code - but more importantly, it's also to share code with the fuse port. Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/dirent.c | 97 ++++------ fs/bcachefs/dirent.h | 29 +-- fs/bcachefs/fs-common.c | 280 +++++++++++++++++++++++++++ fs/bcachefs/fs-common.h | 37 ++++ fs/bcachefs/fs-ioctl.c | 10 + fs/bcachefs/fs.c | 495 +++++++++++++----------------------------------- fs/bcachefs/fs.h | 13 -- fs/bcachefs/fsck.c | 76 +++----- fs/bcachefs/inode.c | 39 ++-- fs/bcachefs/inode.h | 16 +- fs/bcachefs/recovery.c | 26 +-- 12 files changed, 586 insertions(+), 533 deletions(-) create mode 100644 fs/bcachefs/fs-common.c create mode 100644 fs/bcachefs/fs-common.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 4c2608409144..9d120936703a 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -27,6 +27,7 @@ bcachefs-y := \ error.o \ extents.o \ fs.o \ + fs-common.o \ fs-ioctl.o \ fs-io.o \ fsck.o \ diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 38dd96808e90..304128d7251f 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -138,10 +138,10 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, return dirent; } -int __bch2_dirent_create(struct btree_trans *trans, - u64 dir_inum, const struct bch_hash_info *hash_info, - u8 type, const struct qstr *name, u64 dst_inum, - int flags) +int bch2_dirent_create(struct btree_trans *trans, + u64 dir_inum, const struct bch_hash_info *hash_info, + u8 type, const struct qstr *name, u64 dst_inum, + int flags) { struct bkey_i_dirent *dirent; int ret; @@ -155,16 +155,6 @@ int __bch2_dirent_create(struct btree_trans *trans, dir_inum, &dirent->k_i, flags); } -int bch2_dirent_create(struct bch_fs *c, u64 dir_inum, - const struct bch_hash_info *hash_info, - u8 type, const struct qstr *name, u64 
dst_inum, - u64 *journal_seq, int flags) -{ - return bch2_trans_do(c, journal_seq, flags, - __bch2_dirent_create(&trans, dir_inum, hash_info, - type, name, dst_inum, flags)); -} - static void dirent_copy_target(struct bkey_i_dirent *dst, struct bkey_s_c_dirent src) { @@ -172,23 +162,22 @@ static void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } -static struct bpos bch2_dirent_pos(struct bch_inode_info *inode, - const struct qstr *name) -{ - return POS(inode->v.i_ino, bch2_dirent_hash(&inode->ei_str_hash, name)); -} - int bch2_dirent_rename(struct btree_trans *trans, - struct bch_inode_info *src_dir, const struct qstr *src_name, - struct bch_inode_info *dst_dir, const struct qstr *dst_name, - enum bch_rename_mode mode) + u64 src_dir, struct bch_hash_info *src_hash, + u64 dst_dir, struct bch_hash_info *dst_hash, + const struct qstr *src_name, u64 *src_inum, + const struct qstr *dst_name, u64 *dst_inum, + enum bch_rename_mode mode) { struct btree_iter *src_iter, *dst_iter; struct bkey_s_c old_src, old_dst; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; - struct bpos dst_pos = bch2_dirent_pos(dst_dir, dst_name); + struct bpos dst_pos = + POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name)); int ret; + *src_inum = *dst_inum = 0; + /* * Lookup dst: * @@ -198,24 +187,25 @@ int bch2_dirent_rename(struct btree_trans *trans, */ dst_iter = mode == BCH_RENAME ? bch2_hash_hole(trans, bch2_dirent_hash_desc, - &dst_dir->ei_str_hash, - dst_dir->v.i_ino, dst_name) + dst_hash, dst_dir, dst_name) : bch2_hash_lookup(trans, bch2_dirent_hash_desc, - &dst_dir->ei_str_hash, - dst_dir->v.i_ino, dst_name, + dst_hash, dst_dir, dst_name, BTREE_ITER_INTENT); if (IS_ERR(dst_iter)) return PTR_ERR(dst_iter); old_dst = bch2_btree_iter_peek_slot(dst_iter); + if (mode != BCH_RENAME) + *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); + /* Lookup src: */ src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, - &src_dir->ei_str_hash, - src_dir->v.i_ino, src_name, + src_hash, src_dir, src_name, BTREE_ITER_INTENT); if (IS_ERR(src_iter)) return PTR_ERR(src_iter); old_src = bch2_btree_iter_peek_slot(src_iter); + *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); /* Create new dst key: */ new_dst = dirent_create_key(trans, 0, dst_name, 0); @@ -269,8 +259,7 @@ int bch2_dirent_rename(struct btree_trans *trans, } else { /* Check if we need a whiteout to delete src: */ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, - &src_dir->ei_str_hash, - src_iter); + src_hash, src_iter); if (ret < 0) return ret; @@ -284,12 +273,12 @@ int bch2_dirent_rename(struct btree_trans *trans, return 0; } -int __bch2_dirent_delete(struct btree_trans *trans, u64 dir_inum, - const struct bch_hash_info *hash_info, - const struct qstr *name) +int bch2_dirent_delete_at(struct btree_trans *trans, + const struct bch_hash_info *hash_info, + struct btree_iter *iter) { - return bch2_hash_delete(trans, bch2_dirent_hash_desc, hash_info, - dir_inum, name); + return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, + hash_info, iter); } int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum, @@ -300,7 +289,17 @@ int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum, return bch2_trans_do(c, journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL, - __bch2_dirent_delete(&trans, dir_inum, hash_info, name)); + bch2_hash_delete(&trans, bch2_dirent_hash_desc, hash_info, + dir_inum, name)); +} + +struct btree_iter * +__bch2_dirent_lookup_trans(struct btree_trans *trans, u64 dir_inum, + const struct 
bch_hash_info *hash_info, + const struct qstr *name) +{ + return bch2_hash_lookup(trans, bch2_dirent_hash_desc, + hash_info, dir_inum, name, 0); } u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, @@ -314,8 +313,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, bch2_trans_init(&trans, c, 0, 0); - iter = bch2_hash_lookup(&trans, bch2_dirent_hash_desc, - hash_info, dir_inum, name, 0); + iter = __bch2_dirent_lookup_trans(&trans, dir_inum, hash_info, name); if (IS_ERR(iter)) { BUG_ON(PTR_ERR(iter) == -EINTR); goto out; @@ -349,16 +347,8 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) return ret; } -int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) +int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) { - return bch2_trans_do(c, NULL, 0, - bch2_empty_dir_trans(&trans, dir_inum)); -} - -int bch2_readdir(struct bch_fs *c, struct file *file, - struct dir_context *ctx) -{ - struct bch_inode_info *inode = file_bch_inode(file); struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; @@ -366,22 +356,19 @@ int bch2_readdir(struct bch_fs *c, struct file *file, unsigned len; int ret; - if (!dir_emit_dots(file, ctx)) - return 0; - bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, - POS(inode->v.i_ino, ctx->pos), 0, k, ret) { + POS(inum, ctx->pos), 0, k, ret) { if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); - if (bkey_cmp(k.k->p, POS(inode->v.i_ino, ctx->pos)) < 0) + if (bkey_cmp(k.k->p, POS(inum, ctx->pos)) < 0) continue; - if (k.k->p.inode > inode->v.i_ino) + if (k.k->p.inode > inum) break; len = bch2_dirent_name_bytes(dirent); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index bc64718a7832..9a57ad005468 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -29,15 +29,13 @@ static inline unsigned dirent_val_u64s(unsigned len) sizeof(u64)); } -int __bch2_dirent_create(struct btree_trans *, u64, - const struct bch_hash_info *, u8, - const struct qstr *, u64, int); -int bch2_dirent_create(struct bch_fs *c, u64, const struct bch_hash_info *, - u8, const struct qstr *, u64, u64 *, int); - -int __bch2_dirent_delete(struct btree_trans *, u64, - const struct bch_hash_info *, - const struct qstr *); +int bch2_dirent_create(struct btree_trans *, u64, + const struct bch_hash_info *, u8, + const struct qstr *, u64, int); + +int bch2_dirent_delete_at(struct btree_trans *, + const struct bch_hash_info *, + struct btree_iter *); int bch2_dirent_delete(struct bch_fs *, u64, const struct bch_hash_info *, const struct qstr *, u64 *); @@ -48,15 +46,20 @@ enum bch_rename_mode { }; int bch2_dirent_rename(struct btree_trans *, - struct bch_inode_info *, const struct qstr *, - struct bch_inode_info *, const struct qstr *, + u64, struct bch_hash_info *, + u64, struct bch_hash_info *, + const struct qstr *, u64 *, + const struct qstr *, u64 *, enum bch_rename_mode); +struct btree_iter * +__bch2_dirent_lookup_trans(struct btree_trans *, u64, + const struct bch_hash_info *, + const struct qstr *); u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, const struct qstr *); int bch2_empty_dir_trans(struct btree_trans *, u64); -int bch2_empty_dir(struct bch_fs *, u64); -int bch2_readdir(struct bch_fs *, struct file *, struct dir_context *); +int bch2_readdir(struct bch_fs *, u64, struct dir_context *); #endif /* _BCACHEFS_DIRENT_H */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c new file mode 100644 index 000000000000..fdd2b9b6716f --- /dev/null 
+++ b/fs/bcachefs/fs-common.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "acl.h" +#include "btree_update.h" +#include "dirent.h" +#include "fs-common.h" +#include "inode.h" +#include "xattr.h" + +#include + +int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, + struct bch_inode_unpacked *dir_u, + struct bch_inode_unpacked *new_inode, + const struct qstr *name, + uid_t uid, gid_t gid, umode_t mode, dev_t rdev, + struct posix_acl *default_acl, + struct posix_acl *acl) +{ + struct bch_fs *c = trans->c; + struct btree_iter *dir_iter; + struct bch_hash_info hash = bch2_hash_info_init(c, new_inode); + u64 now = bch2_current_time(trans->c); + int ret; + + dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, + name ? BTREE_ITER_INTENT : 0); + if (IS_ERR(dir_iter)) + return PTR_ERR(dir_iter); + + bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); + + if (!name) + new_inode->bi_flags |= BCH_INODE_UNLINKED; + + ret = bch2_inode_create(trans, new_inode, + BLOCKDEV_INODE_MAX, 0, + &c->unused_inode_hint); + if (ret) + return ret; + + if (default_acl) { + ret = bch2_set_acl_trans(trans, new_inode, &hash, + default_acl, ACL_TYPE_DEFAULT); + if (ret) + return ret; + } + + if (acl) { + ret = bch2_set_acl_trans(trans, new_inode, &hash, + acl, ACL_TYPE_ACCESS); + if (ret) + return ret; + } + + if (name) { + struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); + dir_u->bi_mtime = dir_u->bi_ctime = now; + + if (S_ISDIR(new_inode->bi_mode)) + dir_u->bi_nlink++; + + ret = bch2_inode_write(trans, dir_iter, dir_u); + if (ret) + return ret; + + ret = bch2_dirent_create(trans, dir_inum, &dir_hash, + mode_to_type(new_inode->bi_mode), + name, new_inode->bi_inum, + BCH_HASH_SET_MUST_CREATE); + if (ret) + return ret; + } + + return 0; +} + +int bch2_link_trans(struct btree_trans *trans, + u64 dir_inum, + u64 inum, struct bch_inode_unpacked *inode_u, + const struct qstr *name) +{ + struct btree_iter *dir_iter, *inode_iter; + struct bch_inode_unpacked dir_u; + struct bch_hash_info dir_hash; + u64 now = bch2_current_time(trans->c); + + dir_iter = bch2_inode_peek(trans, &dir_u, dir_inum, 0); + if (IS_ERR(dir_iter)) + return PTR_ERR(dir_iter); + + inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); + if (IS_ERR(inode_iter)) + return PTR_ERR(inode_iter); + + dir_hash = bch2_hash_info_init(trans->c, &dir_u); + + inode_u->bi_ctime = now; + bch2_inode_nlink_inc(inode_u); + + return bch2_dirent_create(trans, dir_inum, &dir_hash, + mode_to_type(inode_u->bi_mode), + name, inum, BCH_HASH_SET_MUST_CREATE) ?: + bch2_inode_write(trans, inode_iter, inode_u); +} + +int bch2_unlink_trans(struct btree_trans *trans, + u64 dir_inum, struct bch_inode_unpacked *dir_u, + struct bch_inode_unpacked *inode_u, + const struct qstr *name) +{ + struct btree_iter *dir_iter, *dirent_iter, *inode_iter; + struct bch_hash_info dir_hash; + u64 inum, now = bch2_current_time(trans->c); + struct bkey_s_c k; + + dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); + if (IS_ERR(dir_iter)) + return PTR_ERR(dir_iter); + + dir_hash = bch2_hash_info_init(trans->c, dir_u); + + dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, + &dir_hash, name); + if (IS_ERR(dirent_iter)) + return PTR_ERR(dirent_iter); + + k = bch2_btree_iter_peek_slot(dirent_iter); + inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); + + inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); + if (IS_ERR(inode_iter)) + return PTR_ERR(inode_iter); + + 
dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; + dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode); + bch2_inode_nlink_dec(inode_u); + + return (S_ISDIR(inode_u->bi_mode) + ? bch2_empty_dir_trans(trans, inum) + : 0) ?: + bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?: + bch2_inode_write(trans, dir_iter, dir_u) ?: + bch2_inode_write(trans, inode_iter, inode_u); +} + +bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, + struct bch_inode_unpacked *src_u) +{ + u64 src, dst; + unsigned id; + bool ret = false; + + for (id = 0; id < Inode_opt_nr; id++) { + if (dst_u->bi_fields_set & (1 << id)) + continue; + + src = bch2_inode_opt_get(src_u, id); + dst = bch2_inode_opt_get(dst_u, id); + + if (src == dst) + continue; + + bch2_inode_opt_set(dst_u, id, src); + ret = true; + } + + return ret; +} + +int bch2_rename_trans(struct btree_trans *trans, + u64 src_dir, struct bch_inode_unpacked *src_dir_u, + u64 dst_dir, struct bch_inode_unpacked *dst_dir_u, + struct bch_inode_unpacked *src_inode_u, + struct bch_inode_unpacked *dst_inode_u, + const struct qstr *src_name, + const struct qstr *dst_name, + enum bch_rename_mode mode) +{ + struct btree_iter *src_dir_iter, *dst_dir_iter = NULL; + struct btree_iter *src_inode_iter, *dst_inode_iter = NULL; + struct bch_hash_info src_hash, dst_hash; + u64 src_inode, dst_inode, now = bch2_current_time(trans->c); + int ret; + + src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir, + BTREE_ITER_INTENT); + if (IS_ERR(src_dir_iter)) + return PTR_ERR(src_dir_iter); + + src_hash = bch2_hash_info_init(trans->c, src_dir_u); + + if (dst_dir != src_dir) { + dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir, + BTREE_ITER_INTENT); + if (IS_ERR(dst_dir_iter)) + return PTR_ERR(dst_dir_iter); + + dst_hash = bch2_hash_info_init(trans->c, dst_dir_u); + } else { + dst_dir_u = src_dir_u; + dst_hash = src_hash; + } + + ret = bch2_dirent_rename(trans, + src_dir, &src_hash, + dst_dir, &dst_hash, + src_name, &src_inode, + dst_name, &dst_inode, + mode); + if (ret) + return ret; + + src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode, + BTREE_ITER_INTENT); + if (IS_ERR(src_inode_iter)) + return PTR_ERR(src_inode_iter); + + if (dst_inode) { + dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode, + BTREE_ITER_INTENT); + if (IS_ERR(dst_inode_iter)) + return PTR_ERR(dst_inode_iter); + } + + if (mode == BCH_RENAME_OVERWRITE) { + if (S_ISDIR(src_inode_u->bi_mode) != + S_ISDIR(dst_inode_u->bi_mode)) + return -ENOTDIR; + + if (S_ISDIR(dst_inode_u->bi_mode) && + bch2_empty_dir_trans(trans, dst_inode)) + return -ENOTEMPTY; + } + + if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) && + S_ISDIR(src_inode_u->bi_mode)) + return -EXDEV; + + if (mode == BCH_RENAME_EXCHANGE && + bch2_reinherit_attrs(dst_inode_u, src_dir_u) && + S_ISDIR(dst_inode_u->bi_mode)) + return -EXDEV; + + if (S_ISDIR(src_inode_u->bi_mode)) { + src_dir_u->bi_nlink--; + dst_dir_u->bi_nlink++; + } + + if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) { + dst_dir_u->bi_nlink--; + src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; + } + + if (mode == BCH_RENAME_OVERWRITE) + bch2_inode_nlink_dec(dst_inode_u); + + src_dir_u->bi_mtime = now; + src_dir_u->bi_ctime = now; + + if (src_dir != dst_dir) { + dst_dir_u->bi_mtime = now; + dst_dir_u->bi_ctime = now; + } + + src_inode_u->bi_ctime = now; + + if (dst_inode) + dst_inode_u->bi_ctime = now; + + return bch2_inode_write(trans, src_dir_iter, src_dir_u) ?: + (src_dir != dst_dir + ? 
bch2_inode_write(trans, dst_dir_iter, dst_dir_u) + : 0 ) ?: + bch2_inode_write(trans, src_inode_iter, src_inode_u) ?: + (dst_inode + ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u) + : 0 ); +} diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h new file mode 100644 index 000000000000..7adcfcf92aec --- /dev/null +++ b/fs/bcachefs/fs-common.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_FS_COMMON_H +#define _BCACHEFS_FS_COMMON_H + +struct posix_acl; + +int bch2_create_trans(struct btree_trans *, u64, + struct bch_inode_unpacked *, + struct bch_inode_unpacked *, + const struct qstr *, + uid_t, gid_t, umode_t, dev_t, + struct posix_acl *, + struct posix_acl *); + +int bch2_link_trans(struct btree_trans *, + u64, + u64, struct bch_inode_unpacked *, + const struct qstr *); + +int bch2_unlink_trans(struct btree_trans *, + u64, struct bch_inode_unpacked *, + struct bch_inode_unpacked *, + const struct qstr *); + +int bch2_rename_trans(struct btree_trans *, + u64, struct bch_inode_unpacked *, + u64, struct bch_inode_unpacked *, + struct bch_inode_unpacked *, + struct bch_inode_unpacked *, + const struct qstr *, + const struct qstr *, + enum bch_rename_mode); + +bool bch2_reinherit_attrs(struct bch_inode_unpacked *, + struct bch_inode_unpacked *); + +#endif /* _BCACHEFS_FS_COMMON_H */ diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 0cf2621ec4fc..acc0a230ff0c 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -5,6 +5,7 @@ #include "chardev.h" #include "dirent.h" #include "fs.h" +#include "fs-common.h" #include "fs-ioctl.h" #include "quota.h" @@ -164,6 +165,15 @@ err: return ret; } +static int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_inode_info *dir = p; + + return !bch2_reinherit_attrs(bi, &dir->ei_inode); +} + static int bch2_ioc_reinherit_attrs(struct bch_fs *c, struct file *file, struct bch_inode_info *src, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index cbe1b90e80c2..b19a2deed5c1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -9,6 +9,7 @@ #include "dirent.h" #include "extents.h" #include "fs.h" +#include "fs-common.h" #include "fs-io.h" #include "fs-ioctl.h" #include "fsck.h" @@ -148,34 +149,13 @@ void bch2_inode_update_after_write(struct bch_fs *c, bch2_inode_flags_to_vfs(inode); } -int __must_check bch2_write_inode_trans(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *inode_u, - inode_set_fn set, - void *p) -{ - struct btree_iter *iter = NULL; - int ret = 0; - - iter = bch2_inode_peek(trans, inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(iter); - if (ret) - return ret; - - ret = set ? set(inode, inode_u, p) : 0; - if (ret) - return ret; - - return bch2_inode_write(trans, iter, inode_u); -} - int __must_check bch2_write_inode(struct bch_fs *c, struct bch_inode_info *inode, inode_set_fn set, void *p, unsigned fields) { struct btree_trans trans; + struct btree_iter *iter; struct bch_inode_unpacked inode_u; int ret; @@ -183,7 +163,11 @@ int __must_check bch2_write_inode(struct bch_fs *c, retry: bch2_trans_begin(&trans); - ret = bch2_write_inode_trans(&trans, inode, &inode_u, set, p) ?: + iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); + ret = PTR_ERR_OR_ZERO(iter) ?: + (set ? 
set(inode, &inode_u, p) : 0) ?: + bch2_inode_write(&trans, iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, BTREE_INSERT_ATOMIC| @@ -238,32 +222,6 @@ int bch2_fs_quota_transfer(struct bch_fs *c, return ret; } -int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_inode_info *dir = p; - u64 src, dst; - unsigned id; - int ret = 1; - - for (id = 0; id < Inode_opt_nr; id++) { - if (bi->bi_fields_set & (1 << id)) - continue; - - src = bch2_inode_opt_get(&dir->ei_inode, id); - dst = bch2_inode_opt_get(bi, id); - - if (src == dst) - continue; - - bch2_inode_opt_set(bi, id, src); - ret = 0; - } - - return ret; -} - struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) { struct bch_inode_unpacked inode_u; @@ -291,39 +249,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) return &inode->v; } -static void bch2_inode_init_owner(struct bch_inode_unpacked *inode_u, - const struct inode *dir, umode_t mode) -{ - kuid_t uid = current_fsuid(); - kgid_t gid; - - if (dir && dir->i_mode & S_ISGID) { - gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - gid = current_fsgid(); - - inode_u->bi_uid = from_kuid(i_user_ns(dir), uid); - inode_u->bi_gid = from_kgid(i_user_ns(dir), gid); - inode_u->bi_mode = mode; -} - -static int inode_update_for_create_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_inode_unpacked *new_inode = p; - - bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); - - if (S_ISDIR(new_inode->bi_mode)) - bi->bi_nlink++; - - return 0; -} - static int inum_test(struct inode *inode, void *p) { unsigned long *ino = p; @@ -341,40 +266,27 @@ __bch2_create(struct mnt_idmap *idmap, struct bch_inode_unpacked dir_u; struct bch_inode_info *inode, *old; struct bch_inode_unpacked inode_u; - struct bch_hash_info hash_info; struct posix_acl *default_acl = NULL, *acl = NULL; u64 journal_seq = 0; int ret; - bch2_inode_init(c, &inode_u, 0, 0, 0, rdev, &dir->ei_inode); - bch2_inode_init_owner(&inode_u, &dir->v, mode); - - hash_info = bch2_hash_info_init(c, &inode_u); - - if (tmpfile) - inode_u.bi_flags |= BCH_INODE_UNLINKED; - - ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, - KEY_TYPE_QUOTA_PREALLOC); - if (ret) - return ERR_PTR(ret); - + /* + * preallocate acls + vfs inode before btree transaction, so that + * nothing can fail after the transaction succeeds: + */ #ifdef CONFIG_BCACHEFS_POSIX_ACL - ret = posix_acl_create(&dir->v, &inode_u.bi_mode, &default_acl, &acl); + ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl); if (ret) - goto err; + return ERR_PTR(ret); #endif - - /* - * preallocate vfs inode before btree transaction, so that nothing can - * fail after the transaction succeeds: - */ inode = to_bch_ei(new_inode(c->vfs_sb)); if (unlikely(!inode)) { - ret = -ENOMEM; + inode = ERR_PTR(-ENOMEM); goto err; } + bch2_inode_init_early(c, &inode_u); + if (!tmpfile) mutex_lock(&dir->ei_update_lock); @@ -382,38 +294,28 @@ __bch2_create(struct mnt_idmap *idmap, retry: bch2_trans_begin(&trans); - ret = __bch2_inode_create(&trans, &inode_u, - BLOCKDEV_INODE_MAX, 0, - &c->unused_inode_hint) ?: - (default_acl - ? bch2_set_acl_trans(&trans, &inode_u, &hash_info, - default_acl, ACL_TYPE_DEFAULT) - : 0) ?: - (acl - ? bch2_set_acl_trans(&trans, &inode_u, &hash_info, - acl, ACL_TYPE_ACCESS) - : 0) ?: - (!tmpfile - ? 
__bch2_dirent_create(&trans, dir->v.i_ino, - &dir->ei_str_hash, - mode_to_type(mode), - &dentry->d_name, - inode_u.bi_inum, - BCH_HASH_SET_MUST_CREATE) - : 0) ?: - (!tmpfile - ? bch2_write_inode_trans(&trans, dir, &dir_u, - inode_update_for_create_fn, - &inode_u) - : 0) ?: - bch2_trans_commit(&trans, NULL, - &journal_seq, + ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u, + !tmpfile ? &dentry->d_name : NULL, + from_kuid(i_user_ns(&dir->v), current_fsuid()), + from_kgid(i_user_ns(&dir->v), current_fsgid()), + mode, rdev, + default_acl, acl) ?: + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, + KEY_TYPE_QUOTA_PREALLOC); + if (unlikely(ret)) + goto err_before_quota; + + ret = bch2_trans_commit(&trans, NULL, &journal_seq, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); - if (ret == -EINTR) - goto retry; - if (unlikely(ret)) + if (unlikely(ret)) { + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, + KEY_TYPE_QUOTA_WARN); +err_before_quota: + if (ret == -EINTR) + goto retry; goto err_trans; + } if (!tmpfile) { bch2_inode_update_after_write(c, dir, &dir_u, @@ -444,7 +346,7 @@ retry: * We raced, another process pulled the new inode into cache * before us: */ - old->ei_journal_seq = inode->ei_journal_seq; + journal_seq_copy(old, journal_seq); make_bad_inode(&inode->v); iput(&inode->v); @@ -458,7 +360,7 @@ retry: } bch2_trans_exit(&trans); -out: +err: posix_acl_release(default_acl); posix_acl_release(acl); return inode; @@ -469,10 +371,8 @@ err_trans: bch2_trans_exit(&trans); make_bad_inode(&inode->v); iput(&inode->v); -err: - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); inode = ERR_PTR(ret); - goto out; + goto err; } /* methods */ @@ -515,40 +415,23 @@ static int __bch2_link(struct bch_fs *c, struct dentry *dentry) { struct btree_trans trans; - struct btree_iter *inode_iter; struct bch_inode_unpacked inode_u; int ret; mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); -retry: - bch2_trans_begin(&trans); - ret = __bch2_dirent_create(&trans, dir->v.i_ino, - &dir->ei_str_hash, - mode_to_type(inode->v.i_mode), - &dentry->d_name, - inode->v.i_ino, - BCH_HASH_SET_MUST_CREATE); - if (ret) - goto err; - inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); - if (ret) - goto err; - - inode_u.bi_ctime = bch2_current_time(c); - bch2_inode_nlink_inc(&inode_u); - - ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOUNLOCK); -err: - if (ret == -EINTR) - goto retry; + do { + bch2_trans_begin(&trans); + ret = bch2_link_trans(&trans, + dir->v.i_ino, + inode->v.i_ino, &inode_u, + &dentry->d_name) ?: + bch2_trans_commit(&trans, NULL, + &inode->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK); + } while (ret == -EINTR); if (likely(!ret)) bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); @@ -582,60 +465,36 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct btree_iter *dir_iter, *inode_iter; struct bch_inode_unpacked dir_u, inode_u; struct btree_trans trans; int ret; bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); -retry: - bch2_trans_begin(&trans); - - ret = __bch2_dirent_delete(&trans, dir->v.i_ino, - &dir->ei_str_hash, - &dentry->d_name); - if (ret) - 
goto btree_err; - - dir_iter = bch2_inode_peek(&trans, &dir_u, dir->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dir_iter); - if (ret) - goto btree_err; - - inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); - if (ret) - goto btree_err; - - dir_u.bi_mtime = dir_u.bi_ctime = inode_u.bi_ctime = - bch2_current_time(c); - - dir_u.bi_nlink -= S_ISDIR(inode_u.bi_mode); - bch2_inode_nlink_dec(&inode_u); - ret = bch2_inode_write(&trans, dir_iter, &dir_u) ?: - bch2_inode_write(&trans, inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &dir->ei_journal_seq, - BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOUNLOCK| - BTREE_INSERT_NOFAIL); -btree_err: - if (ret == -EINTR) - goto retry; - if (ret) - goto err; - - journal_seq_copy(inode, dir->ei_journal_seq); + do { + bch2_trans_begin(&trans); + + ret = bch2_unlink_trans(&trans, + dir->v.i_ino, &dir_u, + &inode_u, &dentry->d_name) ?: + bch2_trans_commit(&trans, NULL, + &dir->ei_journal_seq, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL); + } while (ret == -EINTR); + + if (likely(!ret)) { + BUG_ON(inode_u.bi_inum != inode->v.i_ino); + + journal_seq_copy(inode, dir->ei_journal_seq); + bch2_inode_update_after_write(c, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); + bch2_inode_update_after_write(c, inode, &inode_u, + ATTR_MTIME); + } - bch2_inode_update_after_write(c, dir, &dir_u, - ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(c, inode, &inode_u, - ATTR_MTIME); -err: bch2_trans_exit(&trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -693,11 +552,6 @@ static int bch2_mkdir(struct mnt_idmap *idmap, static int bch2_rmdir(struct inode *vdir, struct dentry *dentry) { - struct bch_fs *c = vdir->i_sb->s_fs_info; - - if (bch2_empty_dir(c, dentry->d_inode->i_ino)) - return -ENOTEMPTY; - return bch2_unlink(vdir, dentry); } @@ -715,99 +569,31 @@ static int bch2_mknod(struct mnt_idmap *idmap, return 0; } -struct rename_info { - u64 now; - struct bch_inode_info *src_dir; - struct bch_inode_info *dst_dir; - struct bch_inode_info *src_inode; - struct bch_inode_info *dst_inode; - enum bch_rename_mode mode; -}; - -static int inode_update_for_rename_fn(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct rename_info *info = p; - int ret; - - if (inode == info->src_dir) { - bi->bi_nlink -= S_ISDIR(info->src_inode->v.i_mode); - bi->bi_nlink += info->dst_inode && - S_ISDIR(info->dst_inode->v.i_mode) && - info->mode == BCH_RENAME_EXCHANGE; - } - - if (inode == info->dst_dir) { - bi->bi_nlink += S_ISDIR(info->src_inode->v.i_mode); - bi->bi_nlink -= info->dst_inode && - S_ISDIR(info->dst_inode->v.i_mode); - } - - if (inode == info->src_inode) { - ret = bch2_reinherit_attrs_fn(inode, bi, info->dst_dir); - - BUG_ON(!ret && S_ISDIR(info->src_inode->v.i_mode)); - } - - if (inode == info->dst_inode && - info->mode == BCH_RENAME_EXCHANGE) { - ret = bch2_reinherit_attrs_fn(inode, bi, info->src_dir); - - BUG_ON(!ret && S_ISDIR(info->dst_inode->v.i_mode)); - } - - if (inode == info->dst_inode && - info->mode == BCH_RENAME_OVERWRITE) { - BUG_ON(bi->bi_nlink && - S_ISDIR(info->dst_inode->v.i_mode)); - - bch2_inode_nlink_dec(bi); - } - - if (inode == info->src_dir || - inode == info->dst_dir) - bi->bi_mtime = info->now; - bi->bi_ctime = info->now; - - return 0; -} - static int bch2_rename2(struct mnt_idmap *idmap, struct inode *src_vdir, struct dentry *src_dentry, struct inode *dst_vdir, struct dentry *dst_dentry, unsigned flags) { struct 
bch_fs *c = src_vdir->i_sb->s_fs_info; - struct rename_info i = { - .src_dir = to_bch_ei(src_vdir), - .dst_dir = to_bch_ei(dst_vdir), - .src_inode = to_bch_ei(src_dentry->d_inode), - .dst_inode = to_bch_ei(dst_dentry->d_inode), - .mode = flags & RENAME_EXCHANGE - ? BCH_RENAME_EXCHANGE - : dst_dentry->d_inode - ? BCH_RENAME_OVERWRITE : BCH_RENAME, - }; - struct btree_trans trans; + struct bch_inode_info *src_dir = to_bch_ei(src_vdir); + struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir); + struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode); + struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); struct bch_inode_unpacked dst_dir_u, src_dir_u; struct bch_inode_unpacked src_inode_u, dst_inode_u; + struct btree_trans trans; + enum bch_rename_mode mode = flags & RENAME_EXCHANGE + ? BCH_RENAME_EXCHANGE + : dst_dentry->d_inode + ? BCH_RENAME_OVERWRITE : BCH_RENAME; u64 journal_seq = 0; int ret; if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) return -EINVAL; - if (i.mode == BCH_RENAME_OVERWRITE) { - if (S_ISDIR(i.src_inode->v.i_mode) != - S_ISDIR(i.dst_inode->v.i_mode)) - return -ENOTDIR; - - if (S_ISDIR(i.src_inode->v.i_mode) && - bch2_empty_dir(c, i.dst_inode->v.i_ino)) - return -ENOTEMPTY; - - ret = filemap_write_and_wait_range(i.src_inode->v.i_mapping, + if (mode == BCH_RENAME_OVERWRITE) { + ret = filemap_write_and_wait_range(src_inode->v.i_mapping, 0, LLONG_MAX); if (ret) return ret; @@ -816,37 +602,24 @@ static int bch2_rename2(struct mnt_idmap *idmap, bch2_trans_init(&trans, c, 8, 2048); bch2_lock_inodes(INODE_UPDATE_LOCK, - i.src_dir, - i.dst_dir, - i.src_inode, - i.dst_inode); - - if (S_ISDIR(i.src_inode->v.i_mode) && - inode_attrs_changing(i.dst_dir, i.src_inode)) { - ret = -EXDEV; - goto err; - } - - if (i.mode == BCH_RENAME_EXCHANGE && - S_ISDIR(i.dst_inode->v.i_mode) && - inode_attrs_changing(i.src_dir, i.dst_inode)) { - ret = -EXDEV; - goto err; - } - - if (inode_attr_changing(i.dst_dir, i.src_inode, Inode_opt_project)) { - ret = bch2_fs_quota_transfer(c, i.src_inode, - i.dst_dir->ei_qid, + src_dir, + dst_dir, + src_inode, + dst_inode); + + if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, src_inode, + dst_dir->ei_qid, 1 << QTYP_PRJ, KEY_TYPE_QUOTA_PREALLOC); if (ret) goto err; } - if (i.mode == BCH_RENAME_EXCHANGE && - inode_attr_changing(i.src_dir, i.dst_inode, Inode_opt_project)) { - ret = bch2_fs_quota_transfer(c, i.dst_inode, - i.src_dir->ei_qid, + if (mode == BCH_RENAME_EXCHANGE && + inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) { + ret = bch2_fs_quota_transfer(c, dst_inode, + src_dir->ei_qid, 1 << QTYP_PRJ, KEY_TYPE_QUOTA_PREALLOC); if (ret) @@ -855,24 +628,14 @@ static int bch2_rename2(struct mnt_idmap *idmap, retry: bch2_trans_begin(&trans); - i.now = bch2_current_time(c); - - ret = bch2_dirent_rename(&trans, - i.src_dir, &src_dentry->d_name, - i.dst_dir, &dst_dentry->d_name, - i.mode) ?: - bch2_write_inode_trans(&trans, i.src_dir, &src_dir_u, - inode_update_for_rename_fn, &i) ?: - (i.src_dir != i.dst_dir - ? bch2_write_inode_trans(&trans, i.dst_dir, &dst_dir_u, - inode_update_for_rename_fn, &i) - : 0 ) ?: - bch2_write_inode_trans(&trans, i.src_inode, &src_inode_u, - inode_update_for_rename_fn, &i) ?: - (i.dst_inode - ? 
bch2_write_inode_trans(&trans, i.dst_inode, &dst_inode_u, - inode_update_for_rename_fn, &i) - : 0 ) ?: + ret = bch2_rename_trans(&trans, + src_dir->v.i_ino, &src_dir_u, + dst_dir->v.i_ino, &dst_dir_u, + &src_inode_u, + &dst_inode_u, + &src_dentry->d_name, + &dst_dentry->d_name, + mode) ?: bch2_trans_commit(&trans, NULL, &journal_seq, BTREE_INSERT_ATOMIC| @@ -882,43 +645,47 @@ retry: if (unlikely(ret)) goto err; - bch2_inode_update_after_write(c, i.src_dir, &src_dir_u, + BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum); + BUG_ON(dst_inode && + dst_inode->v.i_ino != dst_inode_u.bi_inum); + + bch2_inode_update_after_write(c, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(i.src_dir, journal_seq); + journal_seq_copy(src_dir, journal_seq); - if (i.src_dir != i.dst_dir) { - bch2_inode_update_after_write(c, i.dst_dir, &dst_dir_u, + if (src_dir != dst_dir) { + bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(i.dst_dir, journal_seq); + journal_seq_copy(dst_dir, journal_seq); } - journal_seq_copy(i.src_inode, journal_seq); - if (i.dst_inode) - journal_seq_copy(i.dst_inode, journal_seq); - - bch2_inode_update_after_write(c, i.src_inode, &src_inode_u, + bch2_inode_update_after_write(c, src_inode, &src_inode_u, ATTR_CTIME); - if (i.dst_inode) - bch2_inode_update_after_write(c, i.dst_inode, &dst_inode_u, + journal_seq_copy(src_inode, journal_seq); + + if (dst_inode) { + bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, ATTR_CTIME); + journal_seq_copy(dst_inode, journal_seq); + } err: bch2_trans_exit(&trans); - bch2_fs_quota_transfer(c, i.src_inode, - bch_qid(&i.src_inode->ei_inode), + bch2_fs_quota_transfer(c, src_inode, + bch_qid(&src_inode->ei_inode), 1 << QTYP_PRJ, KEY_TYPE_QUOTA_NOCHECK); - if (i.dst_inode) - bch2_fs_quota_transfer(c, i.dst_inode, - bch_qid(&i.dst_inode->ei_inode), + if (dst_inode) + bch2_fs_quota_transfer(c, dst_inode, + bch_qid(&dst_inode->ei_inode), 1 << QTYP_PRJ, KEY_TYPE_QUOTA_NOCHECK); bch2_unlock_inodes(INODE_UPDATE_LOCK, - i.src_dir, - i.dst_dir, - i.src_inode, - i.dst_inode); + src_dir, + dst_dir, + src_inode, + dst_inode); return ret; } @@ -1251,9 +1018,13 @@ static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence) static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { - struct bch_fs *c = file_inode(file)->i_sb->s_fs_info; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + if (!dir_emit_dots(file, ctx)) + return 0; - return bch2_readdir(c, file, ctx); + return bch2_readdir(c, inode->v.i_ino, ctx); } static const struct file_operations bch_file_operations = { diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index c3ee9c17064f..b3a2993dd9bc 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -103,11 +103,6 @@ static inline struct bch_inode_info *file_bch_inode(struct file *file) return to_bch_ei(file_inode(file)); } -static inline u8 mode_to_type(umode_t mode) -{ - return (mode >> 12) & 15; -} - static inline bool inode_attr_changing(struct bch_inode_info *dir, struct bch_inode_info *inode, enum inode_opt_id id) @@ -162,17 +157,9 @@ void bch2_inode_update_after_write(struct bch_fs *, struct bch_inode_info *, struct bch_inode_unpacked *, unsigned); -int __must_check bch2_write_inode_trans(struct btree_trans *, - struct bch_inode_info *, - struct bch_inode_unpacked *, - inode_set_fn, void *); int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *, unsigned); -int 
bch2_reinherit_attrs_fn(struct bch_inode_info *, - struct bch_inode_unpacked *, - void *); - void bch2_vfs_exit(void); int bch2_vfs_init(void); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index c5540536f47c..5acf1fb64543 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -4,7 +4,7 @@ #include "btree_update.h" #include "dirent.h" #include "error.h" -#include "fs.h" +#include "fs-common.h" #include "fsck.h" #include "inode.h" #include "keylist.h" @@ -80,9 +80,7 @@ static int reattach_inode(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, u64 inum) { - struct bch_hash_info lostfound_hash_info = - bch2_hash_info_init(c, lostfound_inode); - struct bkey_inode_buf packed; + struct bch_inode_unpacked inode_u; char name_buf[20]; struct qstr name; int ret; @@ -90,30 +88,14 @@ static int reattach_inode(struct bch_fs *c, snprintf(name_buf, sizeof(name_buf), "%llu", inum); name = (struct qstr) QSTR(name_buf); - lostfound_inode->bi_nlink++; - - bch2_inode_pack(&packed, lostfound_inode); - - ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); - if (ret) { - bch_err(c, "error %i reattaching inode %llu while updating lost+found", - ret, inum); - return ret; - } + ret = bch2_trans_do(c, NULL, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_LAZY_RW, + bch2_link_trans(&trans, lostfound_inode->bi_inum, + inum, &inode_u, &name)); + if (ret) + bch_err(c, "error %i reattaching inode %llu", ret, inum); - ret = bch2_dirent_create(c, lostfound_inode->bi_inum, - &lostfound_hash_info, - DT_DIR, &name, inum, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); - if (ret) { - bch_err(c, "error %i reattaching inode %llu while creating new dirent", - ret, inum); - return ret; - } return ret; } @@ -758,7 +740,7 @@ static int check_root(struct bch_fs *c, struct bch_inode_unpacked *root_inode) fsck_err: return ret; create_root: - bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, + bch2_inode_init(c, root_inode, 0, 0, S_IFDIR|0755, 0, NULL); root_inode->bi_inum = BCACHEFS_ROOT_INO; @@ -778,7 +760,6 @@ static int check_lostfound(struct bch_fs *c, struct qstr lostfound = QSTR("lost+found"); struct bch_hash_info root_hash_info = bch2_hash_info_init(c, root_inode); - struct bkey_inode_buf packed; u64 inum; int ret; @@ -806,33 +787,20 @@ static int check_lostfound(struct bch_fs *c, fsck_err: return ret; create_lostfound: - root_inode->bi_nlink++; - - bch2_inode_pack(&packed, root_inode); - - ret = bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, - NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); - if (ret) - return ret; - - bch2_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, - 0, root_inode); - - ret = bch2_inode_create(c, lostfound_inode, BLOCKDEV_INODE_MAX, 0, - &c->unused_inode_hint); + bch2_inode_init_early(c, lostfound_inode); + + ret = bch2_trans_do(c, NULL, + BTREE_INSERT_ATOMIC| + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + bch2_create_trans(&trans, + BCACHEFS_ROOT_INO, root_inode, + lostfound_inode, &lostfound, + 0, 0, S_IFDIR|0755, 0, NULL, NULL)); if (ret) - return ret; - - ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR, - &lostfound, lostfound_inode->bi_inum, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); - if (ret) - return ret; + bch_err(c, "error creating lost+found: %i", ret); - return 0; + return ret; } struct inode_bitmap { diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 3dc46faaebbc..aeae536b39f1 100644 --- 
a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -297,11 +297,9 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, pr_buf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation)); } -void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, - uid_t uid, gid_t gid, umode_t mode, dev_t rdev, - struct bch_inode_unpacked *parent) +void bch2_inode_init_early(struct bch_fs *c, + struct bch_inode_unpacked *inode_u) { - s64 now = bch2_current_time(c); enum bch_str_hash_type str_hash = bch2_str_hash_opt_to_type(c, c->opts.str_hash); @@ -311,7 +309,12 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET; get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed)); +} +void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now, + uid_t uid, gid_t gid, umode_t mode, dev_t rdev, + struct bch_inode_unpacked *parent) +{ inode_u->bi_mode = mode; inode_u->bi_uid = uid; inode_u->bi_gid = gid; @@ -321,6 +324,12 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, inode_u->bi_ctime = now; inode_u->bi_otime = now; + if (parent && parent->bi_mode & S_ISGID) { + inode_u->bi_gid = parent->bi_gid; + if (S_ISDIR(mode)) + inode_u->bi_mode |= S_ISGID; + } + if (parent) { #define x(_name, ...) inode_u->bi_##_name = parent->bi_##_name; BCH_INODE_OPTS() @@ -328,6 +337,15 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, } } +void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, + uid_t uid, gid_t gid, umode_t mode, dev_t rdev, + struct bch_inode_unpacked *parent) +{ + bch2_inode_init_early(c, inode_u); + bch2_inode_init_late(inode_u, bch2_current_time(c), + uid, gid, mode, rdev, parent); +} + static inline u32 bkey_generation(struct bkey_s_c k) { switch (k.k->type) { @@ -340,9 +358,9 @@ static inline u32 bkey_generation(struct bkey_s_c k) } } -int __bch2_inode_create(struct btree_trans *trans, - struct bch_inode_unpacked *inode_u, - u64 min, u64 max, u64 *hint) +int bch2_inode_create(struct btree_trans *trans, + struct bch_inode_unpacked *inode_u, + u64 min, u64 max, u64 *hint) { struct bch_fs *c = trans->c; struct bkey_inode_buf *inode_p; @@ -408,13 +426,6 @@ out: return -ENOSPC; } -int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u, - u64 min, u64 max, u64 *hint) -{ - return bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, - __bch2_inode_create(&trans, inode_u, min, max, hint)); -} - int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) { struct btree_trans trans; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index c5626c668639..b32c0a47c25d 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -51,14 +51,17 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *, int bch2_inode_write(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *); +void bch2_inode_init_early(struct bch_fs *, + struct bch_inode_unpacked *); +void bch2_inode_init_late(struct bch_inode_unpacked *, u64, + uid_t, gid_t, umode_t, dev_t, + struct bch_inode_unpacked *); void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, uid_t, gid_t, umode_t, dev_t, struct bch_inode_unpacked *); -int __bch2_inode_create(struct btree_trans *, - struct bch_inode_unpacked *, - u64, u64, u64 *); -int bch2_inode_create(struct bch_fs *, struct bch_inode_unpacked *, +int bch2_inode_create(struct btree_trans *, + struct bch_inode_unpacked *, u64, u64, u64 *); int bch2_inode_rm(struct 
bch_fs *, u64); @@ -108,6 +111,11 @@ static inline u64 bch2_inode_opt_get(struct bch_inode_unpacked *inode, } } +static inline u8 mode_to_type(umode_t mode) +{ + return (mode >> 12) & 15; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2e880955a07c..e6015bc13e9b 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -10,6 +10,7 @@ #include "dirent.h" #include "ec.h" #include "error.h" +#include "fs-common.h" #include "fsck.h" #include "journal_io.h" #include "journal_reclaim.h" @@ -952,7 +953,6 @@ int bch2_fs_initialize(struct bch_fs *c) { struct bch_inode_unpacked root_inode, lostfound_inode; struct bkey_inode_buf packed_inode; - struct bch_hash_info root_hash_info; struct qstr lostfound = QSTR("lost+found"); const char *err = "cannot allocate memory"; struct bch_dev *ca; @@ -997,7 +997,6 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); root_inode.bi_inum = BCACHEFS_ROOT_INO; - root_inode.bi_nlink++; /* lost+found */ bch2_inode_pack(&packed_inode, &root_inode); err = "error creating root directory"; @@ -1007,24 +1006,15 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - bch2_inode_init(c, &lostfound_inode, 0, 0, - S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, - &root_inode); - lostfound_inode.bi_inum = BCACHEFS_ROOT_INO + 1; - bch2_inode_pack(&packed_inode, &lostfound_inode); + bch2_inode_init_early(c, &lostfound_inode); err = "error creating lost+found"; - ret = bch2_btree_insert(c, BTREE_ID_INODES, - &packed_inode.inode.k_i, - NULL, NULL, 0); - if (ret) - goto err; - - root_hash_info = bch2_hash_info_init(c, &root_inode); - - ret = bch2_dirent_create(c, BCACHEFS_ROOT_INO, &root_hash_info, DT_DIR, - &lostfound, lostfound_inode.bi_inum, NULL, - BTREE_INSERT_NOFAIL); + ret = bch2_trans_do(c, NULL, BTREE_INSERT_ATOMIC, + bch2_create_trans(&trans, BCACHEFS_ROOT_INO, + &root_inode, &lostfound_inode, + &lostfound, + 0, 0, 0755, 0, + NULL, NULL)); if (ret) goto err; -- cgit v1.2.3 From e0541a9346951c94dce4d65d88541a329adf0b76 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Oct 2019 11:12:48 -0400 Subject: bcachefs: Kill some dependencies on ei_inode Moving bch2_extent_update() to io.c will be greatly simplified if we no longer have to keep ei_inode.bi_size/bi_sectors up to date. 
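To illustrate the pattern the bch2_truncate() hunk below moves to (a minimal sketch, not code from this patch; the helper name is hypothetical and error handling is trimmed): read the current on-disk inode inside a btree transaction instead of trusting the cached ei_inode copy.

static int example_peek_on_disk_size(struct bch_fs *c,
				     struct bch_inode_info *inode,
				     u64 *i_size)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bch_inode_unpacked inode_u;
	int ret;

	/* inode is locked, so the on-disk i_size can only grow under us: */
	bch2_trans_init(&trans, c, 0, 0);
	iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0);
	ret = PTR_ERR_OR_ZERO(iter);
	if (!ret)
		*i_size = inode_u.bi_size;
	bch2_trans_exit(&trans);

	return ret;
}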
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 92 ++++++++++++++++++++++++++++++++++++----------------- fs/bcachefs/fs.c | 28 ---------------- 2 files changed, 62 insertions(+), 58 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index de3c6f8c4b04..9ecefd95df6e 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2453,14 +2453,18 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) from, round_up(from, PAGE_SIZE)); } -static int bch2_extend(struct bch_inode_info *inode, struct iattr *iattr) +static int bch2_extend(struct bch_inode_info *inode, + struct bch_inode_unpacked *inode_u, + struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; int ret; - ret = filemap_write_and_wait_range(mapping, - inode->ei_inode.bi_size, S64_MAX); + /* + * sync appends: + */ + ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX); if (ret) return ret; @@ -2501,19 +2505,31 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; + struct bch_inode_unpacked inode_u; + struct btree_trans trans; + struct btree_iter *iter; u64 new_i_size = iattr->ia_size; - bool shrink; int ret = 0; inode_dio_wait(&inode->v); bch2_pagecache_block_get(&inode->ei_pagecache_lock); - BUG_ON(inode->v.i_size < inode->ei_inode.bi_size); + /* + * fetch current on disk i_size: inode is locked, i_size can only + * increase underneath us: + */ + bch2_trans_init(&trans, c, 0, 0); + iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); + ret = PTR_ERR_OR_ZERO(iter); + bch2_trans_exit(&trans); + + if (ret) + goto err; - shrink = iattr->ia_size <= inode->v.i_size; + BUG_ON(inode->v.i_size < inode_u.bi_size); - if (!shrink) { - ret = bch2_extend(inode, iattr); + if (iattr->ia_size > inode->v.i_size) { + ret = bch2_extend(inode, &inode_u, iattr); goto err; } @@ -2531,9 +2547,9 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) * userspace has to redirty it and call .mkwrite -> set_page_dirty * again to allocate the part of the page that was extended. */ - if (iattr->ia_size > inode->ei_inode.bi_size) + if (iattr->ia_size > inode_u.bi_size) ret = filemap_write_and_wait_range(mapping, - inode->ei_inode.bi_size, + inode_u.bi_size, iattr->ia_size - 1); else if (iattr->ia_size & (PAGE_SIZE - 1)) ret = filemap_write_and_wait_range(mapping, @@ -2935,33 +2951,49 @@ bkey_err: if (ret) goto err; } - bch2_trans_unlock(&trans); - if (!(mode & FALLOC_FL_KEEP_SIZE) && - end > inode->v.i_size) { - i_size_write(&inode->v, end); + /* + * Do we need to extend the file? 
+ * + * If we zeroed up to the end of the file, we dropped whatever writes + * were going to write out the current i_size, so we have to extend + * manually even if FL_KEEP_SIZE was set: + */ + if (end >= inode->v.i_size && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & FALLOC_FL_ZERO_RANGE))) { + struct btree_iter *inode_iter; + struct bch_inode_unpacked inode_u; + + do { + bch2_trans_begin(&trans); + inode_iter = bch2_inode_peek(&trans, &inode_u, + inode->v.i_ino, 0); + ret = PTR_ERR_OR_ZERO(inode_iter); + } while (ret == -EINTR); - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, inode->v.i_size, 0); - mutex_unlock(&inode->ei_update_lock); - } + bch2_trans_unlock(&trans); + + if (ret) + goto err; - /* blech */ - if ((mode & FALLOC_FL_KEEP_SIZE) && - (mode & FALLOC_FL_ZERO_RANGE) && - inode->ei_inode.bi_size != inode->v.i_size) { - /* sync appends.. */ + /* + * Sync existing appends before extending i_size, + * as in bch2_extend(): + */ ret = filemap_write_and_wait_range(mapping, - inode->ei_inode.bi_size, S64_MAX); + inode_u.bi_size, S64_MAX); if (ret) goto err; - if (inode->ei_inode.bi_size != inode->v.i_size) { - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, - inode->v.i_size, 0); - mutex_unlock(&inode->ei_update_lock); - } + if (mode & FALLOC_FL_KEEP_SIZE) + end = inode->v.i_size; + else + i_size_write(&inode->v, end); + + mutex_lock(&inode->ei_update_lock); + ret = bch2_write_inode_size(c, inode, end, 0); + mutex_unlock(&inode->ei_update_lock); } err: bch2_trans_exit(&trans); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b19a2deed5c1..0042a825a698 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -99,34 +99,6 @@ void bch2_pagecache_block_get(struct pagecache_lock *lock) __pagecache_lock_get(lock, -1); } -/* - * I_SIZE_DIRTY requires special handling: - * - * To the recovery code, the flag means that there is stale data past i_size - * that needs to be deleted; it's used for implementing atomic appends and - * truncates. - * - * On append, we set I_SIZE_DIRTY before doing the write, then after the write - * we clear I_SIZE_DIRTY atomically with updating i_size to the new larger size - * that exposes the data we just wrote. - * - * On truncate, it's the reverse: We set I_SIZE_DIRTY atomically with setting - * i_size to the new smaller size, then we delete the data that we just made - * invisible, and then we clear I_SIZE_DIRTY. - * - * Because there can be multiple appends in flight at a time, we need a refcount - * (i_size_dirty_count) instead of manipulating the flag directly. Nonzero - * refcount means I_SIZE_DIRTY is set, zero means it's cleared. - * - * Because write_inode() can be called at any time, i_size_dirty_count means - * something different to the runtime code - it means to write_inode() "don't - * update i_size yet". - * - * We don't clear I_SIZE_DIRTY directly, we let write_inode() clear it when - * i_size_dirty_count is zero - but the reverse is not true, I_SIZE_DIRTY must - * be set explicitly. - */ - void bch2_inode_update_after_write(struct bch_fs *c, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, -- cgit v1.2.3 From 821a99b7ba6802d43f980a8312cd25694b7ea076 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2019 15:14:36 -0400 Subject: bcachefs: Switch to .iterate_shared for readdir We definitely don't need an exclusive inode lock for readdir. 
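A sketch of what this buys us (the field layout here is an assumption; the actual bch_dir_file_operations hunk is not shown in this excerpt): with .iterate_shared the VFS calls readdir holding the directory's inode lock shared rather than exclusive, so concurrent readdirs of the same directory no longer serialize.

static const struct file_operations example_dir_operations = {
	.llseek		= bch2_dir_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= bch2_vfs_readdir,	/* was .iterate */
};

bch2_vfs_readdir only reads the btree, so a shared lock is sufficient.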
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0042a825a698..65556993bbb9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -367,12 +367,12 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, return d_splice_alias(vinode, dentry); } -static int bch2_create(struct mnt_idmap *idmap, - struct inode *vdir, struct dentry *dentry, - umode_t mode, bool excl) +static int bch2_mknod(struct mnt_idmap *idmap, + struct inode *vdir, struct dentry *dentry, + umode_t mode, dev_t rdev) { struct bch_inode_info *inode = - __bch2_create(idmap, to_bch_ei(vdir), dentry, mode|S_IFREG, 0, false); + __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, false); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -381,6 +381,13 @@ static int bch2_create(struct mnt_idmap *idmap, return 0; } +static int bch2_create(struct mnt_idmap *idmap, + struct inode *vdir, struct dentry *dentry, + umode_t mode, bool excl) +{ + return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0); +} + static int __bch2_link(struct bch_fs *c, struct bch_inode_info *inode, struct bch_inode_info *dir, @@ -512,33 +519,7 @@ err: static int bch2_mkdir(struct mnt_idmap *idmap, struct inode *vdir, struct dentry *dentry, umode_t mode) { - struct bch_inode_info *inode = - __bch2_create(idmap, to_bch_ei(vdir), dentry, mode|S_IFDIR, 0, false); - - if (IS_ERR(inode)) - return PTR_ERR(inode); - - d_instantiate(dentry, &inode->v); - return 0; -} - -static int bch2_rmdir(struct inode *vdir, struct dentry *dentry) -{ - return bch2_unlink(vdir, dentry); -} - -static int bch2_mknod(struct mnt_idmap *idmap, - struct inode *vdir, struct dentry *dentry, - umode_t mode, dev_t rdev) -{ - struct bch_inode_info *inode = - __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, false); - - if (IS_ERR(inode)) - return PTR_ERR(inode); - - d_instantiate(dentry, &inode->v); - return 0; + return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0); } static int bch2_rename2(struct mnt_idmap *idmap, @@ -1034,7 +1015,7 @@ static const struct inode_operations bch_dir_inode_operations = { .unlink = bch2_unlink, .symlink = bch2_symlink, .mkdir = bch2_mkdir, - .rmdir = bch2_rmdir, + .rmdir = bch2_unlink, .mknod = bch2_mknod, .rename = bch2_rename2, .getattr = bch2_getattr, -- cgit v1.2.3 From 4a1d8d3efcdedd0911941f236b2e3a6347f518c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 6 Nov 2019 14:29:30 -0500 Subject: bcachefs: Fix setting of attributes mask in getattr Discovered by xfstests generic/553 Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 65556993bbb9..b241164f6f7e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -775,10 +775,15 @@ static int bch2_getattr(struct mnt_idmap *idmap, if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE) stat->attributes |= STATX_ATTR_IMMUTABLE; + stat->attributes_mask |= STATX_ATTR_IMMUTABLE; + if (inode->ei_inode.bi_flags & BCH_INODE_APPEND) stat->attributes |= STATX_ATTR_APPEND; + stat->attributes_mask |= STATX_ATTR_APPEND; + if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP) stat->attributes |= STATX_ATTR_NODUMP; + stat->attributes_mask |= STATX_ATTR_NODUMP; return 0; } -- cgit v1.2.3 From 35189e09ab46785746df7007ed2a57ee78b56191 Mon Sep 17 
00:00:00 2001 From: Kent Overstreet Date: Sat, 9 Nov 2019 16:01:15 -0500 Subject: bcachefs: bkey_on_stack This implements code for storing small bkeys on the stack and allocating out of a mempool if they're too big. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 + fs/bcachefs/bkey_on_stack.h | 35 +++++++++++++++++ fs/bcachefs/bkey_sort.c | 13 +++++-- fs/bcachefs/ec.c | 12 ++++-- fs/bcachefs/extents.c | 18 ++++++--- fs/bcachefs/fs-io.c | 92 +++++++++++++++++++++++++-------------------- fs/bcachefs/fs.c | 29 ++++++++------ fs/bcachefs/io.c | 63 +++++++++++++++++++------------ fs/bcachefs/migrate.c | 16 +++++--- fs/bcachefs/move.c | 10 +++-- fs/bcachefs/reflink.c | 17 ++++++--- fs/bcachefs/super.c | 2 + 12 files changed, 205 insertions(+), 104 deletions(-) create mode 100644 fs/bcachefs/bkey_on_stack.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index f8a040115fd1..344cf982124f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -729,6 +729,8 @@ struct bch_fs { atomic64_t key_version; + mempool_t large_bkey_pool; + /* REBALANCE */ struct bch_fs_rebalance rebalance; diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h new file mode 100644 index 000000000000..d4739038323f --- /dev/null +++ b/fs/bcachefs/bkey_on_stack.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BKEY_ON_STACK_H +#define _BCACHEFS_BKEY_ON_STACK_H + +#include "bcachefs.h" + +struct bkey_on_stack { + struct bkey_i *k; + u64 onstack[12]; +}; + +static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, + struct bch_fs *c, unsigned u64s) +{ + if (s->k == (void *) s->onstack && + u64s > ARRAY_SIZE(s->onstack)) { + s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + memcpy(s->k, s->onstack, sizeof(s->onstack)); + } +} + +static inline void bkey_on_stack_init(struct bkey_on_stack *s) +{ + s->k = (void *) s->onstack; +} + +static inline void bkey_on_stack_exit(struct bkey_on_stack *s, + struct bch_fs *c) +{ + if (s->k != (void *) s->onstack) + mempool_free(s->k, &c->large_bkey_pool); + s->k = NULL; +} + +#endif /* _BCACHEFS_BKEY_ON_STACK_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index 2cac269b386f..f5c0507ad79d 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_on_stack.h" #include "bkey_sort.h" #include "bset.h" #include "extents.h" @@ -292,8 +293,10 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bkey l_unpacked, r_unpacked; struct bkey_s l, r; struct btree_nr_keys nr; + struct bkey_on_stack split; memset(&nr, 0, sizeof(nr)); + bkey_on_stack_init(&split); heap_resort(iter, extent_sort_cmp, NULL); @@ -349,13 +352,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extent_sort_sift(iter, b, _r - iter->data); } else if (bkey_cmp(l.k->p, r.k->p) > 0) { - BKEY_PADDED(k) tmp; + bkey_on_stack_realloc(&split, c, l.k->u64s); /* * r wins, but it overlaps in the middle of l - split l: */ - bkey_reassemble(&tmp.k, l.s_c); - bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k); + bkey_reassemble(split.k, l.s_c); + bch2_cut_back(bkey_start_pos(r.k), &split.k->k); __bch2_cut_front(r.k->p, l); extent_save(b, lk, l.k); @@ -363,7 +366,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extent_sort_sift(iter, b, 0); extent_sort_append(c, f, &nr, dst->start, - &prev, 
bkey_i_to_s(&tmp.k)); + &prev, bkey_i_to_s(split.k)); } else { bch2_cut_back(bkey_start_pos(r.k), l.k); extent_save(b, lk, l.k); @@ -373,6 +376,8 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, extent_sort_advance_prev(f, &nr, dst->start, &prev); dst->u64s = cpu_to_le16((u64 *) prev - dst->_data); + + bkey_on_stack_exit(&split, c); return nr; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index f32b8e6bf2ce..b24f867520c3 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -4,6 +4,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "bset.h" #include "btree_gc.h" #include "btree_update.h" @@ -777,9 +778,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct bkey_s_c k; struct bkey_s_extent e; struct bch_extent_ptr *ptr; - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; int ret = 0, dev, idx; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -804,8 +806,9 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, dev = s->key.v.ptrs[idx].dev; - bkey_reassemble(&tmp.k, k); - e = bkey_i_to_s_extent(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + e = bkey_i_to_s_extent(sk.k); extent_for_each_ptr(e, ptr) if (ptr->dev != dev) @@ -816,7 +819,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ptr, idx); - bch2_trans_update(&trans, iter, &tmp.k); + bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| @@ -829,6 +832,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, } bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return ret; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index b12798103763..46eeaa574e86 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "bkey_methods.h" +#include "bkey_on_stack.h" #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -1132,7 +1133,11 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter, break; } case BCH_EXTENT_OVERLAP_MIDDLE: { - BKEY_PADDED(k) split; + struct bkey_on_stack split; + + bkey_on_stack_init(&split); + bkey_on_stack_realloc(&split, c, k.k->u64s); + /* * The insert key falls 'in the middle' of k * The insert key splits k in 3: @@ -1147,18 +1152,19 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter, * modify k _before_ doing the insert (which will move * what k points to) */ - bkey_reassemble(&split.k, k.s_c); - split.k.k.needs_whiteout |= bkey_written(l->b, _k); + bkey_reassemble(split.k, k.s_c); + split.k->k.needs_whiteout |= bkey_written(l->b, _k); - bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k); - BUG_ON(bkey_deleted(&split.k.k)); + bch2_cut_back(bkey_start_pos(&insert->k), &split.k->k); + BUG_ON(bkey_deleted(&split.k->k)); __bch2_cut_front(insert->k.p, k); BUG_ON(bkey_deleted(k.k)); extent_save(l->b, _k, k.k); bch2_btree_iter_fix_key_modified(iter, l->b, _k); - extent_bset_insert(c, iter, &split.k); + extent_bset_insert(c, iter, split.k); + bkey_on_stack_exit(&split, c); break; } } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 657559c2db14..478630fdf643 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -691,6 +692,18 @@ static void 
bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) } } +static bool extent_partial_reads_expensive(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + + bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc.csum_type || crc.compression_type) + return true; + return false; +} + static void readpage_bio_extend(struct readpages_iter *iter, struct bio *bio, unsigned sectors_this_extent, @@ -744,15 +757,17 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, struct readpages_iter *readpages_iter) { struct bch_fs *c = trans->c; + struct bkey_on_stack sk; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; int ret = 0; rbio->c = c; rbio->start_time = local_clock(); + + bkey_on_stack_init(&sk); retry: while (1) { - BKEY_PADDED(k) tmp; struct bkey_s_c k; unsigned bytes, sectors, offset_into_extent; @@ -764,15 +779,16 @@ retry: if (ret) break; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) break; @@ -780,22 +796,9 @@ retry: bch2_trans_unlock(trans); - if (readpages_iter) { - bool want_full_extent = false; - - if (bkey_extent_is_data(k.k)) { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *i; - struct extent_ptr_decoded p; - - bkey_for_each_ptr_decode(k.k, ptrs, p, i) - want_full_extent |= ((p.crc.csum_type != 0) | - (p.crc.compression_type != 0)); - } - - readpage_bio_extend(readpages_iter, &rbio->bio, - sectors, want_full_extent); - } + if (readpages_iter) + readpage_bio_extend(readpages_iter, &rbio->bio, sectors, + extent_partial_reads_expensive(k)); bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; swap(rbio->bio.bi_iter.bi_size, bytes); @@ -809,7 +812,7 @@ retry: bch2_read_extent(c, rbio, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) - return; + break; swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); @@ -818,8 +821,12 @@ retry: if (ret == -EINTR) goto retry; - bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); - bio_endio(&rbio->bio); + if (ret) { + bcache_io_error(c, &rbio->bio, "btree IO error %i", ret); + bio_endio(&rbio->bio); + } + + bkey_on_stack_exit(&sk, c); } void bch2_readahead(struct readahead_control *ractl) @@ -2353,6 +2360,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; + struct bkey_on_stack copy; struct btree_trans trans; struct btree_iter *src, *dst, *del = NULL; loff_t shift, new_size; @@ -2362,6 +2370,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; + bkey_on_stack_init(©); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); /* @@ -2430,7 +2439,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, while (1) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); - BKEY_PADDED(k) copy; struct bkey_i delete; struct bkey_s_c k; struct bpos next_pos; @@ -2455,34 +2463,35 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) break; reassemble: - 
bkey_reassemble(©.k, k); + bkey_on_stack_realloc(©, c, k.k->u64s); + bkey_reassemble(copy.k, k); if (insert && bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) { - bch2_cut_front(move_pos, ©.k); - bch2_btree_iter_set_pos(src, bkey_start_pos(©.k.k)); + bch2_cut_front(move_pos, copy.k); + bch2_btree_iter_set_pos(src, bkey_start_pos(©.k->k)); } - copy.k.k.p.offset += shift >> 9; - bch2_btree_iter_set_pos(dst, bkey_start_pos(©.k.k)); + copy.k->k.p.offset += shift >> 9; + bch2_btree_iter_set_pos(dst, bkey_start_pos(©.k->k)); - ret = bch2_extent_atomic_end(dst, ©.k, &atomic_end); + ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); if (ret) goto bkey_err; - if (bkey_cmp(atomic_end, copy.k.k.p)) { + if (bkey_cmp(atomic_end, copy.k->k.p)) { if (insert) { move_pos = atomic_end; move_pos.offset -= shift >> 9; goto reassemble; } else { - bch2_cut_back(atomic_end, ©.k.k); + bch2_cut_back(atomic_end, ©.k->k); } } bkey_init(&delete.k); delete.k.p = src->pos; - bch2_key_resize(&delete.k, copy.k.k.size); + bch2_key_resize(&delete.k, copy.k->k.size); next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; @@ -2495,12 +2504,12 @@ reassemble: * by the triggers machinery: */ if (insert && - bkey_cmp(bkey_start_pos(©.k.k), delete.k.p) < 0) { - bch2_cut_back(bkey_start_pos(©.k.k), &delete.k); + bkey_cmp(bkey_start_pos(©.k->k), delete.k.p) < 0) { + bch2_cut_back(bkey_start_pos(©.k->k), &delete.k); } else if (!insert && - bkey_cmp(copy.k.k.p, + bkey_cmp(copy.k->k.p, bkey_start_pos(&delete.k)) > 0) { - bch2_cut_front(copy.k.k.p, &delete); + bch2_cut_front(copy.k->k.p, &delete); del = bch2_trans_copy_iter(&trans, src); BUG_ON(IS_ERR_OR_NULL(del)); @@ -2509,10 +2518,10 @@ reassemble: bkey_start_pos(&delete.k)); } - bch2_trans_update(&trans, dst, ©.k); + bch2_trans_update(&trans, dst, copy.k); bch2_trans_update(&trans, del ?: src, &delete); - if (copy.k.k.size == k.k->size) { + if (copy.k->k.size == k.k->size) { /* * If we're moving the entire extent, we can skip * running triggers: @@ -2521,10 +2530,10 @@ reassemble: } else { /* We might end up splitting compressed extents: */ unsigned nr_ptrs = - bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(©.k)); + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(copy.k)); ret = bch2_disk_reservation_get(c, &disk_res, - copy.k.k.size, nr_ptrs, + copy.k->k.size, nr_ptrs, BCH_DISK_RESERVATION_NOFAIL); BUG_ON(ret); } @@ -2559,6 +2568,7 @@ bkey_err: } err: bch2_trans_exit(&trans); + bkey_on_stack_exit(©, c); bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b241164f6f7e..e8cdae3c114b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "acl.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "buckets.h" #include "chardev.h" @@ -875,7 +876,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(k) cur, prev; + struct bkey_on_stack cur, prev; struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; @@ -888,6 +889,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; + bkey_on_stack_init(&cur); + bkey_on_stack_init(&prev); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -902,15 +905,17 @@ retry: continue; } - bkey_reassemble(&cur.k, k); - k = bkey_i_to_s_c(&cur.k); + 
bkey_on_stack_realloc(&cur, c, k.k->u64s); + bkey_on_stack_realloc(&prev, c, k.k->u64s); + bkey_reassemble(cur.k, k); + k = bkey_i_to_s_c(cur.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &cur.k); + &offset_into_extent, cur.k); if (ret) break; @@ -920,19 +925,19 @@ retry: bch2_cut_front(POS(k.k->p.inode, bkey_start_offset(k.k) + offset_into_extent), - &cur.k); - bch2_key_resize(&cur.k.k, sectors); - cur.k.k.p = iter->pos; - cur.k.k.p.offset += cur.k.k.size; + cur.k); + bch2_key_resize(&cur.k->k, sectors); + cur.k->k.p = iter->pos; + cur.k->k.p.offset += cur.k->k.size; if (have_extent) { ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(&prev.k), 0); + bkey_i_to_s_c(prev.k), 0); if (ret) break; } - bkey_copy(&prev.k, &cur.k); + bkey_copy(prev.k, cur.k); have_extent = true; if (k.k->type == KEY_TYPE_reflink_v) @@ -945,10 +950,12 @@ retry: goto retry; if (!ret && have_extent) - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k), + ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&cur, c); + bkey_on_stack_exit(&prev, c); return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index f53eee7accc8..4fe61705ae75 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "bset.h" #include "btree_update.h" #include "buckets.h" @@ -394,12 +395,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, int bch2_write_index_default(struct bch_write_op *op) { struct bch_fs *c = op->c; + struct bkey_on_stack sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; struct btree_iter *iter; int ret; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -407,13 +410,14 @@ int bch2_write_index_default(struct bch_write_op *op) BTREE_ITER_SLOTS|BTREE_ITER_INTENT); do { - BKEY_PADDED(k) tmp; + k = bch2_keylist_front(keys); - bkey_copy(&tmp.k, bch2_keylist_front(keys)); + bkey_on_stack_realloc(&sk, c, k->k.u64s); + bkey_copy(sk.k, k); bch2_trans_begin_updates(&trans); - ret = bch2_extent_update(&trans, iter, &tmp.k, + ret = bch2_extent_update(&trans, iter, sk.k, &op->res, op_journal_seq(op), op->new_i_size, &op->i_sectors_delta); if (ret == -EINTR) @@ -421,13 +425,14 @@ int bch2_write_index_default(struct bch_write_op *op) if (ret) break; - if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0) - bch2_cut_front(iter->pos, bch2_keylist_front(keys)); + if (bkey_cmp(iter->pos, k->k.p) < 0) + bch2_cut_front(iter->pos, k); else bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return ret; } @@ -1463,13 +1468,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio { struct btree_trans trans; struct btree_iter *iter; - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -1481,11 +1487,12 @@ retry: if (bkey_err(k)) goto err; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + 
bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); - if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k), + if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, rbio->pos.offset - rbio->pick.crc.offset)) { @@ -1502,6 +1509,7 @@ retry: out: bch2_rbio_done(rbio); bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return; err: rbio->bio.bi_status = BLK_STS_IOERR; @@ -1514,12 +1522,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, { struct btree_trans trans; struct btree_iter *iter; + struct bkey_on_stack sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1527,18 +1537,18 @@ retry: for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k, ret) { - BKEY_PADDED(k) tmp; unsigned bytes, sectors, offset_into_extent; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) break; @@ -1577,6 +1587,7 @@ err: rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); bch2_rbio_done(rbio); } @@ -1633,7 +1644,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(k) new; + struct bkey_on_stack new; struct bch_extent_crc_unpacked new_crc; u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; int ret; @@ -1641,6 +1652,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) if (rbio->pick.crc.compression_type) return; + bkey_on_stack_init(&new); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1651,8 +1663,9 @@ retry: if (IS_ERR_OR_NULL(k.k)) goto out; - bkey_reassemble(&new.k, k); - k = bkey_i_to_s_c(&new.k); + bkey_on_stack_realloc(&new, c, k.k->u64s); + bkey_reassemble(new.k, k); + k = bkey_i_to_s_c(new.k); if (bversion_cmp(k.k->version, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) @@ -1671,10 +1684,10 @@ retry: goto out; } - if (!bch2_bkey_narrow_crcs(&new.k, new_crc)) + if (!bch2_bkey_narrow_crcs(new.k, new_crc)) goto out; - bch2_trans_update(&trans, iter, &new.k); + bch2_trans_update(&trans, iter, new.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| @@ -1683,6 +1696,7 @@ retry: goto retry; out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&new, c); } /* Inner part that may run in process context */ @@ -2114,6 +2128,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { struct btree_trans trans; struct btree_iter *iter; + struct bkey_on_stack sk; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| @@ -2127,6 +2142,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -2135,7 +2151,6 @@ retry: POS(inode, rbio->bio.bi_iter.bi_sector), BTREE_ITER_SLOTS); while (1) { - BKEY_PADDED(k) tmp; unsigned bytes, sectors, offset_into_extent; bch2_btree_iter_set_pos(iter, @@ -2146,15 +2161,16 @@ retry: if 
(ret) goto err; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); - offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); + ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) goto err; @@ -2186,6 +2202,7 @@ retry: } out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return; err: if (ret == -EINTR) diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index de8522f754e2..4dacbd637d02 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -4,6 +4,7 @@ */ #include "bcachefs.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" @@ -40,9 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(key) tmp; + struct bkey_on_stack sk; int ret = 0; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, @@ -58,9 +60,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags continue; } - bkey_reassemble(&tmp.key, k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); - ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key), + ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), dev_idx, flags, false); if (ret) break; @@ -70,11 +73,11 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags * will do the appropriate thing with it (turning it into a * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize(c, bkey_i_to_s(&tmp.key)); + bch2_extent_normalize(c, bkey_i_to_s(sk.k)); - bch2_btree_iter_set_pos(iter, bkey_start_pos(&tmp.key.k)); + bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); - bch2_trans_update(&trans, iter, &tmp.key); + bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| @@ -92,6 +95,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags } ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&sk, c); BUG_ON(ret == -EINTR); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index c5d3375882d7..dbe35d16e7dd 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -489,7 +490,7 @@ static int __bch2_move_data(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; @@ -498,6 +499,7 @@ static int __bch2_move_data(struct bch_fs *c, u64 delay, cur_inum = U64_MAX; int ret = 0, ret2; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_USER; @@ -577,8 +579,9 @@ peek: } /* unlock before doing IO: */ - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k, @@ -605,6 +608,7 @@ next_nondata: } out: ret = bch2_trans_exit(&trans) ?: ret; + 
bkey_on_stack_exit(&sk, c); return ret; } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 6e71c5e8f9a2..6d21086c3254 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_on_stack.h" #include "btree_update.h" #include "extents.h" #include "inode.h" @@ -160,7 +161,8 @@ s64 bch2_remap_range(struct bch_fs *c, struct btree_trans trans; struct btree_iter *dst_iter, *src_iter; struct bkey_s_c src_k; - BKEY_PADDED(k) new_dst, new_src; + BKEY_PADDED(k) new_dst; + struct bkey_on_stack new_src; struct bpos dst_end = dst_start, src_end = src_start; struct bpos dst_want, src_want; u64 src_done, dst_done; @@ -183,6 +185,7 @@ s64 bch2_remap_range(struct bch_fs *c, dst_end.offset += remap_sectors; src_end.offset += remap_sectors; + bkey_on_stack_init(&new_src); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, @@ -222,14 +225,15 @@ s64 bch2_remap_range(struct bch_fs *c, break; if (src_k.k->type == KEY_TYPE_extent) { - bkey_reassemble(&new_src.k, src_k); - src_k = bkey_i_to_s_c(&new_src.k); + bkey_on_stack_realloc(&new_src, c, src_k.k->u64s); + bkey_reassemble(new_src.k, src_k); + src_k = bkey_i_to_s_c(new_src.k); - bch2_cut_front(src_iter->pos, &new_src.k); - bch2_cut_back(src_end, &new_src.k.k); + bch2_cut_front(src_iter->pos, new_src.k); + bch2_cut_back(src_end, &new_src.k->k); ret = bch2_make_extent_indirect(&trans, src_iter, - bkey_i_to_extent(&new_src.k)); + bkey_i_to_extent(new_src.k)); if (ret) goto btree_err; @@ -299,6 +303,7 @@ err: } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; + bkey_on_stack_exit(&new_src, c); percpu_ref_put(&c->writes); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 14e2f6828cc6..8c7b56a95f4b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -475,6 +475,7 @@ static void bch2_fs_free(struct bch_fs *c) free_percpu(c->usage[0]); kfree(c->usage_base); free_percpu(c->pcpu); + mempool_exit(&c->large_bkey_pool); mempool_exit(&c->btree_bounce_pool); bioset_exit(&c->btree_bio); mempool_exit(&c->btree_interior_update_pool); @@ -729,6 +730,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) !(c->online_reserved = alloc_percpu(u64)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || + mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || bch2_io_clock_init(&c->io_clock[READ]) || bch2_io_clock_init(&c->io_clock[WRITE]) || bch2_fs_journal_init(&c->journal) || -- cgit v1.2.3 From 184b1dc1a6bf4bc53a1c71bf14120498aad67ff5 Mon Sep 17 00:00:00 2001 From: Justin Husted Date: Mon, 11 Nov 2019 20:14:30 -0800 Subject: bcachefs: Update directory timestamps during link Timestamp updates on the directory during a link operation were cached. This is inconsistent with other metadata operations such as rename, as well as being less efficient. 
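The heart of the change, as a sketch (the wrapper function is hypothetical; the real code inlines this in bch2_link_trans() below): the directory's timestamps are bumped and the directory inode written back in the same btree transaction that creates the dirent, instead of dirtying only the cached VFS inode.

static int example_touch_dir(struct btree_trans *trans,
			     struct btree_iter *dir_iter,
			     struct bch_inode_unpacked *dir_u)
{
	u64 now = bch2_current_time(trans->c);

	/* update mtime/ctime transactionally, alongside the new dirent: */
	dir_u->bi_mtime = dir_u->bi_ctime = now;
	return bch2_inode_write(trans, dir_iter, dir_u);
}

Committing the timestamp update together with the dirent keeps the two atomic, matching what rename already does.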
Signed-off-by: Justin Husted Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-common.c | 12 ++++++------ fs/bcachefs/fs-common.h | 1 + fs/bcachefs/fs.c | 12 +++++++++--- fs/bcachefs/fsck.c | 4 ++-- 4 files changed, 18 insertions(+), 11 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index a4497eeb1f1b..96f7bbe0a3ed 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -76,11 +76,10 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, } int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, - u64 inum, struct bch_inode_unpacked *inode_u, - const struct qstr *name) + u64 inum, struct bch_inode_unpacked *dir_u, + struct bch_inode_unpacked *inode_u, const struct qstr *name) { struct btree_iter *dir_iter, *inode_iter; - struct bch_inode_unpacked dir_u; struct bch_hash_info dir_hash; u64 now = bch2_current_time(trans->c); @@ -91,18 +90,19 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, inode_u->bi_ctime = now; bch2_inode_nlink_inc(inode_u); - dir_iter = bch2_inode_peek(trans, &dir_u, dir_inum, 0); + dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0); if (IS_ERR(dir_iter)) return PTR_ERR(dir_iter); - /* XXX: shouldn't we be updating mtime/ctime on the directory? */ + dir_u->bi_mtime = dir_u->bi_ctime = now; - dir_hash = bch2_hash_info_init(trans->c, &dir_u); + dir_hash = bch2_hash_info_init(trans->c, dir_u); bch2_trans_iter_put(trans, dir_iter); return bch2_dirent_create(trans, dir_inum, &dir_hash, mode_to_type(inode_u->bi_mode), name, inum, BCH_HASH_SET_MUST_CREATE) ?: + bch2_inode_write(trans, dir_iter, dir_u) ?: bch2_inode_write(trans, inode_iter, inode_u); } diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h index c1621485a526..2273b7961c9b 100644 --- a/fs/bcachefs/fs-common.h +++ b/fs/bcachefs/fs-common.h @@ -14,6 +14,7 @@ int bch2_create_trans(struct btree_trans *, u64, int bch2_link_trans(struct btree_trans *, u64, u64, struct bch_inode_unpacked *, + struct bch_inode_unpacked *, const struct qstr *); int bch2_unlink_trans(struct btree_trans *, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index e8cdae3c114b..c20eaa7418c2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -395,7 +395,7 @@ static int __bch2_link(struct bch_fs *c, struct dentry *dentry) { struct btree_trans trans; - struct bch_inode_unpacked inode_u; + struct bch_inode_unpacked dir_u, inode_u; int ret; mutex_lock(&inode->ei_update_lock); @@ -405,7 +405,7 @@ static int __bch2_link(struct bch_fs *c, bch2_trans_begin(&trans); ret = bch2_link_trans(&trans, dir->v.i_ino, - inode->v.i_ino, &inode_u, + inode->v.i_ino, &dir_u, &inode_u, &dentry->d_name) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, @@ -413,8 +413,14 @@ static int __bch2_link(struct bch_fs *c, BTREE_INSERT_NOUNLOCK); } while (ret == -EINTR); - if (likely(!ret)) + if (likely(!ret)) { + BUG_ON(inode_u.bi_inum != inode->v.i_ino); + + journal_seq_copy(inode, dir->ei_journal_seq); + bch2_inode_update_after_write(c, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); + } bch2_trans_exit(&trans); mutex_unlock(&inode->ei_update_lock); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0f2308e53d65..3ae545b31c7a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -80,7 +80,7 @@ static int reattach_inode(struct bch_fs *c, struct bch_inode_unpacked *lostfound_inode, u64 inum) { - struct bch_inode_unpacked inode_u; + struct bch_inode_unpacked dir_u, inode_u; char 
name_buf[20]; struct qstr name; int ret; @@ -92,7 +92,7 @@ static int reattach_inode(struct bch_fs *c, BTREE_INSERT_ATOMIC| BTREE_INSERT_LAZY_RW, bch2_link_trans(&trans, lostfound_inode->bi_inum, - inum, &inode_u, &name)); + inum, &dir_u, &inode_u, &name)); if (ret) bch_err(c, "error %i reattaching inode %llu", ret, inum); -- cgit v1.2.3 From 58e2388f9e11eb2dfb12d7d11a9a3559cd0e8945 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Dec 2019 23:39:28 -0500 Subject: bcachefs: Kill BTREE_INSERT_ATOMIC Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 1 - fs/bcachefs/alloc_background.c | 2 -- fs/bcachefs/btree_update.h | 10 +--------- fs/bcachefs/btree_update_leaf.c | 19 ++----------------- fs/bcachefs/ec.c | 3 --- fs/bcachefs/fs-io.c | 1 - fs/bcachefs/fs.c | 6 ------ fs/bcachefs/fsck.c | 8 -------- fs/bcachefs/inode.c | 1 - fs/bcachefs/io.c | 2 -- fs/bcachefs/migrate.c | 1 - fs/bcachefs/move.c | 1 - fs/bcachefs/recovery.c | 4 +--- fs/bcachefs/reflink.c | 3 +-- fs/bcachefs/xattr.c | 3 +-- 15 files changed, 6 insertions(+), 59 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 4e631e04cf0c..5a8d8311c08d 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -326,7 +326,6 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); btree_err: if (ret == -EINTR) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 7b9079a740ef..bd3e46d066bd 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -316,7 +316,6 @@ retry: bch2_trans_update(trans, iter, &a->k_i); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_NOMARK| flags); @@ -913,7 +912,6 @@ retry: */ ret = bch2_trans_commit(trans, NULL, invalidating_cached_data ? journal_seq : NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index d72da179f866..aa87477b51e1 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -16,7 +16,6 @@ void bch2_btree_journal_key(struct btree_trans *, struct btree_iter *, struct bkey_i *); enum { - __BTREE_INSERT_ATOMIC, __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOCHECK_RW, @@ -35,12 +34,6 @@ enum { __BCH_HASH_SET_MUST_REPLACE, }; -/* - * Don't drop/retake locks before doing btree update, instead return -EINTR if - * we had to drop locks for any reason - */ -#define BTREE_INSERT_ATOMIC (1 << __BTREE_INSERT_ATOMIC) - /* * Don't drop locks _after_ successfully updating btree: */ @@ -101,8 +94,7 @@ int __bch2_trans_commit(struct btree_trans *); * This is main entry point for btree updates. * * Return values: - * -EINTR: locking changed, this function should be called again. Only returned - * if passed BTREE_INSERT_ATOMIC. + * -EINTR: locking changed, this function should be called again. 
* -EROFS: filesystem read only * -EIO: journal or btree node IO error */ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 1112bdb689dc..94c1e1e2118a 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -298,8 +298,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && bkey_cmp(i->k->k.p, i->iter->l[0].b->key.k.p) > 0); - EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) && - !(trans->flags & BTREE_INSERT_ATOMIC)); BUG_ON(debug_check_bkeys(c) && !bkey_deleted(&i->k->k) && @@ -641,8 +639,8 @@ int bch2_trans_commit_error(struct btree_trans *trans, /* * if the split succeeded without dropping locks the insert will - * still be atomic (in the BTREE_INSERT_ATOMIC sense, what the - * caller peeked() and is overwriting won't have changed) + * still be atomic (what the caller peeked() and is overwriting + * won't have changed) */ #if 0 /* @@ -713,13 +711,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, return ret2; } - /* - * BTREE_ITER_ATOMIC means we have to return -EINTR if we - * dropped locks: - */ - if (!(flags & BTREE_INSERT_ATOMIC)) - return 0; - trace_trans_restart_atomic(trans->ip); } @@ -756,9 +747,6 @@ int __bch2_trans_commit(struct btree_trans *trans) if (!trans->nr_updates) goto out_noupdates; - /* for the sake of sanity: */ - EBUG_ON(trans->nr_updates > 1 && !(trans->flags & BTREE_INSERT_ATOMIC)); - if (trans->flags & BTREE_INSERT_GC_LOCK_HELD) lockdep_assert_held(&trans->c->gc_lock); @@ -795,8 +783,6 @@ out: if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) percpu_ref_put(&trans->c->writes); out_noupdates: - EBUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); - trans_for_each_iter_all(trans, iter) iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; @@ -897,7 +883,6 @@ retry: bch2_trans_update(trans, iter, &delete); ret = bch2_trans_commit(trans, NULL, journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); if (ret) break; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index ae07af49af02..a6bc9355c750 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -739,7 +739,6 @@ found_slot: bch2_trans_update(&trans, iter, &stripe->k_i); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); err: if (ret == -EINTR) @@ -822,7 +821,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE); if (ret == -EINTR) @@ -1235,7 +1233,6 @@ static int __bch2_stripe_write_key(struct btree_trans *trans, bch2_trans_update(trans, iter, &new_key->k_i); return bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL|flags); } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index f766bbc35cee..15b0d20b2f81 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2558,7 +2558,6 @@ reassemble: ret = bch2_trans_commit(&trans, &disk_res, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| commit_flags); bch2_disk_reservation_put(c, &disk_res); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c20eaa7418c2..3cada7cc354a 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -143,7 +143,6 @@ retry: bch2_inode_write(&trans, iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| 
BTREE_INSERT_NOFAIL); if (ret == -EINTR) @@ -279,7 +278,6 @@ retry: goto err_before_quota; ret = bch2_trans_commit(&trans, NULL, &journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); if (unlikely(ret)) { bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, @@ -409,7 +407,6 @@ static int __bch2_link(struct bch_fs *c, &dentry->d_name) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); } while (ret == -EINTR); @@ -466,7 +463,6 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) &inode_u, &dentry->d_name) ?: bch2_trans_commit(&trans, NULL, &dir->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); } while (ret == -EINTR); @@ -598,7 +594,6 @@ retry: mode) ?: bch2_trans_commit(&trans, NULL, &journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK); if (ret == -EINTR) goto retry; @@ -733,7 +728,6 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); btree_err: diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index a0fdd2ba92f6..cd230dc10984 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -79,7 +79,6 @@ static int remove_dirent(struct btree_trans *trans, struct bkey_s_c_dirent dirent) { return __bch2_trans_do(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, TRANS_RESET_MEM, @@ -99,7 +98,6 @@ static int reattach_inode(struct bch_fs *c, name = (struct qstr) QSTR(name_buf); ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_LAZY_RW, bch2_link_trans(&trans, lostfound_inode->bi_inum, inum, &dir_u, &inode_u, &name)); @@ -199,7 +197,6 @@ static int hash_redo_key(const struct bch_hash_desc desc, return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, tmp, BCH_HASH_SET_MUST_CREATE) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); } @@ -213,7 +210,6 @@ static int fsck_hash_delete_at(struct btree_trans *trans, retry: ret = bch2_hash_delete_at(trans, desc, info, iter) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); if (ret == -EINTR) { @@ -389,7 +385,6 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)", buf, strlen(buf), d->v.d_name, len)) { ret = __bch2_trans_do(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, TRANS_RESET_MEM, @@ -663,7 +658,6 @@ retry: n->v.d_type = mode_to_type(target.bi_mode); ret = __bch2_trans_do(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, TRANS_RESET_MEM, @@ -808,7 +802,6 @@ create_lostfound: bch2_inode_init_early(c, lostfound_inode); ret = bch2_trans_do(c, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_create_trans(&trans, @@ -1280,7 +1273,6 @@ static int check_inode(struct btree_trans *trans, bch2_inode_pack(&p, &u); ret = __bch2_trans_do(trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, TRANS_RESET_MEM, diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index e2407dcbcb35..77ac9ab7fc57 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -496,7 +496,6 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) bch2_trans_update(&trans, iter, &delete.k_i); ret = bch2_trans_commit(&trans, NULL, NULL, - 
BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); } while (ret == -EINTR); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 6e0444f3c4f9..4b54506b517c 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -313,7 +313,6 @@ int bch2_extent_update(struct btree_trans *trans, ret = bch2_trans_commit(trans, disk_res, journal_seq, BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| - BTREE_INSERT_ATOMIC| BTREE_INSERT_USE_RESERVE); if (!ret && i_sectors_delta) *i_sectors_delta += delta; @@ -1740,7 +1739,6 @@ retry: bch2_trans_update(&trans, iter, new.k); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_NOWAIT); if (ret == -EINTR) diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 4b59dcd04cce..db86420bd647 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -79,7 +79,6 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags bch2_trans_update(&trans, iter, sk.k); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL); /* diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 0aebae33d299..261e465341cd 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -153,7 +153,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op), - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| m->data_opts.btree_insert_flags); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a3ee2f474952..44a1dcdb135d 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -309,14 +309,12 @@ retry: 0, -((s64) k->k.size), BCH_BUCKET_MARK_OVERWRITE) ?: bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOMARK_OVERWRITES| BTREE_INSERT_NO_CLEAR_REPLICAS); } else { ret = bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| @@ -1009,7 +1007,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_init_early(c, &lostfound_inode); err = "error creating lost+found"; - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_ATOMIC, + ret = bch2_trans_do(c, NULL, NULL, 0, bch2_create_trans(&trans, BCACHEFS_ROOT_INO, &root_inode, &lostfound_inode, &lostfound, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index a65ada691ba1..5cad39fe031f 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -288,8 +288,7 @@ err: inode_u.bi_size < new_i_size) { inode_u.bi_size = new_i_size; ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, journal_seq, - BTREE_INSERT_ATOMIC); + bch2_trans_commit(&trans, NULL, journal_seq, 0); } } while (ret2 == -EINTR); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 6cef6c14fc89..806a638508a6 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -328,8 +328,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - return bch2_trans_do(c, NULL, &inode->ei_journal_seq, - BTREE_INSERT_ATOMIC, + return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, bch2_xattr_set(&trans, inode->v.i_ino, &inode->ei_str_hash, name, value, size, -- cgit v1.2.3 From 47c46c953163909944cd8ebf7e12107635fdb604 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 1 Apr 2020 17:28:39 -0400 Subject: bcachefs: Add another missing bch2_trans_iter_put()
call Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3cada7cc354a..4458a98b78ee 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -145,8 +145,6 @@ retry: &inode->ei_journal_seq, BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); - if (ret == -EINTR) - goto retry; /* * the btree node lock protects inode->ei_inode, not ei_update_lock; @@ -155,6 +153,11 @@ retry: if (!ret) bch2_inode_update_after_write(c, inode, &inode_u, fields); + bch2_trans_iter_put(&trans, iter); + + if (ret == -EINTR) + goto retry; + bch2_trans_exit(&trans); return ret < 0 ? ret : 0; } -- cgit v1.2.3 From 22d8a33d30d7a28d0dd972f36cc48b80f585d67b Mon Sep 17 00:00:00 2001 From: Yuxuan Shui Date: Fri, 22 May 2020 15:50:05 +0100 Subject: bcachefs: fix stack corruption When a bkey_on_stack is passed to bch_read_indirect_extent, there is no guarantee that it will be big enough to hold the bkey, and bch_read_indirect_extent is not aware of bkey_on_stack, so it cannot call realloc on it. This causes stack corruption. This commit makes bch_read_indirect_extent aware of bkey_on_stack so it can call realloc when appropriate. Tested-by: Yuxuan Shui Signed-off-by: Yuxuan Shui Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 2 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/io.c | 10 +++++----- fs/bcachefs/io.h | 7 ++++--- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 9644d4624f80..7ce6d71aca29 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -788,7 +788,7 @@ retry: sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(trans, - &offset_into_extent, sk.k); + &offset_into_extent, &sk); if (ret) break; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4458a98b78ee..6aff3203b4e1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -918,7 +918,7 @@ retry: sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, cur.k); + &offset_into_extent, &cur); if (ret) break; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 7df2b6c3f168..39a23c6570eb 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1642,7 +1642,7 @@ retry: sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, sk.k); + &offset_into_extent, &sk); if (ret) break; @@ -1944,14 +1944,14 @@ static void bch2_read_endio(struct bio *bio) int __bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, - struct bkey_i *orig_k) + struct bkey_on_stack *orig_k) { struct btree_iter *iter; struct bkey_s_c k; u64 reflink_offset; int ret; - reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) + + reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + *offset_into_extent; iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK, @@ -1974,7 +1974,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, } *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); - bkey_reassemble(orig_k, k); + bkey_on_stack_reassemble(orig_k, trans->c, k); err: bch2_trans_iter_put(trans, iter); return ret; @@ -2281,7 +2281,7 @@ retry: k = bkey_i_to_s_c(sk.k); ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, sk.k); + &offset_into_extent, &sk); if (ret) goto err; diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index
0a049cc14e42..f0fe0bf906d3 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -3,6 +3,7 @@ #define _BCACHEFS_IO_H #include "checksum.h" +#include "bkey_on_stack.h" #include "io_types.h" #define to_wbio(_bio) \ @@ -114,13 +115,13 @@ struct cache_promote_op; struct extent_ptr_decoded; int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, - struct bkey_i *); + struct bkey_on_stack *); static inline int bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, - struct bkey_i *k) + struct bkey_on_stack *k) { - return k->k.type == KEY_TYPE_reflink_p + return k->k->k.type == KEY_TYPE_reflink_p ? __bch2_read_indirect_extent(trans, offset_into_extent, k) : 0; } -- cgit v1.2.3 From 1ada160618d66bc57beacb4c35f13e9a4c269afa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 15 Jun 2020 14:58:47 -0400 Subject: bcachefs: Turn c->state_lock into an rwsem Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 ++-- fs/bcachefs/btree_gc.c | 1 + fs/bcachefs/buckets.c | 7 +++--- fs/bcachefs/error.c | 4 ++-- fs/bcachefs/fs.c | 12 +++++----- fs/bcachefs/super.c | 60 +++++++++++++++++++++++++------------------------- fs/bcachefs/sysfs.c | 19 +++++----------- 7 files changed, 50 insertions(+), 57 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index a219969357bc..8d9cc7eb6ad7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -551,8 +551,8 @@ struct bch_fs { struct super_block *vfs_sb; char name[40]; - /* ro/rw, add/remove devices: */ - struct mutex state_lock; + /* ro/rw, add/remove/resize devices: */ + struct rw_semaphore state_lock; /* Counts outstanding writes, for clean transition to read-only */ struct percpu_ref writes; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6589fe0bad6c..22aa845ea630 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -800,6 +800,7 @@ int bch2_gc(struct bch_fs *c, struct journal_keys *journal_keys, unsigned i, iter = 0; int ret; + lockdep_assert_held(&c->state_lock); trace_gc_start(c); down_write(&c->gc_lock); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 08e8b578fff5..5ee978c94568 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1967,6 +1967,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) int ret = -ENOMEM; unsigned i; + lockdep_assert_held(&c->state_lock); + memset(&free, 0, sizeof(free)); memset(&free_inc, 0, sizeof(free_inc)); memset(&alloc_heap, 0, sizeof(alloc_heap)); @@ -1993,7 +1995,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) bch2_copygc_stop(ca); if (resize) { - down_write(&c->gc_lock); down_write(&ca->bucket_lock); percpu_down_write(&c->mark_lock); } @@ -2036,10 +2037,8 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) nbuckets = ca->mi.nbuckets; - if (resize) { + if (resize) up_write(&ca->bucket_lock); - up_write(&c->gc_lock); - } if (start_copygc && bch2_copygc_start(c, ca)) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 1662a36244cd..cd46706fb6f5 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -37,7 +37,7 @@ void bch2_io_error_work(struct work_struct *work) struct bch_fs *c = ca->fs; bool dev; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, BCH_FORCE_IF_DEGRADED); if (dev @@ -47,7 +47,7 @@ void bch2_io_error_work(struct work_struct 
*work) bch_err(ca, "too many IO errors, setting %s RO", dev ? "device" : "filesystem"); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); } void bch2_io_error(struct bch_dev *ca) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 6aff3203b4e1..4538551ccca3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1342,16 +1342,16 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons if (IS_ERR(c)) return c; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); if (!test_bit(BCH_FS_STARTED, &c->flags)) { - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); closure_put(&c->cl); pr_err("err mounting %s: incomplete filesystem", dev_name); return ERR_PTR(-EINVAL); } - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); return c; @@ -1400,7 +1400,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) return ret; if (opts.read_only != c->opts.read_only) { - mutex_lock(&c->state_lock); + down_write(&c->state_lock); if (opts.read_only) { bch2_fs_read_only(c); @@ -1410,7 +1410,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) ret = bch2_fs_read_write(c); if (ret) { bch_err(c, "error going rw: %i", ret); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return -EINVAL; } @@ -1419,7 +1419,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) c->opts.read_only = opts.read_only; - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); } if (opts.errors >= 0) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 819d4392d529..3a7c48def9e8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -340,9 +340,9 @@ static void bch2_fs_read_only_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, read_only_work); - mutex_lock(&c->state_lock); + down_write(&c->state_lock); bch2_fs_read_only(c); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); } static void bch2_fs_read_only_async(struct bch_fs *c) @@ -534,9 +534,9 @@ void bch2_fs_stop(struct bch_fs *c) cancel_work_sync(&c->journal_seq_blacklist_gc_work); - mutex_lock(&c->state_lock); + down_write(&c->state_lock); bch2_fs_read_only(c); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); for_each_member_device(ca, c, i) if (ca->kobj.state_in_sysfs && @@ -607,7 +607,7 @@ static const char *bch2_fs_online(struct bch_fs *c) bch2_opts_create_sysfs_files(&c->opts_dir)) return "error creating sysfs objects"; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); err = "error creating sysfs objects"; __for_each_member_device(ca, c, i, NULL) @@ -617,7 +617,7 @@ static const char *bch2_fs_online(struct bch_fs *c) list_add(&c->list, &bch_fs_list); err = NULL; err: - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return err; } @@ -639,7 +639,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->minor = -1; c->disk_sb.fs_sb = true; - mutex_init(&c->state_lock); + init_rwsem(&c->state_lock); mutex_init(&c->sb_lock); mutex_init(&c->replicas_gc_lock); mutex_init(&c->btree_root_lock); @@ -832,7 +832,7 @@ int bch2_fs_start(struct bch_fs *c) unsigned i; int ret = -EINVAL; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); @@ -882,7 +882,7 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); ret = 0; out: - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return ret; err: switch (ret) { @@ -1376,9 +1376,9 @@ int 
bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, { int ret; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); ret = __bch2_dev_set_state(c, ca, new_state, flags); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return ret; } @@ -1391,7 +1391,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) unsigned dev_idx = ca->dev_idx, data; int ret = -EINVAL; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); /* * We consume a reference to ca->ref, regardless of whether we succeed @@ -1481,13 +1481,13 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) bch2_write_super(c); mutex_unlock(&c->sb_lock); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return 0; err: if (ca->mi.state == BCH_MEMBER_STATE_RW && !percpu_ref_is_zero(&ca->io_ref)) __bch2_dev_read_write(c, ca); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return ret; } @@ -1563,7 +1563,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) dev_usage_clear(ca); - mutex_lock(&c->state_lock); + down_write(&c->state_lock); mutex_lock(&c->sb_lock); err = "insufficient space in new superblock"; @@ -1624,12 +1624,12 @@ have_slot: goto err_late; } - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return 0; err_unlock: mutex_unlock(&c->sb_lock); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); err: if (ca) bch2_dev_free(ca); @@ -1652,11 +1652,11 @@ int bch2_dev_online(struct bch_fs *c, const char *path) const char *err; int ret; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); ret = bch2_read_super(path, &opts, &sb); if (ret) { - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return ret; } @@ -1687,10 +1687,10 @@ int bch2_dev_online(struct bch_fs *c, const char *path) bch2_write_super(c); mutex_unlock(&c->sb_lock); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return 0; err: - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); bch2_free_super(&sb); bch_err(c, "error bringing %s online: %s", path, err); return -EINVAL; @@ -1698,23 +1698,23 @@ err: int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) { - mutex_lock(&c->state_lock); + down_write(&c->state_lock); if (!bch2_dev_is_online(ca)) { bch_err(ca, "Already offline"); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return 0; } if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) { bch_err(ca, "Cannot offline required disk"); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return -EINVAL; } __bch2_dev_offline(c, ca); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return 0; } @@ -1723,7 +1723,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) struct bch_member *mi; int ret = 0; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); if (nbuckets < ca->mi.nbuckets) { bch_err(ca, "Cannot shrink yet"); @@ -1754,7 +1754,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) bch2_recalc_capacity(c); err: - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); return ret; } @@ -1834,13 +1834,13 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; err = "bch2_dev_online() error"; - mutex_lock(&c->state_lock); + down_write(&c->state_lock); for (i = 0; i < nr_devices; i++) if (bch2_dev_attach_bdev(c, &sb[i])) { - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); goto err_print; } - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); err = "insufficient devices"; if 
(!bch2_fs_may_start(c)) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 06b59e991312..663b59e78824 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -427,7 +427,7 @@ SHOW(bch2_fs) return 0; } -STORE(__bch2_fs) +STORE(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); @@ -485,8 +485,11 @@ STORE(__bch2_fs) if (attr == &sysfs_trigger_btree_coalesce) bch2_coalesce(c); - if (attr == &sysfs_trigger_gc) + if (attr == &sysfs_trigger_gc) { + down_read(&c->state_lock); bch2_gc(c, NULL, false, false); + up_read(&c->state_lock); + } if (attr == &sysfs_trigger_alloc_write) { bool wrote; @@ -501,6 +504,7 @@ STORE(__bch2_fs) sc.nr_to_scan = strtoul_or_return(buf); c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc); } + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -522,17 +526,6 @@ STORE(__bch2_fs) #endif return size; } - -STORE(bch2_fs) -{ - struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - - mutex_lock(&c->state_lock); - size = __bch2_fs_store(kobj, attr, buf, size); - mutex_unlock(&c->state_lock); - - return size; -} SYSFS_OPS(bch2_fs); struct attribute *bch2_fs_files[] = { -- cgit v1.2.3 From ac7eef0318c34c87e7ef9d574175917de1817ae6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 15 Aug 2020 22:41:35 -0400 Subject: bcachefs: Don't report inodes to statfs We don't have a limit on the number of inodes in a filesystem, so this is apparently the right way to report that. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4538551ccca3..562a7a833436 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1262,8 +1262,8 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = usage.capacity >> shift; buf->f_bfree = (usage.capacity - usage.used) >> shift; buf->f_bavail = buf->f_bfree; - buf->f_files = usage.nr_inodes; - buf->f_ffree = U64_MAX; + buf->f_files = 0; + buf->f_ffree = 0; fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); -- cgit v1.2.3 From 625104ea21386361b60d20ae696b9df6111236f5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 6 Sep 2020 22:58:28 -0400 Subject: bcachefs: Don't fail mount if device has been removed Also - make sure to show the devices we actually have open in /proc Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 20 +++++++++++++++++++- fs/bcachefs/super.c | 16 ++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 562a7a833436..a4a3085e5185 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1428,6 +1428,24 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) return ret; } +static int bch2_show_devname(struct seq_file *seq, struct dentry *root) +{ + struct bch_fs *c = root->d_sb->s_fs_info; + struct bch_dev *ca; + unsigned i; + bool first = true; + + for_each_online_member(ca, c, i) { + if (!first) + seq_putc(seq, ':'); + first = false; + seq_puts(seq, "/dev/"); + seq_puts(seq, ca->name); + } + + return 0; +} + static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; @@ -1451,7 +1469,6 @@ static int bch2_show_options(struct seq_file *seq, struct dentry 
*root) } return 0; - } static const struct super_operations bch_super_operations = { @@ -1461,6 +1478,7 @@ static const struct super_operations bch_super_operations = { .evict_inode = bch2_evict_inode, .sync_fs = bch2_sync_fs, .statfs = bch2_statfs, + .show_devname = bch2_show_devname, .show_options = bch2_show_options, .remount_fs = bch2_remount, #if 0 diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7377f44f15df..cb2b719165ce 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1790,7 +1790,6 @@ err: /* return with ref on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path) { - struct bch_dev *ca; dev_t dev; unsigned i; @@ -1816,6 +1815,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, { struct bch_sb_handle *sb = NULL; struct bch_fs *c = NULL; + struct bch_sb_field_members *mi; unsigned i, best_sb = 0; const char *err; int ret = -ENOMEM; @@ -1851,10 +1851,22 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, le64_to_cpu(sb[best_sb].sb->seq)) best_sb = i; - for (i = 0; i < nr_devices; i++) { + mi = bch2_sb_get_members(sb[best_sb].sb); + + i = 0; + while (i < nr_devices) { + if (i != best_sb && + !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) { + pr_info("%pg has been removed, skipping", sb[i].bdev); + bch2_free_super(&sb[i]); + array_remove_item(sb, nr_devices, i); + continue; + } + err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); if (err) goto err_print; + i++; } ret = -ENOMEM; -- cgit v1.2.3 From d5e4dcc29cce41b4bb51bf83c54940018d57e598 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 8 Sep 2020 18:30:32 -0400 Subject: bcachefs: Fix unmount path There was a long-standing race in the mount/unmount code - the VFS intends for mount/unmount synchronization to be handled by the list of superblocks, but we were still holding devices open after tearing down our superblock in the unmount path. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 - fs/bcachefs/fs.c | 161 ++++++++++++++++++++++--------------------- fs/bcachefs/super.c | 42 +++++++++---- fs/bcachefs/super.h | 2 + 4 files changed, 104 insertions(+), 102 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index baa8801c5412..f60d530313dc 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -491,7 +491,6 @@ enum { BCH_FS_ERRORS_FIXED, /* misc: */ - BCH_FS_BDEV_MOUNTED, BCH_FS_FIXED_GENS, BCH_FS_ALLOC_WRITTEN, BCH_FS_REBUILD_REPLICAS, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a4a3085e5185..3239c4717cc6 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1300,91 +1300,36 @@ static struct bch_fs *bch2_path_to_fs(const char *path) return ERR_PTR(ret); c = bch2_dev_to_fs(dev); - return c ?: ERR_PTR(-ENOENT); -} - -static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs, - unsigned nr_devs, struct bch_opts opts) -{ - struct bch_fs *c, *c1, *c2; - size_t i; - - if (!nr_devs) - return ERR_PTR(-EINVAL); - - c = bch2_fs_open(devs, nr_devs, opts); - - if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) { - /* - * Already open?
- * Look up each block device, make sure they all belong to a - * filesystem and they all belong to the _same_ filesystem - */ - - c1 = bch2_path_to_fs(devs[0]); - if (IS_ERR(c1)) - return c; - - for (i = 1; i < nr_devs; i++) { - c2 = bch2_path_to_fs(devs[i]); - if (!IS_ERR(c2)) - closure_put(&c2->cl); - - if (c1 != c2) { - closure_put(&c1->cl); - return c; - } - } - - c = c1; - } - - if (IS_ERR(c)) - return c; - - down_write(&c->state_lock); - - if (!test_bit(BCH_FS_STARTED, &c->flags)) { - up_write(&c->state_lock); + if (c) closure_put(&c->cl); - pr_err("err mounting %s: incomplete filesystem", dev_name); - return ERR_PTR(-EINVAL); - } - - up_write(&c->state_lock); - - set_bit(BCH_FS_BDEV_MOUNTED, &c->flags); - return c; + return c ?: ERR_PTR(-ENOENT); } -static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name, - struct bch_opts opts) +static char **split_devs(const char *_dev_name, unsigned *nr) { char *dev_name = NULL, **devs = NULL, *s; - struct bch_fs *c = ERR_PTR(-ENOMEM); size_t i, nr_devs = 0; dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) - goto err; + return NULL; for (s = dev_name; s; s = strchr(s + 1, ':')) nr_devs++; - devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL); - if (!devs) - goto err; + devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); + if (!devs) { + kfree(dev_name); + return NULL; + } for (i = 0, s = dev_name; s; (s = strchr(s, ':')) && (*s++ = '\0')) devs[i++] = s; - c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts); -err: - kfree(devs); - kfree(dev_name); - return c; + *nr = nr_devs; + return devs; } static int bch2_remount(struct super_block *sb, int *flags, char *data) @@ -1471,6 +1416,13 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static void bch2_put_super(struct super_block *sb) +{ + struct bch_fs *c = sb->s_fs_info; + + __bch2_fs_stop(c); +} + static const struct super_operations bch_super_operations = { .alloc_inode = bch2_alloc_inode, .destroy_inode = bch2_destroy_inode, @@ -1481,24 +1433,39 @@ static const struct super_operations bch_super_operations = { .show_devname = bch2_show_devname, .show_options = bch2_show_options, .remount_fs = bch2_remount, -#if 0 .put_super = bch2_put_super, +#if 0 .freeze_fs = bch2_freeze, .unfreeze_fs = bch2_unfreeze, #endif }; -static int bch2_test_super(struct super_block *s, void *data) -{ - return s->s_fs_info == data; -} - static int bch2_set_super(struct super_block *s, void *data) { s->s_fs_info = data; return 0; } +static int bch2_noset_super(struct super_block *s, void *data) +{ + return -EBUSY; +} + +static int bch2_test_super(struct super_block *s, void *data) +{ + struct bch_fs *c = s->s_fs_info; + struct bch_fs **devs = data; + unsigned i; + + if (!c) + return false; + + for (i = 0; devs[i]; i++) + if (c != devs[i]) + return false; + return true; +} + static struct dentry *bch2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -1507,7 +1474,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, struct super_block *sb; struct inode *vinode; struct bch_opts opts = bch2_opts_empty(); - unsigned i; + char **devs; + struct bch_fs **devs_to_fs = NULL; + unsigned i, nr_devs; int ret; opt_set(opts, read_only, (flags & SB_RDONLY) != 0); @@ -1516,21 +1485,41 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (ret) return ERR_PTR(ret); - c = bch2_open_as_blockdevs(dev_name, opts); - if (IS_ERR(c)) - return ERR_CAST(c); + devs = split_devs(dev_name, 
&nr_devs); + if (!devs) + return ERR_PTR(-ENOMEM); - sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c); - if (IS_ERR(sb)) { - closure_put(&c->cl); - return ERR_CAST(sb); + devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); + if (!devs_to_fs) { + sb = ERR_PTR(-ENOMEM); + goto got_sb; } - BUG_ON(sb->s_fs_info != c); + for (i = 0; i < nr_devs; i++) + devs_to_fs[i] = bch2_path_to_fs(devs[i]); - if (sb->s_root) { - closure_put(&c->cl); + sb = sget(fs_type, bch2_test_super, bch2_noset_super, + flags|SB_NOSEC, devs_to_fs); + if (!IS_ERR(sb)) + goto got_sb; + + c = bch2_fs_open(devs, nr_devs, opts); + + if (!IS_ERR(c)) + sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); + else + sb = ERR_CAST(c); +got_sb: + kfree(devs_to_fs); + kfree(devs[0]); + kfree(devs); + + if (IS_ERR(sb)) + return ERR_CAST(sb); + + c = sb->s_fs_info; + if (sb->s_root) { if ((flags ^ sb->s_flags) & SB_RDONLY) { ret = -EBUSY; goto err_put_super; @@ -1603,11 +1592,7 @@ static void bch2_kill_sb(struct super_block *sb) struct bch_fs *c = sb->s_fs_info; generic_shutdown_super(sb); - - if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags)) - bch2_fs_stop(c); - else - closure_put(&c->cl); + bch2_fs_free(c); } static struct file_system_type bcache_fs_type = { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index cb2b719165ce..d0d46023163c 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -465,7 +465,7 @@ int bch2_fs_read_write_early(struct bch_fs *c) /* Filesystem startup/shutdown: */ -static void bch2_fs_free(struct bch_fs *c) +static void __bch2_fs_free(struct bch_fs *c) { unsigned i; @@ -522,10 +522,10 @@ static void bch2_fs_release(struct kobject *kobj) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - bch2_fs_free(c); + __bch2_fs_free(c); } -void bch2_fs_stop(struct bch_fs *c) +void __bch2_fs_stop(struct bch_fs *c) { struct bch_dev *ca; unsigned i; @@ -555,13 +555,6 @@ void bch2_fs_stop(struct bch_fs *c) kobject_put(&c->opts_dir); kobject_put(&c->internal); - mutex_lock(&bch_fs_list_lock); - list_del(&c->list); - mutex_unlock(&bch_fs_list_lock); - - closure_sync(&c->cl); - closure_debug_destroy(&c->cl); - /* btree prefetch might have kicked off reads in the background: */ bch2_btree_flush_all_reads(c); @@ -571,16 +564,39 @@ void bch2_fs_stop(struct bch_fs *c) cancel_work_sync(&c->btree_write_error_work); cancel_delayed_work_sync(&c->pd_controllers_update); cancel_work_sync(&c->read_only_work); +} - for (i = 0; i < c->sb.nr_devices; i++) - if (c->devs[i]) - bch2_dev_free(rcu_dereference_protected(c->devs[i], 1)); +void bch2_fs_free(struct bch_fs *c) +{ + unsigned i; + + mutex_lock(&bch_fs_list_lock); + list_del(&c->list); + mutex_unlock(&bch_fs_list_lock); + + closure_sync(&c->cl); + closure_debug_destroy(&c->cl); + + for (i = 0; i < c->sb.nr_devices; i++) { + struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true); + + if (ca) { + bch2_free_super(&ca->disk_sb); + bch2_dev_free(ca); + } + } bch_verbose(c, "shutdown complete"); kobject_put(&c->kobj); } +void bch2_fs_stop(struct bch_fs *c) +{ + __bch2_fs_stop(c); + bch2_fs_free(c); +} + static const char *bch2_fs_online(struct bch_fs *c) { struct bch_dev *ca; diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index fab4bee9c90e..795229e2d6a1 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -230,6 +230,8 @@ static inline void bch2_fs_lazy_rw(struct bch_fs *c) bch2_fs_read_write_early(c); } +void __bch2_fs_stop(struct bch_fs *); +void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct 
bch_fs *); int bch2_fs_start(struct bch_fs *); -- cgit v1.2.3 From 61ce38b862c17acccd0df0004d69710d8b438e99 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 Oct 2020 22:18:21 -0400 Subject: bcachefs: Fix journal_seq_copy() We also need to update the journal's bloom filter of inode numbers that each journal write has updates for - in case the inode gets evicted before it gets fsynced. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 25 ++++++++++++++----------- fs/bcachefs/journal.c | 15 +++++++++++++++ fs/bcachefs/journal.h | 1 + 3 files changed, 30 insertions(+), 11 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3239c4717cc6..a488dcebc11a 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -40,7 +40,8 @@ static void bch2_vfs_inode_init(struct bch_fs *, struct bch_inode_info *, struct bch_inode_unpacked *); -static void journal_seq_copy(struct bch_inode_info *dst, +static void journal_seq_copy(struct bch_fs *c, + struct bch_inode_info *dst, u64 journal_seq) { u64 old, v = READ_ONCE(dst->ei_journal_seq); @@ -51,6 +52,8 @@ static void journal_seq_copy(struct bch_inode_info *dst, if (old >= journal_seq) break; } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); + + bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq); } static void __pagecache_lock_put(struct pagecache_lock *lock, long i) @@ -294,12 +297,12 @@ err_before_quota: if (!tmpfile) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(dir, journal_seq); + journal_seq_copy(c, dir, journal_seq); mutex_unlock(&dir->ei_update_lock); } bch2_vfs_inode_init(c, inode, &inode_u); - journal_seq_copy(inode, journal_seq); + journal_seq_copy(c, inode, journal_seq); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -320,7 +323,7 @@ err_before_quota: * We raced, another process pulled the new inode into cache * before us: */ - journal_seq_copy(old, journal_seq); + journal_seq_copy(c, old, journal_seq); make_bad_inode(&inode->v); iput(&inode->v); @@ -416,7 +419,7 @@ static int __bch2_link(struct bch_fs *c, if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(inode, dir->ei_journal_seq); + journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); @@ -473,7 +476,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(inode, dir->ei_journal_seq); + journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, @@ -509,7 +512,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, if (unlikely(ret)) goto err; - journal_seq_copy(dir, inode->ei_journal_seq); + journal_seq_copy(c, dir, inode->ei_journal_seq); ret = __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) @@ -609,22 +612,22 @@ retry: bch2_inode_update_after_write(c, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(src_dir, journal_seq); + journal_seq_copy(c, src_dir, journal_seq); if (src_dir != dst_dir) { bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(dst_dir, journal_seq); + journal_seq_copy(c, dst_dir, journal_seq); }
bch2_inode_update_after_write(c, src_inode, &src_inode_u, ATTR_CTIME); - journal_seq_copy(src_inode, journal_seq); + journal_seq_copy(c, src_inode, journal_seq); if (dst_inode) { bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, ATTR_CTIME); - journal_seq_copy(dst_inode, journal_seq); + journal_seq_copy(c, dst_inode, journal_seq); } err: bch2_trans_exit(&trans); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 8b0746e092de..d1e4a8162ddd 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -17,6 +17,8 @@ #include "super-io.h" #include "trace.h" +static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64); + static bool __journal_entry_is_open(union journal_res_state state) { return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL; @@ -304,6 +306,19 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode) return seq; } +void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq) +{ + size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); + struct journal_buf *buf; + + spin_lock(&j->lock); + + if ((buf = journal_seq_to_buf(j, seq))) + set_bit(h, buf->has_inode); + + spin_unlock(&j->lock); +} + static int __journal_res_get(struct journal *j, struct journal_res *res, unsigned flags) { diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 26654b9cf0ea..b8e74c483a23 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -147,6 +147,7 @@ static inline u64 journal_cur_seq(struct journal *j) } u64 bch2_inode_journal_seq(struct journal *, u64); +void bch2_journal_set_has_inum(struct journal *, u64, u64); static inline int journal_state_count(union journal_res_state s, int idx) { -- cgit v1.2.3 From a10e677a1555e070f1a7b3c1dc3e3189d462ab9e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 23 Oct 2020 21:07:17 -0400 Subject: bcachefs: Fix for passing target= opts as mount opts Some options can't be parsed until the filesystem is initialized; previously, passing these options to mount or remount would cause mount to fail. This changes the mount path so that we parse the options passed in twice, and just ignore any options that can't be parsed the first time.
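Condensed, the mount path this establishes looks roughly like the following sketch (error handling abbreviated; the fs.c hunks below show the real flow):

    /* first pass: no bch_fs yet, so options needing a started fs are skipped */
    ret = bch2_parse_mount_opts(NULL, &opts, data);
    if (ret)
        return ERR_PTR(ret);

    c = bch2_fs_open(devs, nr_devs, opts);

    /* second pass: the fs is started, so target= and friends can be resolved */
    ret = bch2_parse_mount_opts(c, &opts, data);
    if (ret) {
        bch2_fs_stop(c);
        return ERR_PTR(ret);
    }

    bch2_opts_apply(&c->opts, opts);

The first pass works because bch2_opt_parse() now returns 0 instead of -EINVAL for BCH_OPT_FN options when no bch_fs is available yet.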
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 25 +++++++++++++++++++------ fs/bcachefs/opts.c | 7 ++++--- fs/bcachefs/opts.h | 2 +- 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a488dcebc11a..b214d58e94e9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1343,7 +1343,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); - ret = bch2_parse_mount_opts(&opts, data); + ret = bch2_parse_mount_opts(c, &opts, data); if (ret) return ret; @@ -1484,7 +1484,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, opt_set(opts, read_only, (flags & SB_RDONLY) != 0); - ret = bch2_parse_mount_opts(&opts, data); + ret = bch2_parse_mount_opts(NULL, &opts, data); if (ret) return ERR_PTR(ret); @@ -1507,11 +1507,24 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, goto got_sb; c = bch2_fs_open(devs, nr_devs, opts); - - if (!IS_ERR(c)) - sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); - else + if (IS_ERR(c)) { sb = ERR_CAST(c); + goto got_sb; + } + + /* Some options can't be parsed until after the fs is started: */ + ret = bch2_parse_mount_opts(c, &opts, data); + if (ret) { + bch2_fs_stop(c); + sb = ERR_PTR(ret); + goto got_sb; + } + + bch2_opts_apply(&c->opts, opts); + + sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); + if (IS_ERR(sb)) + bch2_fs_stop(c); got_sb: kfree(devs_to_fs); kfree(devs[0]); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index afe25cd26c06..97a36ac0beea 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -247,7 +247,7 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, break; case BCH_OPT_FN: if (!c) - return -EINVAL; + return 0; return opt->parse(c, val, res); } @@ -325,7 +325,8 @@ int bch2_opts_check_may_set(struct bch_fs *c) return 0; } -int bch2_parse_mount_opts(struct bch_opts *opts, char *options) +int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, + char *options) { char *opt, *name, *val; int ret, id; @@ -340,7 +341,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options) if (id < 0) goto bad_opt; - ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v); + ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v); if (ret < 0) goto bad_val; } else { diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 6aaabb24d3ed..1ddb9c57b3a5 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -424,7 +424,7 @@ void bch2_opt_to_text(struct printbuf *, struct bch_fs *, int bch2_opt_check_may_set(struct bch_fs *, int, u64); int bch2_opts_check_may_set(struct bch_fs *); -int bch2_parse_mount_opts(struct bch_opts *, char *); +int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, char *); /* inode opts: */ -- cgit v1.2.3 From 13dcd4abcd8d4e177f4f75ea3f5c8838a8a8c3c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 24 Oct 2020 20:56:47 -0400 Subject: bcachefs: Fix rare use after free in read path If the bkey_on_stack_reassemble() call in __bch2_read_indirect_extent() reallocates the buffer, k in bch2_read - which we pointed at the bkey_on_stack buffer - will now point to a stale buffer. Whoops. 
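The hazard is worth restating as a sketch of the corrected pattern (condensed from the read-retry loops in the hunks below): any bkey_s_c derived from the bkey_on_stack buffer is invalidated when that buffer is reallocated, so k must be re-derived only after the call that may reallocate it:

    bkey_on_stack_reassemble(&sk, c, k);

    ret = bch2_read_indirect_extent(trans, &offset_into_extent, &sk);
    if (ret)
        break;

    /* sk.k may have been reallocated above; refresh k before using it */
    k = bkey_i_to_s_c(sk.k);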
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 7 ++++--- fs/bcachefs/fs.c | 11 ++++++----- fs/bcachefs/io.c | 6 ++++-- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 0290f7410a5c..edc3d73d26ba 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -782,18 +782,19 @@ retry: if (ret) break; - bkey_on_stack_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; + bkey_on_stack_reassemble(&sk, c, k); + ret = bch2_read_indirect_extent(trans, &offset_into_extent, &sk); if (ret) break; + k = bkey_i_to_s_c(sk.k); + sectors = min(sectors, k.k->size - offset_into_extent); bch2_trans_unlock(trans); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b214d58e94e9..a61d5f8aecd6 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -911,20 +911,21 @@ retry: continue; } - bkey_on_stack_realloc(&cur, c, k.k->u64s); - bkey_on_stack_realloc(&prev, c, k.k->u64s); - bkey_reassemble(cur.k, k); - k = bkey_i_to_s_c(cur.k); - offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; + bkey_on_stack_realloc(&cur, c, k.k->u64s); + bkey_on_stack_realloc(&prev, c, k.k->u64s); + bkey_reassemble(cur.k, k); + ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &cur); if (ret) break; + k = bkey_i_to_s_c(cur.k); + sectors = min(sectors, k.k->size - offset_into_extent); if (offset_into_extent) diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 5c12bfed3a7b..03f5b9034aa7 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1667,7 +1667,6 @@ retry: unsigned bytes, sectors, offset_into_extent; bkey_on_stack_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); @@ -1678,6 +1677,8 @@ retry: if (ret) break; + k = bkey_i_to_s_c(sk.k); + sectors = min(sectors, k.k->size - offset_into_extent); bch2_trans_unlock(&trans); @@ -2311,13 +2312,14 @@ retry: sectors = k.k->size - offset_into_extent; bkey_on_stack_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &sk); if (ret) goto err; + k = bkey_i_to_s_c(sk.k); + /* * With indirect extents, the amount of data to read is the min * of the original extent and the indirect extent: -- cgit v1.2.3 From e7b854b1f76d34eeea6baa3a1b5eaa1f85ae6340 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 26 Oct 2020 17:03:28 -0400 Subject: bcachefs: fiemap fixes - fiemap didn't know about inline extents, fixed - advancing to the next extent after we'd chased a pointer to the reflink btree was wrong, fixed Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a61d5f8aecd6..5119266a8493 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -831,7 +831,7 @@ static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, struct bkey_s_c k, unsigned flags) { - if (bkey_extent_is_data(k.k)) { + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -862,6 +862,12 @@ static int bch2_fill_extent(struct bch_fs *c, } return 0; + } else if (bkey_extent_is_inline_data(k.k)) 
{ + return fiemap_fill_next_extent(info, + bkey_start_offset(k.k) << 9, + 0, k.k->size << 9, + flags| + FIEMAP_EXTENT_DATA_INLINE); } else if (k.k->type == KEY_TYPE_reservation) { return fiemap_fill_next_extent(info, bkey_start_offset(k.k) << 9, @@ -928,11 +934,10 @@ retry: sectors = min(sectors, k.k->size - offset_into_extent); - if (offset_into_extent) - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); + bch2_cut_front(POS(k.k->p.inode, + bkey_start_offset(k.k) + + offset_into_extent), + cur.k); bch2_key_resize(&cur.k->k, sectors); cur.k->k.p = iter->pos; cur.k->k.p.offset += cur.k->k.size; @@ -947,10 +952,8 @@ retry: bkey_copy(prev.k, cur.k); have_extent = true; - if (k.k->type == KEY_TYPE_reflink_v) - bch2_btree_iter_set_pos(iter, k.k->p); - else - bch2_btree_iter_next(iter); + bch2_btree_iter_set_pos(iter, + POS(iter->pos.inode, iter->pos.offset + sectors)); } if (ret == -EINTR) -- cgit v1.2.3 From 527087c741dc1199fbf4a635a80bf4839a9a8288 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 27 Oct 2020 14:10:52 -0400 Subject: bcachefs: Fix stack corruption A bkey_on_stack_realloc() call was in the wrong place, and broken for indirect extents Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5119266a8493..2ed80ef41d1a 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -921,9 +921,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_realloc(&cur, c, k.k->u64s); - bkey_on_stack_realloc(&prev, c, k.k->u64s); - bkey_reassemble(cur.k, k); + bkey_on_stack_reassemble(&cur, c, k); ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &cur); @@ -931,6 +929,7 @@ retry: break; k = bkey_i_to_s_c(cur.k); + bkey_on_stack_realloc(&prev, c, k.k->u64s); sectors = min(sectors, k.k->size - offset_into_extent); -- cgit v1.2.3 From df082b3a50e02bb8dfc583cea29ab94ab1a04692 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Nov 2020 19:49:23 -0500 Subject: bcachefs: Report inode counts via statfs Took awhile to figure out exactly what statfs wanted... 
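The arithmetic in the hunk below is worth spelling out: the usage numbers are in 512-byte sectors (which matches the shift = s_blocksize_bits - 9 conversion in the surrounding code), so at the assumed 64 bytes per inode each free sector can hold 512 / 64 = 8 more inodes, hence the << 3:

    u64 avail_inodes = ((usage.capacity - usage.used) << 3);

    /* e.g. 1 GiB free = 2097152 free sectors -> f_ffree of ~16.8M inodes */
    buf->f_files = usage.nr_inodes + avail_inodes;
    buf->f_ffree = avail_inodes;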
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 2ed80ef41d1a..917a08ddc148 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1261,6 +1261,11 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) struct bch_fs *c = sb->s_fs_info; struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); unsigned shift = sb->s_blocksize_bits - 9; + /* + * this assumes inodes take up 64 bytes, which is a decent average + * number: + */ + u64 avail_inodes = ((usage.capacity - usage.used) << 3); u64 fsid; buf->f_type = BCACHEFS_STATFS_MAGIC; @@ -1268,8 +1273,9 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = usage.capacity >> shift; buf->f_bfree = (usage.capacity - usage.used) >> shift; buf->f_bavail = buf->f_bfree; - buf->f_files = 0; - buf->f_ffree = 0; + + buf->f_files = usage.nr_inodes + avail_inodes; + buf->f_ffree = avail_inodes; fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64)); -- cgit v1.2.3 From b735d73a00d5d9f5652a299146d518b7eea47b7b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Nov 2020 12:16:05 -0500 Subject: bcachefs: Build fixes for 32bit x86 PAGE_SIZE and size_t are not unsigned longs on 32 bit, annoying... also switch to atomic64_cmpxchg instead of cmpxchg() for journal_seq_copy, as atomic64_cmpxchg has a fallback that uses spinlocks for when it's not supported. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 2 +- fs/bcachefs/fs.c | 7 ++++++- fs/bcachefs/io.c | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index e5033b392432..42331f0e54e7 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1586,7 +1586,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) size_t i; spin_lock(&c->ec_stripes_heap_lock); - for (i = 0; i < min(h->used, 20UL); i++) { + for (i = 0; i < min_t(size_t, h->used, 20); i++) { m = genradix_ptr(&c->stripes[0], h->data[i].idx); pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 917a08ddc148..3e3ab4e53f33 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -44,6 +44,11 @@ static void journal_seq_copy(struct bch_fs *c, struct bch_inode_info *dst, u64 journal_seq) { + /* + * atomic64_cmpxchg has a fallback for archs that don't support it, + * cmpxchg does not: + */ + atomic64_t *dst_seq = (void *) &dst->ei_journal_seq; u64 old, v = READ_ONCE(dst->ei_journal_seq); do { @@ -51,7 +56,7 @@ static void journal_seq_copy(struct bch_fs *c, if (old >= journal_seq) break; - } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old); + } while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old); bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq); } diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 346d77d68ade..6df99ac013a1 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -180,7 +180,7 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, while (size) { struct page *page = __bio_alloc_page_pool(c, &using_mempool); - unsigned len = min(PAGE_SIZE, size); + unsigned len = min_t(size_t, PAGE_SIZE, size); BUG_ON(!bio_add_page(bio, page, len, 0)); size -= len; -- cgit v1.2.3 From 
eb8e6e9ccbb4ba37c04a7cff032975b4df7d63c7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Nov 2020 12:33:12 -0500 Subject: bcachefs: Deadlock prevention for ei_pagecache_lock In the dio write path, when get_user_pages() invokes the fault handler we have a recursive locking situation - we have to handle the lock ordering ourselves or we have a deadlock: this patch addresses that by checking for locking ordering violations and doing the unlock/relock dance if necessary. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/bcachefs/fs.c | 5 ++++ fs/bcachefs/fs.h | 1 + 3 files changed, 72 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 658d19c04b99..1afdd775ffb3 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -44,6 +44,22 @@ static inline bool bio_full(struct bio *bio, unsigned len) return false; } +static inline struct address_space *faults_disabled_mapping(void) +{ + return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); +} + +static inline void set_fdm_dropped_locks(void) +{ + current->faults_disabled_mapping = + (void *) (((unsigned long) current->faults_disabled_mapping)|1); +} + +static inline bool fdm_dropped_locks(void) +{ + return ((unsigned long) current->faults_disabled_mapping) & 1; +} + struct quota_res { u64 sectors; }; @@ -501,10 +517,35 @@ static void bch2_set_page_dirty(struct bch_fs *c, vm_fault_t bch2_page_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; + struct address_space *mapping = file->f_mapping; + struct address_space *fdm = faults_disabled_mapping(); struct bch_inode_info *inode = file_bch_inode(file); int ret; + if (fdm == mapping) + return VM_FAULT_SIGBUS; + + /* Lock ordering: */ + if (fdm > mapping) { + struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); + + if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock)) + goto got_lock; + + bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock); + + bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(&inode->ei_pagecache_lock); + + bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock); + + /* Signal that lock has been dropped: */ + set_fdm_dropped_locks(); + return VM_FAULT_SIGBUS; + } + bch2_pagecache_add_get(&inode->ei_pagecache_lock); +got_lock: ret = filemap_fault(vmf); bch2_pagecache_add_put(&inode->ei_pagecache_lock); @@ -1765,14 +1806,16 @@ static long bch2_dio_write_loop(struct dio_write *dio) struct bio *bio = &dio->op.wbio.bio; struct bvec_iter_all iter; struct bio_vec *bv; - unsigned unaligned; - bool sync = dio->sync; + unsigned unaligned, iter_count; + bool sync = dio->sync, dropped_locks; long ret; if (dio->loop) goto loop; while (1) { + iter_count = dio->iter.count; + if (kthread) kthread_use_mm(dio->mm); BUG_ON(current->faults_disabled_mapping); @@ -1780,13 +1823,34 @@ static long bch2_dio_write_loop(struct dio_write *dio) ret = bio_iov_iter_get_pages(bio, &dio->iter); + dropped_locks = fdm_dropped_locks(); + current->faults_disabled_mapping = NULL; if (kthread) kthread_unuse_mm(dio->mm); + /* + * If the fault handler returned an error but also signalled + * that it dropped & retook ei_pagecache_lock, we just need to + * re-shoot down the page cache and retry: + */ + if (dropped_locks && ret) + ret = 0; + if (unlikely(ret < 0)) goto err; + if (unlikely(dropped_locks)) { + ret = write_invalidate_inode_pages_range(mapping, + req->ki_pos, + 
req->ki_pos + iter_count - 1); + if (unlikely(ret)) + goto err; + + if (!bio->bi_iter.bi_size) + continue; + } + unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); bio->bi_iter.bi_size -= unaligned; iov_iter_revert(&dio->iter, unaligned); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3e3ab4e53f33..231a5433577f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -93,6 +93,11 @@ void bch2_pagecache_add_put(struct pagecache_lock *lock) __pagecache_lock_put(lock, 1); } +bool bch2_pagecache_add_tryget(struct pagecache_lock *lock) +{ + return __pagecache_lock_tryget(lock, 1); +} + void bch2_pagecache_add_get(struct pagecache_lock *lock) { __pagecache_lock_get(lock, 1); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index b3a2993dd9bc..7c095b856b05 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -26,6 +26,7 @@ static inline void pagecache_lock_init(struct pagecache_lock *lock) } void bch2_pagecache_add_put(struct pagecache_lock *); +bool bch2_pagecache_add_tryget(struct pagecache_lock *); void bch2_pagecache_add_get(struct pagecache_lock *); void bch2_pagecache_block_put(struct pagecache_lock *); void bch2_pagecache_block_get(struct pagecache_lock *); -- cgit v1.2.3 From 0b5c9f59401e4f339c2b716d2f9210114b7885c1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 15 Nov 2020 20:52:55 -0500 Subject: bcachefs: Set preallocated transaction mem to avoid restarts this will reduce transaction restarts, from observation of tracepoints. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 8 ++++++-- fs/bcachefs/btree_update_interior.c | 19 +++++++++++-------- fs/bcachefs/fs.c | 3 ++- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 007d69656660..6eebbadcef45 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2370,8 +2370,12 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, */ bch2_trans_alloc_iters(trans, c); - if (expected_mem_bytes) - bch2_trans_preload_mem(trans, expected_mem_bytes); + if (expected_mem_bytes) { + expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes); + trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL); + if (trans->mem) + trans->mem_bytes = expected_mem_bytes; + } trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index c1f822b96c48..0a83d9fdecd1 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -523,6 +523,7 @@ static void btree_update_nodes_written(struct btree_update *as) { struct bch_fs *c = as->c; struct btree *b = as->b; + struct btree_trans trans; u64 journal_seq = 0; unsigned i; int ret; @@ -540,14 +541,16 @@ static void btree_update_nodes_written(struct btree_update *as) * journal reclaim does btree updates when flushing bkey_cached entries, * which may require allocations as well. 
*/ - ret = bch2_trans_do(c, &as->disk_res, &journal_seq, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE| - BTREE_INSERT_USE_ALLOC_RESERVE| - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_JOURNAL_RESERVED, - btree_update_nodes_written_trans(&trans, as)); + bch2_trans_init(&trans, c, 0, 512); + ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_USE_RESERVE| + BTREE_INSERT_USE_ALLOC_RESERVE| + BTREE_INSERT_NOCHECK_RW| + BTREE_INSERT_JOURNAL_RECLAIM| + BTREE_INSERT_JOURNAL_RESERVED, + btree_update_nodes_written_trans(&trans, as)); + bch2_trans_exit(&trans); BUG_ON(ret && !bch2_journal_error(&c->journal)); if (b) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 231a5433577f..480469784152 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -278,7 +278,8 @@ __bch2_create(struct mnt_idmap *idmap, if (!tmpfile) mutex_lock(&dir->ei_update_lock); - bch2_trans_init(&trans, c, 8, 1024); + bch2_trans_init(&trans, c, 8, + 2048 + (!tmpfile ? dentry->d_name.len : 0)); retry: bch2_trans_begin(&trans); -- cgit v1.2.3 From 6584e84a978ed710ee295201647b7f05dbbc56ee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 20 Nov 2020 21:21:28 -0500 Subject: bcachefs: Don't use bkey cache for inode update in fsck fsck doesn't know about the btree key cache, and non-cached iterators aren't cache coherent (yet?) Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 2 +- fs/bcachefs/inode.c | 14 ++++++++++---- fs/bcachefs/inode.h | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 480469784152..a3810493826b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1262,7 +1262,7 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); - bch2_inode_rm(c, inode->v.i_ino); + bch2_inode_rm(c, inode->v.i_ino, true); } } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0c5035270846..09ce6c29b88c 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1254,7 +1254,7 @@ static int check_inode(struct btree_trans *trans, bch2_fs_lazy_rw(c); - ret = bch2_inode_rm(c, u.bi_inum); + ret = bch2_inode_rm(c, u.bi_inum, false); if (ret) bch_err(c, "error in fsck: error %i while deleting inode", ret); return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index f00778d78271..b1f420776d9a 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -542,7 +542,7 @@ found_slot: return ret; } -int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) +int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) { struct btree_trans trans; struct btree_iter *iter; @@ -576,9 +576,15 @@ retry: bi_generation = 0; - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), - BTREE_ITER_CACHED|BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_cached(iter); + if (cached) { + iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), + BTREE_ITER_CACHED|BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_cached(iter); + } else { + iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(iter); + } ret = bkey_err(k); if (ret) diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index ef7e885dce0c..dbdfcf63d079 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -71,7 +71,7 @@ void 
bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *); -int bch2_inode_rm(struct bch_fs *, u64); +int bch2_inode_rm(struct bch_fs *, u64, bool); int bch2_inode_find_by_inum_trans(struct btree_trans *, u64, struct bch_inode_unpacked *); -- cgit v1.2.3 From 33c74e4119a91c3ae87fc207777e34fdbb613c66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Dec 2020 14:27:20 -0500 Subject: bcachefs: Flag inodes that had btree update errors On write error, the vfs inode's i_size may be inconsistent with the btree inode's i_size - flag this so we don't have spurious assertions. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 13 +++++++++++-- fs/bcachefs/fs.c | 1 + fs/bcachefs/fs.h | 7 +++++++ 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index c10192e2a688..2d31547446ac 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -994,6 +994,8 @@ static void bch2_writepage_io_done(struct closure *cl) unsigned i; if (io->op.error) { + set_bit(EI_INODE_ERROR, &io->inode->ei_flags); + bio_for_each_segment_all(bvec, bio, iter) { struct bch_page_state *s; @@ -1916,7 +1918,13 @@ loop: bio_for_each_segment_all(bv, bio, iter) put_page(bv->bv_page); - if (!dio->iter.count || dio->op.error) + + if (dio->op.error) { + set_bit(EI_INODE_ERROR, &inode->ei_flags); + break; + } + + if (!dio->iter.count) break; bio_reset(bio, NULL, REQ_OP_WRITE); @@ -2306,7 +2314,8 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) if (ret) goto err; - BUG_ON(inode->v.i_size < inode_u.bi_size); + WARN_ON(!test_bit(EI_INODE_ERROR, &inode->ei_flags) && + inode->v.i_size < inode_u.bi_size); if (iattr->ia_size > inode->v.i_size) { ret = bch2_extend(inode, &inode_u, iattr); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a3810493826b..7cd3f243d1ed 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1161,6 +1161,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->v.i_generation = bi->bi_generation; inode->v.i_size = bi->bi_size; + inode->ei_flags = 0; inode->ei_journal_seq = 0; inode->ei_quota_reserved = 0; inode->ei_str_hash = bch2_hash_info_init(c, bi); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 7c095b856b05..8c2796aa7abf 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -33,6 +33,7 @@ void bch2_pagecache_block_get(struct pagecache_lock *); struct bch_inode_info { struct inode v; + unsigned long ei_flags; struct mutex ei_update_lock; u64 ei_journal_seq; @@ -49,6 +50,12 @@ struct bch_inode_info { struct bch_inode_unpacked ei_inode; }; +/* + * Set if we've gotten a btree error for this inode, and thus the vfs inode and + * btree inode may be inconsistent: + */ +#define EI_INODE_ERROR 0 + #define to_bch_ei(_inode) \ container_of_or_null(_inode, struct bch_inode_info, v) -- cgit v1.2.3 From 07a1006ae81580c6a1b52b80e32fa9dadea1954b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Dec 2020 15:08:58 -0500 Subject: bcachefs: Reduce/kill BKEY_PADDED use With various newer key types - stripe keys, inline data extents - the old approach of calculating the maximum size of the value is becoming more and more error prone. Better to switch to bkey_on_stack, which can dynamically allocate if necessary to handle any size bkey. In particular we also want to get rid of BKEY_EXTENT_VAL_U64s_MAX. 
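Note that the helper callers are converted to ends up named bkey_buf: this same patch renames bkey_on_stack as part of the conversion. A minimal sketch of the resulting calling convention, not taken from the patch itself — it assumes a struct bch_fs *c and a struct bkey_s_c k in scope, and use_key() is a hypothetical stand-in for whatever the caller does with the copy:

	struct bkey_buf tmp;

	bch2_bkey_buf_init(&tmp);		/* tmp.k points at tmp.onstack[] */
	bch2_bkey_buf_reassemble(&tmp, c, k);	/* copies k, falling back to the
						 * c->large_bkey_pool mempool when k
						 * doesn't fit in the stack buffer */
	use_key(tmp.k);				/* tmp.k is a stable copy, still valid
						 * after the btree node is unlocked */
	bch2_bkey_buf_exit(&tmp, c);		/* frees the mempool allocation, if any */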
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 2 -- fs/bcachefs/bkey_buf.h | 60 +++++++++++++++++++++++++++++++++++++ fs/bcachefs/bkey_on_stack.h | 43 -------------------------- fs/bcachefs/bkey_sort.c | 18 +++++------ fs/bcachefs/btree_cache.c | 13 +++++--- fs/bcachefs/btree_gc.c | 22 +++++++------- fs/bcachefs/btree_io.c | 23 ++++++++------ fs/bcachefs/btree_iter.c | 35 ++++++++++++++-------- fs/bcachefs/btree_types.h | 2 +- fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/ec.c | 10 +++---- fs/bcachefs/extent_update.c | 1 - fs/bcachefs/fs-io.c | 18 +++++------ fs/bcachefs/fs.c | 16 +++++----- fs/bcachefs/fsck.c | 10 +++---- fs/bcachefs/io.c | 59 ++++++++++++++++++------------------ fs/bcachefs/io.h | 6 ++-- fs/bcachefs/journal.c | 2 +- fs/bcachefs/journal_io.c | 2 ++ fs/bcachefs/journal_types.h | 2 +- fs/bcachefs/migrate.c | 20 +++++++------ fs/bcachefs/move.c | 26 +++++++++------- fs/bcachefs/recovery.c | 34 +++++++++++---------- fs/bcachefs/reflink.c | 21 ++++++------- 24 files changed, 247 insertions(+), 200 deletions(-) create mode 100644 fs/bcachefs/bkey_buf.h delete mode 100644 fs/bcachefs/bkey_on_stack.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 397099514418..b88a9fdf17ad 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -638,8 +638,6 @@ struct bch_reservation { #define BKEY_EXTENT_VAL_U64s_MAX \ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1)) -#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX) - /* * Maximum possible size of an entire extent, key + value: */ #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h new file mode 100644 index 000000000000..0d7c67a959af --- /dev/null +++ b/fs/bcachefs/bkey_buf.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_BKEY_BUF_H +#define _BCACHEFS_BKEY_BUF_H + +#include "bcachefs.h" + +struct bkey_buf { + struct bkey_i *k; + u64 onstack[12]; +}; + +static inline void bch2_bkey_buf_realloc(struct bkey_buf *s, + struct bch_fs *c, unsigned u64s) +{ + if (s->k == (void *) s->onstack && + u64s > ARRAY_SIZE(s->onstack)) { + s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + memcpy(s->k, s->onstack, sizeof(s->onstack)); + } +} + +static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_s_c k) +{ + bch2_bkey_buf_realloc(s, c, k.k->u64s); + bkey_reassemble(s->k, k); +} + +static inline void bch2_bkey_buf_copy(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_i *src) +{ + bch2_bkey_buf_realloc(s, c, src->k.u64s); + bkey_copy(s->k, src); +} + +static inline void bch2_bkey_buf_unpack(struct bkey_buf *s, + struct bch_fs *c, + struct btree *b, + struct bkey_packed *src) +{ + bch2_bkey_buf_realloc(s, c, BKEY_U64s + + bkeyp_val_u64s(&b->format, src)); + bch2_bkey_unpack(b, s->k, src); +} + +static inline void bch2_bkey_buf_init(struct bkey_buf *s) +{ + s->k = (void *) s->onstack; +} + +static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c) +{ + if (s->k != (void *) s->onstack) + mempool_free(s->k, &c->large_bkey_pool); + s->k = NULL; +} + +#endif /* _BCACHEFS_BKEY_BUF_H */ diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h deleted file mode 100644 index f607a0cb37ed..000000000000 --- a/fs/bcachefs/bkey_on_stack.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ 
-#ifndef _BCACHEFS_BKEY_ON_STACK_H -#define _BCACHEFS_BKEY_ON_STACK_H - -#include "bcachefs.h" - -struct bkey_on_stack { - struct bkey_i *k; - u64 onstack[12]; -}; - -static inline void bkey_on_stack_realloc(struct bkey_on_stack *s, - struct bch_fs *c, unsigned u64s) -{ - if (s->k == (void *) s->onstack && - u64s > ARRAY_SIZE(s->onstack)) { - s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); - memcpy(s->k, s->onstack, sizeof(s->onstack)); - } -} - -static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s, - struct bch_fs *c, - struct bkey_s_c k) -{ - bkey_on_stack_realloc(s, c, k.k->u64s); - bkey_reassemble(s->k, k); -} - -static inline void bkey_on_stack_init(struct bkey_on_stack *s) -{ - s->k = (void *) s->onstack; -} - -static inline void bkey_on_stack_exit(struct bkey_on_stack *s, - struct bch_fs *c) -{ - if (s->k != (void *) s->onstack) - mempool_free(s->k, &c->large_bkey_pool); - s->k = NULL; -} - -#endif /* _BCACHEFS_BKEY_ON_STACK_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index 99e0a4011fae..2e1d9cd65f43 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "bkey_sort.h" #include "bset.h" #include "extents.h" @@ -187,11 +187,11 @@ bch2_sort_repack_merge(struct bch_fs *c, bool filter_whiteouts) { struct bkey_packed *out = vstruct_last(dst), *k_packed; - struct bkey_on_stack k; + struct bkey_buf k; struct btree_nr_keys nr; memset(&nr, 0, sizeof(nr)); - bkey_on_stack_init(&k); + bch2_bkey_buf_init(&k); while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) { if (filter_whiteouts && bkey_whiteout(k_packed)) @@ -204,7 +204,7 @@ bch2_sort_repack_merge(struct bch_fs *c, * node; we have to make a copy of the entire key before calling * normalize */ - bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s); + bch2_bkey_buf_realloc(&k, c, k_packed->u64s + BKEY_U64s); bch2_bkey_unpack(src, k.k, k_packed); if (filter_whiteouts && @@ -215,7 +215,7 @@ bch2_sort_repack_merge(struct bch_fs *c, } dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - bkey_on_stack_exit(&k, c); + bch2_bkey_buf_exit(&k, c); return nr; } @@ -315,11 +315,11 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, struct bkey l_unpacked, r_unpacked; struct bkey_s l, r; struct btree_nr_keys nr; - struct bkey_on_stack split; + struct bkey_buf split; unsigned i; memset(&nr, 0, sizeof(nr)); - bkey_on_stack_init(&split); + bch2_bkey_buf_init(&split); sort_iter_sort(iter, extent_sort_fix_overlapping_cmp); for (i = 0; i < iter->used;) { @@ -379,7 +379,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, /* * r wins, but it overlaps in the middle of l - split l: */ - bkey_on_stack_reassemble(&split, c, l.s_c); + bch2_bkey_buf_reassemble(&split, c, l.s_c); bch2_cut_back(bkey_start_pos(r.k), split.k); bch2_cut_front_s(r.k->p, l); @@ -398,7 +398,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst, dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - bkey_on_stack_exit(&split, c); + bch2_bkey_buf_exit(&split, c); return nr; } diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 04c71f11a555..d859cd26259b 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" @@ -899,10 +900,12 @@ struct 
btree *bch2_btree_node_get_sibling(struct bch_fs *c, struct btree *parent; struct btree_node_iter node_iter; struct bkey_packed *k; - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; struct btree *ret = NULL; unsigned level = b->c.level; + bch2_bkey_buf_init(&tmp); + parent = btree_iter_node(iter, level + 1); if (!parent) return NULL; @@ -936,9 +939,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, if (!k) goto out; - bch2_bkey_unpack(parent, &tmp.k, k); + bch2_bkey_buf_unpack(&tmp, c, parent, k); - ret = bch2_btree_node_get(c, iter, &tmp.k, level, + ret = bch2_btree_node_get(c, iter, tmp.k, level, SIX_LOCK_intent, _THIS_IP_); if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) { @@ -958,7 +961,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c, if (sib == btree_prev_sib) btree_node_unlock(iter, level); - ret = bch2_btree_node_get(c, iter, &tmp.k, level, + ret = bch2_btree_node_get(c, iter, tmp.k, level, SIX_LOCK_intent, _THIS_IP_); /* @@ -999,6 +1002,8 @@ out: bch2_btree_trans_verify_locks(trans); + bch2_bkey_buf_exit(&tmp, c); + return ret; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 8ab4c0df0d83..c390b490433a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -8,7 +8,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "bkey_methods.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "btree_io.h" @@ -267,10 +267,12 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, struct btree_and_journal_iter iter; struct bkey_s_c k; struct bpos next_node_start = b->data->min_key; + struct bkey_buf tmp; u8 max_stale = 0; int ret = 0; bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b); + bch2_bkey_buf_init(&tmp); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { bch2_bkey_debugcheck(c, b, k); @@ -284,10 +286,9 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, if (b->c.level) { struct btree *child; - BKEY_PADDED(k) tmp; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bch2_bkey_buf_reassemble(&tmp, c, k); + k = bkey_i_to_s_c(tmp.k); bch2_btree_and_journal_iter_advance(&iter); @@ -299,7 +300,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, break; if (b->c.level > target_depth) { - child = bch2_btree_node_get_noiter(c, &tmp.k, + child = bch2_btree_node_get_noiter(c, tmp.k, b->c.btree_id, b->c.level - 1); ret = PTR_ERR_OR_ZERO(child); if (ret) @@ -317,6 +318,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, } } + bch2_bkey_buf_exit(&tmp, c); return ret; } @@ -929,10 +931,10 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack sk; + struct bkey_buf sk; int ret = 0; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, @@ -941,7 +943,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { if (gc_btree_gens_key(c, k)) { - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); bch2_extent_normalize(c, bkey_i_to_s(sk.k)); bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); @@ -961,7 +963,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) } bch2_trans_exit(&trans); - 
bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } @@ -1073,7 +1075,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, } if (bch2_keylist_realloc(&keylist, NULL, 0, - (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) { + BKEY_BTREE_PTR_U64s_MAX * nr_old_nodes)) { trace_btree_gc_coalesce_fail(c, BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC); return; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index c100f930bb8f..831f387557aa 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1320,12 +1320,13 @@ static void bch2_btree_node_write_error(struct bch_fs *c, struct btree_write_bio *wbio) { struct btree *b = wbio->wbio.bio.bi_private; - __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; + struct bkey_buf k; struct bch_extent_ptr *ptr; struct btree_trans trans; struct btree_iter *iter; int ret; + bch2_bkey_buf_init(&k); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p, @@ -1344,21 +1345,22 @@ retry: BUG_ON(!btree_node_hashed(b)); - bkey_copy(&tmp.k, &b->key); + bch2_bkey_buf_copy(&k, c, &b->key); - bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(k.k), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k))) + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(k.k))) goto err; - ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); + ret = bch2_btree_node_update_key(c, iter, b, k.k); if (ret == -EINTR) goto retry; if (ret) goto err; out: bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&k, c); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b); return; @@ -1476,7 +1478,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct bset *i; struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; - BKEY_PADDED(key) k; + struct bkey_buf k; struct bch_extent_ptr *ptr; struct sort_iter sort_iter; struct nonce nonce; @@ -1487,6 +1489,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool validate_before_checksum = false; void *data; + bch2_bkey_buf_init(&k); + if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) return; @@ -1696,15 +1700,16 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, * just make all btree node writes FUA to keep things sane. */ - bkey_copy(&k.key, &b->key); + bch2_bkey_buf_copy(&k, c, &b->key); - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(k.k)), ptr) ptr->offset += b->written; b->written += sectors_to_write; /* XXX: submitting IO with btree locks held: */ - bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key); + bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k); + bch2_bkey_buf_exit(&k, c); return; err: set_btree_node_noevict(b); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 15963a657c72..47d833f5ad56 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "bkey_methods.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_iter.h" #include "btree_key_cache.h" @@ -1048,27 +1049,31 @@ static void btree_iter_prefetch(struct btree_iter *iter) struct btree_iter_level *l = &iter->l[iter->level]; struct btree_node_iter node_iter = l->iter; struct bkey_packed *k; - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) ? (iter->level > 1 ? 0 : 2) : (iter->level > 1 ? 
1 : 16); bool was_locked = btree_node_locked(iter, iter->level); + bch2_bkey_buf_init(&tmp); + while (nr) { if (!bch2_btree_node_relock(iter, iter->level)) - return; + break; bch2_btree_node_iter_advance(&node_iter, l->b); k = bch2_btree_node_iter_peek(&node_iter, l->b); if (!k) break; - bch2_bkey_unpack(l->b, &tmp.k, k); - bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1); + bch2_bkey_buf_unpack(&tmp, c, l->b, k); + bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1); } if (!was_locked) btree_node_unlock(iter, iter->level); + + bch2_bkey_buf_exit(&tmp, c); } static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, @@ -1100,30 +1105,34 @@ static __always_inline int btree_iter_down(struct btree_iter *iter, struct btree *b; unsigned level = iter->level - 1; enum six_lock_type lock_type = __btree_lock_want(iter, level); - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; + int ret; EBUG_ON(!btree_node_locked(iter, iter->level)); - bch2_bkey_unpack(l->b, &tmp.k, + bch2_bkey_buf_init(&tmp); + bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip); - if (unlikely(IS_ERR(b))) - return PTR_ERR(b); + b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip); + ret = PTR_ERR_OR_ZERO(b); + if (unlikely(ret)) + goto err; mark_btree_node_locked(iter, level, lock_type); btree_iter_node_set(iter, b); - if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 && - unlikely(b != btree_node_mem_ptr(&tmp.k))) + if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 && + unlikely(b != btree_node_mem_ptr(tmp.k))) btree_node_mem_ptr_set(iter, level + 1, b); if (iter->flags & BTREE_ITER_PREFETCH) btree_iter_prefetch(iter); iter->level = level; - - return 0; +err: + bch2_bkey_buf_exit(&tmp, c); + return ret; } static void btree_iter_up(struct btree_iter *iter) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 51ad87abc763..e51e3c7868de 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -57,7 +57,7 @@ struct btree_write { struct btree_alloc { struct open_buckets ob; - BKEY_PADDED(k); + __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); }; struct btree_bkey_cached_common { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6d69c7cb3665..2fa3a9aeb89a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -195,7 +195,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, { struct write_point *wp; struct btree *b; - BKEY_PADDED(k) tmp; + __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; struct open_buckets ob = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; unsigned nr_reserve; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 09de3270bff0..5dc2fc23c134 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -4,7 +4,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "bset.h" #include "btree_gc.h" #include "btree_update.h" @@ -783,10 +783,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct btree_iter *iter; struct bkey_s_c k; struct bkey_s_extent e; - struct bkey_on_stack sk; + struct bkey_buf sk; int ret = 0, dev, idx; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); /* XXX this doesn't support the reflink btree */ @@ -813,7 +813,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, dev = s->key.v.ptrs[idx].dev; - 
bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); e = bkey_i_to_s_extent(sk.k); bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev); @@ -834,7 +834,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, } bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index fd011df3cb99..1faca4bc1825 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index c2d024dec5c9..d48aa5b31e7b 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -3,7 +3,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -774,7 +774,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, struct readpages_iter *readpages_iter) { struct bch_fs *c = trans->c; - struct bkey_on_stack sk; + struct bkey_buf sk; int flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE; int ret = 0; @@ -782,7 +782,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, rbio->c = c; rbio->start_time = local_clock(); - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); retry: while (1) { struct bkey_s_c k; @@ -800,7 +800,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); ret = bch2_read_indirect_extent(trans, &offset_into_extent, &sk); @@ -845,7 +845,7 @@ retry: bio_endio(&rbio->bio); } - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); } void bch2_readahead(struct readahead_control *ractl) @@ -2431,7 +2431,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; - struct bkey_on_stack copy; + struct bkey_buf copy; struct btree_trans trans; struct btree_iter *src, *dst; loff_t shift, new_size; @@ -2441,7 +2441,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; - bkey_on_stack_init(&copy); + bch2_bkey_buf_init(&copy); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); /* @@ -2529,7 +2529,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0) break; reassemble: - bkey_on_stack_reassemble(&copy, c, k); + bch2_bkey_buf_reassemble(&copy, c, k); if (insert && bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) @@ -2606,7 +2606,7 @@ bkey_err: } err: bch2_trans_exit(&trans); - bkey_on_stack_exit(&copy, c); + bch2_bkey_buf_exit(&copy, c); bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 7cd3f243d1ed..bcb2f83fe354 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -3,7 +3,7 @@ #include "bcachefs.h" #include "acl.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "chardev.h" @@ -899,7 +899,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack cur, prev; + struct bkey_buf cur,
prev; struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; @@ -912,8 +912,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; - bkey_on_stack_init(&cur); - bkey_on_stack_init(&prev); + bch2_bkey_buf_init(&cur); + bch2_bkey_buf_init(&prev); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -932,7 +932,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_reassemble(&cur, c, k); + bch2_bkey_buf_reassemble(&cur, c, k); ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &cur); @@ -940,7 +940,7 @@ retry: break; k = bkey_i_to_s_c(cur.k); - bkey_on_stack_realloc(&prev, c, k.k->u64s); + bch2_bkey_buf_realloc(&prev, c, k.k->u64s); sectors = min(sectors, k.k->size - offset_into_extent); @@ -974,8 +974,8 @@ retry: FIEMAP_EXTENT_LAST); ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&cur, c); - bkey_on_stack_exit(&prev, c); + bch2_bkey_buf_exit(&cur, c); + bch2_bkey_buf_exit(&prev, c); return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 39f872de0c18..df0f00f10bd7 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "dirent.h" #include "error.h" @@ -464,11 +464,11 @@ static int check_extents(struct bch_fs *c) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack prev; + struct bkey_buf prev; u64 i_sectors; int ret = 0; - bkey_on_stack_init(&prev); + bch2_bkey_buf_init(&prev); prev.k->k = KEY(0, 0, 0); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); @@ -500,7 +500,7 @@ retry: goto err; } } - bkey_on_stack_reassemble(&prev, c, k); + bch2_bkey_buf_reassemble(&prev, c, k); ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) @@ -569,7 +569,7 @@ err: fsck_err: if (ret == -EINTR) goto retry; - bkey_on_stack_exit(&prev, c); + bch2_bkey_buf_exit(&prev, c); return bch2_trans_exit(&trans) ?: ret; } diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index b0d017e0b220..bc1e2dc04850 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -9,7 +9,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "bset.h" #include "btree_update.h" #include "buckets.h" @@ -425,14 +425,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, int bch2_write_index_default(struct bch_write_op *op) { struct bch_fs *c = op->c; - struct bkey_on_stack sk; + struct bkey_buf sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; struct btree_iter *iter; int ret; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -444,7 +444,7 @@ int bch2_write_index_default(struct bch_write_op *op) k = bch2_keylist_front(keys); - bkey_on_stack_realloc(&sk, c, k->k.u64s); + bch2_bkey_buf_realloc(&sk, c, k->k.u64s); bkey_copy(sk.k, k); bch2_cut_front(iter->pos, sk.k); @@ -461,7 +461,7 @@ int bch2_write_index_default(struct bch_write_op *op) } while (!bch2_keylist_empty(keys)); bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } @@ -1620,14 +1620,14 @@ static void 
bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio { struct btree_trans trans; struct btree_iter *iter; - struct bkey_on_stack sk; + struct bkey_buf sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -1639,7 +1639,7 @@ retry: if (bkey_err(k)) goto err; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); @@ -1660,7 +1660,7 @@ retry: out: bch2_rbio_done(rbio); bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return; err: rbio->bio.bi_status = BLK_STS_IOERR; @@ -1673,14 +1673,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, { struct btree_trans trans; struct btree_iter *iter; - struct bkey_on_stack sk; + struct bkey_buf sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1690,7 +1690,7 @@ retry: BTREE_ITER_SLOTS, k, ret) { unsigned bytes, sectors, offset_into_extent; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); @@ -1739,7 +1739,7 @@ err: rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); bch2_rbio_done(rbio); } @@ -1810,17 +1810,6 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if ((ret = bkey_err(k))) goto out; - /* - * going to be temporarily appending another checksum entry: - */ - new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + - BKEY_EXTENT_U64s_MAX * 8); - if ((ret = PTR_ERR_OR_ZERO(new))) - goto out; - - bkey_reassemble(new, k); - k = bkey_i_to_s_c(new); - if (bversion_cmp(k.k->version, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) goto out; @@ -1839,6 +1828,16 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, goto out; } + /* + * going to be temporarily appending another checksum entry: + */ + new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + + sizeof(struct bch_extent_crc128)); + if ((ret = PTR_ERR_OR_ZERO(new))) + goto out; + + bkey_reassemble(new, k); + if (!bch2_bkey_narrow_crcs(new, new_crc)) goto out; @@ -2005,7 +2004,7 @@ static void bch2_read_endio(struct bio *bio) int __bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, - struct bkey_on_stack *orig_k) + struct bkey_buf *orig_k) { struct btree_iter *iter; struct bkey_s_c k; @@ -2032,7 +2031,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, } *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); - bkey_on_stack_reassemble(orig_k, trans->c, k); + bch2_bkey_buf_reassemble(orig_k, trans->c, k); err: bch2_trans_iter_put(trans, iter); return ret; @@ -2304,7 +2303,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { struct btree_trans trans; struct btree_iter *iter; - struct bkey_on_stack sk; + struct bkey_buf sk; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| @@ -2318,7 +2317,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, 
c, 0, 0); retry: bch2_trans_begin(&trans); @@ -2341,7 +2340,7 @@ retry: bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); ret = bch2_read_indirect_extent(&trans, &offset_into_extent, &sk); @@ -2378,7 +2377,7 @@ retry: } out: bch2_trans_exit(&trans); - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return; err: if (ret == -EINTR) diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index 6721440e8bc7..8535e1f631be 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -3,7 +3,7 @@ #define _BCACHEFS_IO_H #include "checksum.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "io_types.h" #define to_wbio(_bio) \ @@ -118,11 +118,11 @@ struct cache_promote_op; struct extent_ptr_decoded; int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, - struct bkey_on_stack *); + struct bkey_buf *); static inline int bch2_read_indirect_extent(struct btree_trans *trans, unsigned *offset_into_extent, - struct bkey_on_stack *k) + struct bkey_buf *k) { return k->k->k.type == KEY_TYPE_reflink_p ? __bch2_read_indirect_extent(trans, offset_into_extent, k) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index be2c2d92384e..3ca8137923a6 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1097,7 +1097,7 @@ int bch2_fs_journal_init(struct journal *j) /* Btree roots: */ j->entry_u64s_reserved += - BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX); + BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX); atomic64_set(&j->reservations.counter, ((union journal_res_state) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index cb2cfbbf50d4..25010aa42af6 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -989,6 +989,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w, done: rcu_read_unlock(); + BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); + return replicas >= c->opts.metadata_replicas_required ? 
0 : -EROFS; } diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 1b130541f00b..150e691d5317 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -20,7 +20,7 @@ struct journal_buf { struct jset *data; - BKEY_PADDED(key); + __BKEY_PADDED(key, BCH_REPLICAS_MAX); struct closure_waitlist wait; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 96c8690adc5b..6241ff0c129f 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -4,7 +4,7 @@ */ #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" @@ -41,10 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - struct bkey_on_stack sk; + struct bkey_buf sk; int ret = 0; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, @@ -57,7 +57,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags continue; } - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k), dev_idx, flags, false); @@ -90,7 +90,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags } ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); BUG_ON(ret == -EINTR); @@ -109,6 +109,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) struct btree_iter *iter; struct closure cl; struct btree *b; + struct bkey_buf k; unsigned id; int ret; @@ -116,28 +117,28 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) if (flags & BCH_FORCE_IF_METADATA_LOST) return -EINVAL; + bch2_bkey_buf_init(&k); bch2_trans_init(&trans, c, 0, 0); closure_init_stack(&cl); for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; retry: if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), dev_idx)) continue; - bkey_copy(&tmp.k, &b->key); + bch2_bkey_buf_copy(&k, c, &b->key); - ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k), + ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true); if (ret) { bch_err(c, "Cannot drop device without losing data"); goto err; } - ret = bch2_btree_node_update_key(c, iter, b, &tmp.k); + ret = bch2_btree_node_update_key(c, iter, b, k.k); if (ret == -EINTR) { b = bch2_btree_iter_peek_node(iter); goto retry; @@ -157,6 +158,7 @@ retry: ret = 0; err: ret = bch2_trans_exit(&trans) ?: ret; + bch2_bkey_buf_exit(&k, c); BUG_ON(ret == -EINTR); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7f0990617b29..28e2125c12ed 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -2,7 +2,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" @@ -60,8 +60,13 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct migrate_write *m = container_of(op, struct migrate_write, op); struct keylist *keys = &op->insert_keys; + struct bkey_buf _new, _insert; int ret = 0; + bch2_bkey_buf_init(&_new); + bch2_bkey_buf_init(&_insert); + bch2_bkey_buf_realloc(&_insert, c, U8_MAX); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); iter = 
bch2_trans_get_iter(&trans, m->btree_id, @@ -72,7 +77,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op) struct bkey_s_c k; struct bkey_i *insert; struct bkey_i_extent *new; - BKEY_PADDED(k) _new, _insert; const union bch_extent_entry *entry; struct extent_ptr_decoded p; bool did_work = false; @@ -92,11 +96,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op) !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) goto nomatch; - bkey_reassemble(&_insert.k, k); - insert = &_insert.k; + bkey_reassemble(_insert.k, k); + insert = _insert.k; - bkey_copy(&_new.k, bch2_keylist_front(keys)); - new = bkey_i_to_extent(&_new.k); + bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); + new = bkey_i_to_extent(_new.k); bch2_cut_front(iter->pos, &new->k_i); bch2_cut_front(iter->pos, insert); @@ -192,6 +196,8 @@ nomatch: } out: bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&_insert, c); + bch2_bkey_buf_exit(&_new, c); BUG_ON(ret == -EINTR); return ret; } @@ -511,7 +517,7 @@ static int __bch2_move_data(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct bkey_on_stack sk; + struct bkey_buf sk; struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; @@ -520,7 +526,7 @@ static int __bch2_move_data(struct bch_fs *c, u64 delay, cur_inum = U64_MAX; int ret = 0, ret2; - bkey_on_stack_init(&sk); + bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); stats->data_type = BCH_DATA_user; @@ -600,7 +606,7 @@ peek: } /* unlock before doing IO: */ - bkey_on_stack_reassemble(&sk, c, k); + bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); @@ -634,7 +640,7 @@ next_nondata: } out: ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&sk, c); + bch2_bkey_buf_exit(&sk, c); return ret; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1883a1faf380..c5da1be46444 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "alloc_background.h" #include "btree_gc.h" #include "btree_update.h" @@ -224,28 +225,29 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b if (b->c.level) { struct btree *child; - BKEY_PADDED(k) tmp; + struct bkey_buf tmp; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bch2_bkey_buf_init(&tmp); + bch2_bkey_buf_reassemble(&tmp, c, k); + k = bkey_i_to_s_c(tmp.k); bch2_btree_and_journal_iter_advance(&iter); - if (b->c.level > 0) { - child = bch2_btree_node_get_noiter(c, &tmp.k, - b->c.btree_id, b->c.level - 1); - ret = PTR_ERR_OR_ZERO(child); - if (ret) - break; + child = bch2_btree_node_get_noiter(c, tmp.k, + b->c.btree_id, b->c.level - 1); + bch2_bkey_buf_exit(&tmp, c); - ret = (node_fn ? node_fn(c, b) : 0) ?: - bch2_btree_and_journal_walk_recurse(c, child, - journal_keys, btree_id, node_fn, key_fn); - six_unlock_read(&child->c.lock); + ret = PTR_ERR_OR_ZERO(child); + if (ret) + break; - if (ret) - break; - } + ret = (node_fn ? 
node_fn(c, b) : 0) ?: + bch2_btree_and_journal_walk_recurse(c, child, + journal_keys, btree_id, node_fn, key_fn); + six_unlock_read(&child->c.lock); + + if (ret) + break; } else { bch2_btree_and_journal_iter_advance(&iter); } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 8abcbfb3bd64..930547de3309 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" -#include "bkey_on_stack.h" +#include "bkey_buf.h" #include "btree_update.h" #include "extents.h" #include "inode.h" @@ -198,8 +198,7 @@ s64 bch2_remap_range(struct bch_fs *c, struct btree_trans trans; struct btree_iter *dst_iter, *src_iter; struct bkey_s_c src_k; - BKEY_PADDED(k) new_dst; - struct bkey_on_stack new_src; + struct bkey_buf new_dst, new_src; struct bpos dst_end = dst_start, src_end = src_start; struct bpos dst_want, src_want; u64 src_done, dst_done; @@ -216,7 +215,8 @@ s64 bch2_remap_range(struct bch_fs *c, dst_end.offset += remap_sectors; src_end.offset += remap_sectors; - bkey_on_stack_init(&new_src); + bch2_bkey_buf_init(&new_dst); + bch2_bkey_buf_init(&new_src); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, @@ -257,7 +257,7 @@ s64 bch2_remap_range(struct bch_fs *c, break; if (src_k.k->type != KEY_TYPE_reflink_p) { - bkey_on_stack_reassemble(&new_src, c, src_k); + bch2_bkey_buf_reassemble(&new_src, c, src_k); src_k = bkey_i_to_s_c(new_src.k); bch2_cut_front(src_iter->pos, new_src.k); @@ -275,7 +275,7 @@ s64 bch2_remap_range(struct bch_fs *c, struct bkey_s_c_reflink_p src_p = bkey_s_c_to_reflink_p(src_k); struct bkey_i_reflink_p *dst_p = - bkey_reflink_p_init(&new_dst.k); + bkey_reflink_p_init(new_dst.k); u64 offset = le64_to_cpu(src_p.v->idx) + (src_iter->pos.offset - @@ -286,12 +286,12 @@ s64 bch2_remap_range(struct bch_fs *c, BUG(); } - new_dst.k.k.p = dst_iter->pos; - bch2_key_resize(&new_dst.k.k, + new_dst.k->k.p = dst_iter->pos; + bch2_key_resize(&new_dst.k->k, min(src_k.k->p.offset - src_iter->pos.offset, dst_end.offset - dst_iter->pos.offset)); - ret = bch2_extent_update(&trans, dst_iter, &new_dst.k, + ret = bch2_extent_update(&trans, dst_iter, new_dst.k, NULL, journal_seq, new_i_size, i_sectors_delta); if (ret) @@ -333,7 +333,8 @@ err: } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; - bkey_on_stack_exit(&new_src, c); + bch2_bkey_buf_exit(&new_src, c); + bch2_bkey_buf_exit(&new_dst, c); percpu_ref_put(&c->writes); -- cgit v1.2.3 From 41f8b09edc25d8ea1f4cee44a9931deb3cf8b9d6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Feb 2021 19:27:37 -0500 Subject: bcachefs: Rename BTREE_ID enums for consistency with other enums Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 10 ++++---- fs/bcachefs/bcachefs.h | 18 ++++++------- fs/bcachefs/bcachefs_format.h | 20 +++++++-------- fs/bcachefs/bkey_methods.c | 30 +++++++++++----------- fs/bcachefs/btree_cache.c | 7 ------ fs/bcachefs/btree_cache.h | 2 -- fs/bcachefs/btree_gc.h | 2 +- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/btree_io.h | 4 +-- fs/bcachefs/btree_types.h | 30 +++++++++++----------- fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/btree_update_leaf.c | 2 +- fs/bcachefs/buckets.c | 10 ++++---- fs/bcachefs/dirent.c | 6 ++--- fs/bcachefs/ec.c | 16 ++++++------ fs/bcachefs/extent_update.c | 2 +- fs/bcachefs/extents.c | 2 +- fs/bcachefs/fs-io.c | 14 +++++------ fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 26 
+++++++++---------- fs/bcachefs/inode.c | 18 ++++++------- fs/bcachefs/io.c | 18 ++++++------- fs/bcachefs/migrate.c | 4 +-- fs/bcachefs/move.c | 6 ++--- fs/bcachefs/opts.c | 7 ++++++ fs/bcachefs/opts.h | 1 + fs/bcachefs/quota.c | 12 ++++----- fs/bcachefs/recovery.c | 20 +++++++-------- fs/bcachefs/reflink.c | 6 ++--- fs/bcachefs/super-io.c | 2 +- fs/bcachefs/super.c | 4 +-- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/tests.c | 50 ++++++++++++++++++------------------- fs/bcachefs/xattr.c | 4 +-- 34 files changed, 180 insertions(+), 181 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 54e58b377e51..34590e4b8f5d 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -316,7 +316,7 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys) int ret; down_read(&c->gc_lock); - ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC, + ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_alloc, NULL, bch2_alloc_read_fn); up_read(&c->gc_lock); @@ -344,7 +344,7 @@ retry: bch2_trans_begin(trans); ret = bch2_btree_key_cache_flush(trans, - BTREE_ID_ALLOC, iter->pos); + BTREE_ID_alloc, iter->pos); if (ret) goto err; @@ -386,7 +386,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN, + iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); for_each_member_device(ca, c, i) { @@ -423,7 +423,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, u64 *time, now; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr), + iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, POS(dev, bucket_nr), BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| BTREE_ITER_INTENT); @@ -927,7 +927,7 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, + iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS(ca->dev_idx, 0), BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 51ba38f19ca9..9f4e7a3ada36 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -369,14 +369,14 @@ enum gc_phase { GC_PHASE_START, GC_PHASE_SB, - GC_PHASE_BTREE_EC, - GC_PHASE_BTREE_EXTENTS, - GC_PHASE_BTREE_INODES, - GC_PHASE_BTREE_DIRENTS, - GC_PHASE_BTREE_XATTRS, - GC_PHASE_BTREE_ALLOC, - GC_PHASE_BTREE_QUOTAS, - GC_PHASE_BTREE_REFLINK, + GC_PHASE_BTREE_stripes, + GC_PHASE_BTREE_extents, + GC_PHASE_BTREE_inodes, + GC_PHASE_BTREE_dirents, + GC_PHASE_BTREE_xattrs, + GC_PHASE_BTREE_alloc, + GC_PHASE_BTREE_quotas, + GC_PHASE_BTREE_reflink, GC_PHASE_PENDING_DELETE, GC_PHASE_ALLOC, @@ -722,7 +722,7 @@ struct bch_fs { * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] * has been marked by GC. * - * gc_cur_phase is a superset of btree_ids (BTREE_ID_EXTENTS etc.) + * gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.) * * Protected by gc_pos_lock. Only written to by GC thread, so GC thread * can read without a lock. 
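One payoff of the lowercase tokens, sketched below (an illustration, not part of the patch): the enum token now matches the user-visible name, so string tables can be stringized straight from the same x-macro instead of being maintained by hand — presumably why the hand-written bch2_btree_ids[] is deleted from btree_cache.c while opts.c grows a few lines in the diffstat above.

	/* two stand-in entries; the real BCH_BTREE_IDS() has eight */
	#define BCH_BTREE_IDS()			\
		x(extents, 0)			\
		x(inodes,  1)

	enum btree_id {
	#define x(kwd, val)	BTREE_ID_##kwd = val,
		BCH_BTREE_IDS()
	#undef x
		BTREE_ID_NR
	};

	const char * const bch2_btree_ids[] = {
	#define x(kwd, val)	#kwd,	/* "extents", "inodes", ... */
		BCH_BTREE_IDS()
	#undef x
		NULL
	};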
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 1df6b7c6e4d7..e9e501a8c3ec 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1658,18 +1658,18 @@ LE32_BITMASK(JSET_NO_FLUSH, struct jset, flags, 5, 6); /* Btree: */ -#define BCH_BTREE_IDS() \ - x(EXTENTS, 0, "extents") \ - x(INODES, 1, "inodes") \ - x(DIRENTS, 2, "dirents") \ - x(XATTRS, 3, "xattrs") \ - x(ALLOC, 4, "alloc") \ - x(QUOTAS, 5, "quotas") \ - x(EC, 6, "stripes") \ - x(REFLINK, 7, "reflink") +#define BCH_BTREE_IDS() \ + x(extents, 0) \ + x(inodes, 1) \ + x(dirents, 2) \ + x(xattrs, 3) \ + x(alloc, 4) \ + x(quotas, 5) \ + x(stripes, 6) \ + x(reflink, 7) enum btree_id { -#define x(kwd, val, name) BTREE_ID_##kwd = val, +#define x(kwd, val) BTREE_ID_##kwd = val, BCH_BTREE_IDS() #undef x BTREE_ID_NR diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 756bf5aeee9b..79e249f49971 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -104,7 +104,7 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (k.k->u64s < BKEY_U64s) return "u64s too small"; - if (type == BKEY_TYPE_BTREE && + if (type == BKEY_TYPE_btree && bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) return "value too big"; @@ -122,7 +122,7 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, if (k.k->p.snapshot) return "nonzero snapshot"; - if (type != BKEY_TYPE_BTREE && + if (type != BKEY_TYPE_btree && !bkey_cmp(k.k->p, POS_MAX)) return "POS_MAX key"; @@ -263,18 +263,18 @@ static const struct old_bkey_type { u8 old; u8 new; } bkey_renumber_table[] = { - {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, - {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, - {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, - {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, - {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, - {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, - {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, - {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_hash_whiteout }, - {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, - {BKEY_TYPE_XATTRS, 129, KEY_TYPE_hash_whiteout }, - {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, - {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, + {BKEY_TYPE_btree, 128, KEY_TYPE_btree_ptr }, + {BKEY_TYPE_extents, 128, KEY_TYPE_extent }, + {BKEY_TYPE_extents, 129, KEY_TYPE_extent }, + {BKEY_TYPE_extents, 130, KEY_TYPE_reservation }, + {BKEY_TYPE_inodes, 128, KEY_TYPE_inode }, + {BKEY_TYPE_inodes, 130, KEY_TYPE_inode_generation }, + {BKEY_TYPE_dirents, 128, KEY_TYPE_dirent }, + {BKEY_TYPE_dirents, 129, KEY_TYPE_hash_whiteout }, + {BKEY_TYPE_xattrs, 128, KEY_TYPE_xattr }, + {BKEY_TYPE_xattrs, 129, KEY_TYPE_hash_whiteout }, + {BKEY_TYPE_alloc, 128, KEY_TYPE_alloc }, + {BKEY_TYPE_quotas, 128, KEY_TYPE_quota }, }; void bch2_bkey_renumber(enum btree_node_type btree_node_type, @@ -320,7 +320,7 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, break; case 2: if (version < bcachefs_metadata_version_inode_btree_change && - btree_id == BTREE_ID_INODES) { + btree_id == BTREE_ID_inodes) { if (!bkey_packed(k)) { struct bkey_i *u = packed_to_bkey(k); swap(u->k.p.inode, u->k.p.offset); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 97b4f87a377f..1a6b4618c2ae 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -13,13 +13,6 @@ #include #include -const char * const bch2_btree_ids[] = { -#define x(kwd, val, name) name, - BCH_BTREE_IDS() -#undef x - NULL -}; - void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned 
i, reserve = 16; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 5fffae92effb..217988696a77 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -7,8 +7,6 @@ struct btree_iter; -extern const char * const bch2_btree_ids[]; - void bch2_recalc_btree_reserve(struct bch_fs *); void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index f516faded269..d5559827ed7f 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -57,7 +57,7 @@ static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) { switch (id) { -#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; +#define x(name, v) case BTREE_ID_##name: return GC_PHASE_BTREE_##name; BCH_BTREE_IDS() #undef x default: diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index f081233a1ef1..b7d931335dd6 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1321,7 +1321,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, unsigned whiteout_u64s = 0; int ret; - if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) + if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree)) return -1; ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?: diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 89685bd57fc0..16ce6dff6af7 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -193,7 +193,7 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id, int write, struct bkey_format *f) { if (version < bcachefs_metadata_version_inode_btree_change && - btree_id == BTREE_ID_INODES) { + btree_id == BTREE_ID_inodes) { swap(f->bits_per_field[BKEY_FIELD_INODE], f->bits_per_field[BKEY_FIELD_OFFSET]); swap(f->field_offset[BKEY_FIELD_INODE], @@ -209,7 +209,7 @@ static inline void compat_bpos(unsigned level, enum btree_id btree_id, bch2_bpos_swab(p); if (version < bcachefs_metadata_version_inode_btree_change && - btree_id == BTREE_ID_INODES) + btree_id == BTREE_ID_inodes) swap(p->inode, p->offset); } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 35511d47ae97..fcaa13b9129c 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -545,16 +545,16 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) } enum btree_node_type { -#define x(kwd, val, name) BKEY_TYPE_##kwd = val, +#define x(kwd, val) BKEY_TYPE_##kwd = val, BCH_BTREE_IDS() #undef x - BKEY_TYPE_BTREE, + BKEY_TYPE_btree, }; /* Type of a key in btree @id at level @level: */ static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) { - return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id; + return level ? 
BKEY_TYPE_btree : (enum btree_node_type) id; } /* Type of keys @b contains: */ @@ -566,8 +566,8 @@ static inline enum btree_node_type btree_node_type(struct btree *b) static inline bool btree_node_type_is_extents(enum btree_node_type type) { switch (type) { - case BKEY_TYPE_EXTENTS: - case BKEY_TYPE_REFLINK: + case BKEY_TYPE_extents: + case BKEY_TYPE_reflink: return true; default: return false; @@ -590,18 +590,18 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter) } #define BTREE_NODE_TYPE_HAS_TRIGGERS \ - ((1U << BKEY_TYPE_EXTENTS)| \ - (1U << BKEY_TYPE_ALLOC)| \ - (1U << BKEY_TYPE_INODES)| \ - (1U << BKEY_TYPE_REFLINK)| \ - (1U << BKEY_TYPE_EC)| \ - (1U << BKEY_TYPE_BTREE)) + ((1U << BKEY_TYPE_extents)| \ + (1U << BKEY_TYPE_alloc)| \ + (1U << BKEY_TYPE_inodes)| \ + (1U << BKEY_TYPE_reflink)| \ + (1U << BKEY_TYPE_stripes)| \ + (1U << BKEY_TYPE_btree)) #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ - ((1U << BKEY_TYPE_EXTENTS)| \ - (1U << BKEY_TYPE_INODES)| \ - (1U << BKEY_TYPE_EC)| \ - (1U << BKEY_TYPE_REFLINK)) + ((1U << BKEY_TYPE_extents)| \ + (1U << BKEY_TYPE_inodes)| \ + (1U << BKEY_TYPE_stripes)| \ + (1U << BKEY_TYPE_reflink)) enum btree_trigger_flags { __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 961191881b48..e1dd21320153 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1196,7 +1196,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, struct bkey_packed *src, *dst, *n; struct bset *i; - BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); + BUG_ON(btree_node_type(b) != BKEY_TYPE_btree); bch2_btree_node_iter_init(&node_iter, b, &k->k.p); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 6100f164278b..c46016961284 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -341,7 +341,7 @@ static inline bool iter_has_nontrans_triggers(struct btree_iter *iter) { return (((BTREE_NODE_TYPE_HAS_TRIGGERS & ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) | - (1U << BTREE_ID_EC)) & + (1U << BTREE_ID_stripes)) & (1U << iter->btree_id); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3dcc77d5242f..55b9818a1dc2 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1509,7 +1509,7 @@ static int trans_get_key(struct btree_trans *trans, struct btree_iter **iter, struct bkey_s_c *k) { - unsigned flags = btree_id != BTREE_ID_ALLOC + unsigned flags = btree_id != BTREE_ID_alloc ? 
BTREE_ITER_SLOTS : BTREE_ITER_CACHED; int ret; @@ -1545,11 +1545,11 @@ bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_it if (IS_ERR(a)) return a; - iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k); + iter = trans_get_update(trans, BTREE_ID_alloc, pos, &k); if (iter) { *u = bch2_alloc_unpack(k); } else { - iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, + iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, pos, BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| BTREE_ITER_INTENT); @@ -1606,7 +1606,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, struct bch_replicas_padded r; int ret = 0; - ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.ec.idx), &iter, &k); + ret = trans_get_key(trans, BTREE_ID_stripes, POS(0, p.ec.idx), &iter, &k); if (ret < 0) return ret; @@ -1830,7 +1830,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, __le64 *refcount; s64 ret; - ret = trans_get_key(trans, BTREE_ID_REFLINK, + ret = trans_get_key(trans, BTREE_ID_reflink, POS(0, idx), &iter, &k); if (ret < 0) return ret; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d2ebf1e5819d..b0625176ab35 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -64,7 +64,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) } const struct bch_hash_desc bch2_dirent_hash_desc = { - .btree_id = BTREE_ID_DIRENTS, + .btree_id = BTREE_ID_dirents, .key_type = KEY_TYPE_dirent, .hash_key = dirent_hash_key, .hash_bkey = dirent_hash_bkey, @@ -332,7 +332,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) struct bkey_s_c k; int ret; - for_each_btree_key(trans, iter, BTREE_ID_DIRENTS, + for_each_btree_key(trans, iter, BTREE_ID_dirents, POS(dir_inum, 0), 0, k, ret) { if (k.k->p.inode > dir_inum) break; @@ -357,7 +357,7 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, + for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS(inum, ctx->pos), 0, k, ret) { if (k.k->p.inode > inum) break; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a70b859363f0..ced8ceeef992 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -433,7 +433,7 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip int ret; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, idx), BTREE_ITER_SLOTS); + iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_SLOTS); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) @@ -668,7 +668,7 @@ void bch2_stripes_heap_update(struct bch_fs *c, static int ec_stripe_delete(struct bch_fs *c, size_t idx) { - return bch2_btree_delete_range(c, BTREE_ID_EC, + return bch2_btree_delete_range(c, BTREE_ID_stripes, POS(0, idx), POS(0, idx + 1), NULL); @@ -713,7 +713,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, retry: bch2_trans_begin(&trans); - for_each_btree_key(&trans, iter, BTREE_ID_EC, start_pos, + for_each_btree_key(&trans, iter, BTREE_ID_stripes, start_pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { if (start_pos.offset) { @@ -765,7 +765,7 @@ static int ec_stripe_bkey_update(struct btree_trans *trans, unsigned i; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_EC, + iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, new->k.p, BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); @@ -831,7 +831,7 @@ static int 
ec_stripe_update_ptrs(struct bch_fs *c, /* XXX this doesn't support the reflink btree */ - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, bkey_start_pos(pos), BTREE_ITER_INTENT); @@ -1604,7 +1604,7 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN, + iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); genradix_for_each(&c->stripes[0], giter, m) { @@ -1645,7 +1645,7 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id, int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys) { - int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_EC, + int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_stripes, NULL, bch2_stripes_read_fn); if (ret) bch_err(c, "error reading stripes: %i", ret); @@ -1663,7 +1663,7 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS(0, U64_MAX), 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0); k = bch2_btree_iter_prev(iter); if (!IS_ERR_OR_NULL(k.k)) diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 16d2bca8a662..bb4b2b4352e0 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -62,7 +62,7 @@ static int count_iters_for_insert(struct btree_trans *trans, struct bkey_s_c r_k; for_each_btree_key(trans, iter, - BTREE_ID_REFLINK, POS(0, idx + offset), + BTREE_ID_reflink, POS(0, idx + offset), BTREE_ITER_SLOTS, r_k, ret2) { if (bkey_cmp(bkey_start_pos(r_k.k), POS(0, idx + sectors)) >= 0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 08236ceac4df..515840bc3eaa 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -676,7 +676,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, + for_each_btree_key(&trans, iter, BTREE_ID_extents, pos, BTREE_ITER_SLOTS, k, err) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 80ef9d6df287..4ccc9318a924 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -865,7 +865,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, BTREE_ITER_SLOTS); bch2_pagecache_add_get(&inode->ei_pagecache_lock); @@ -911,7 +911,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, BTREE_ITER_SLOTS); bchfs_read(&trans, iter, rbio, inum, NULL); @@ -2144,7 +2144,7 @@ static inline int range_has_data(struct bch_fs *c, bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, start, 0, k, ret) { + for_each_btree_key(&trans, iter, BTREE_ID_extents, start, 0, k, ret) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) break; @@ -2520,7 +2520,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, goto err; } - src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + src = 
bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); dst = bch2_trans_copy_iter(&trans, src); @@ -2675,7 +2675,7 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode, truncate_pagecache_range(&inode->v, offset, end - 1); } - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, block_start >> 9), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); end_pos = POS(inode->v.i_ino, block_end >> 9); @@ -3006,7 +3006,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, offset >> 9), 0, k, ret) { if (k.k->p.inode != inode->v.i_ino) { break; @@ -3101,7 +3101,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, offset >> 9), BTREE_ITER_SLOTS, k, ret) { if (k.k->p.inode != inode->v.i_ino) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index bcb2f83fe354..2d5e00a42b3e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -916,7 +916,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_init(&prev); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(ei->v.i_ino, start >> 9), 0); retry: while ((k = bch2_btree_iter_peek(iter)).k && diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 7f78edcfe565..ebc234b0b6fe 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -24,7 +24,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum) u64 sectors = 0; int ret; - for_each_btree_key(trans, iter, BTREE_ID_EXTENTS, + for_each_btree_key(trans, iter, BTREE_ID_extents, POS(inum, 0), 0, k, ret) { if (k.k->p.inode != inum) break; @@ -396,7 +396,7 @@ err_redo: if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n" "hash table key at wrong offset: btree %u, offset %llu, " "hashed to %llu chain starts at %llu\n%s", - buf, strlen(buf), BTREE_ID_DIRENTS, + buf, strlen(buf), BTREE_ID_dirents, k->k->p.offset, hash, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { @@ -415,7 +415,7 @@ err_redo: static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) { - return bch2_btree_delete_range(c, BTREE_ID_EXTENTS, + return bch2_btree_delete_range(c, BTREE_ID_extents, POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9), POS(inode_nr + 1, 0), NULL); } @@ -474,7 +474,7 @@ static int check_extents(struct bch_fs *c) bch_verbose(c, "checking extents"); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_INTENT); retry: @@ -537,7 +537,7 @@ retry: bch2_inode_pack(c, &p, &w.inode); - ret = bch2_btree_insert(c, BTREE_ID_INODES, + ret = bch2_btree_insert(c, BTREE_ID_inodes, &p.inode.k_i, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); @@ -595,7 +595,7 @@ static int check_dirents(struct bch_fs *c) hash_check_init(&h); - iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), 0); retry: for_each_btree_key_continue(iter, 0, k, ret) { @@ -747,7 +747,7 @@ static int 
check_xattrs(struct bch_fs *c) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS(BCACHEFS_ROOT_INO, 0), 0); retry: for_each_btree_key_continue(iter, 0, k, ret) { @@ -810,7 +810,7 @@ create_root: bch2_inode_pack(c, &packed, root_inode); - return bch2_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i, + return bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); @@ -958,7 +958,7 @@ next: if (e->offset == U64_MAX) goto up; - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, + for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS(e->inum, e->offset + 1), 0, k, ret) { if (k.k->p.inode != e->inum) break; @@ -1011,7 +1011,7 @@ up: path.nr--; } - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS_MIN, 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS_MIN, 0); retry: for_each_btree_key_continue(iter, 0, k, ret) { if (k.k->type != KEY_TYPE_inode) @@ -1108,7 +1108,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, inc_link(c, links, range_start, range_end, BCACHEFS_ROOT_INO, false); - for_each_btree_key(&trans, iter, BTREE_ID_DIRENTS, POS_MIN, 0, k, ret) { + for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, 0, k, ret) { switch (k.k->type) { case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); @@ -1349,7 +1349,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c, bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS(0, range_start), 0); nlinks_iter = genradix_iter_init(links, 0); @@ -1475,7 +1475,7 @@ int bch2_fsck_walk_inodes_only(struct bch_fs *c) bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, 0, k, ret) { + for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, 0, k, ret) { if (k.k->type != KEY_TYPE_inode) continue; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 8377d39ccc4d..a3d2bae0a652 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -300,7 +300,7 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans, struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum), + iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, inum), BTREE_ITER_CACHED|flags); k = bch2_btree_iter_peek_cached(iter); ret = bkey_err(k); @@ -498,7 +498,7 @@ int bch2_inode_create(struct btree_trans *trans, if (IS_ERR(inode_p)) return PTR_ERR(inode_p); again: - for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start), + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (bkey_cmp(iter->pos, POS(0, max)) > 0) break; @@ -513,7 +513,7 @@ again: * cache before using a slot: */ if (k.k->type != KEY_TYPE_inode && - !bch2_btree_key_cache_find(c, BTREE_ID_INODES, iter->pos)) + !bch2_btree_key_cache_find(c, BTREE_ID_inodes, iter->pos)) goto found_slot; } @@ -560,11 +560,11 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) * XXX: the dirent could ideally would delete whiteouts when they're no * longer needed */ - ret = bch2_btree_delete_range_trans(&trans, BTREE_ID_EXTENTS, + ret = bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, start, end, NULL) ?: - bch2_btree_delete_range_trans(&trans, BTREE_ID_XATTRS, + bch2_btree_delete_range_trans(&trans, BTREE_ID_xattrs, start, end, NULL) ?: - 
bch2_btree_delete_range_trans(&trans, BTREE_ID_DIRENTS, + bch2_btree_delete_range_trans(&trans, BTREE_ID_dirents, start, end, NULL); if (ret) goto err; @@ -574,11 +574,11 @@ retry: bi_generation = 0; if (cached) { - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS(0, inode_nr), BTREE_ITER_CACHED|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_cached(iter); } else { - iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr), + iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes, POS(0, inode_nr), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); } @@ -636,7 +636,7 @@ int __bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, + iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, inode_nr), flags); k = (flags & BTREE_ITER_TYPE) == BTREE_ITER_CACHED ? bch2_btree_iter_peek_cached(iter) diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 8a4d05eee381..de3bd22edb5a 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -408,7 +408,7 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inum, start), BTREE_ITER_INTENT); @@ -435,7 +435,7 @@ int bch2_write_index_default(struct bch_write_op *op) bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, bkey_start_pos(&k->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -1530,8 +1530,8 @@ static struct promote_op *promote_alloc(struct bch_fs *c, promote = __promote_alloc(c, k.k->type == KEY_TYPE_reflink_v - ? BTREE_ID_REFLINK - : BTREE_ID_EXTENTS, + ? 
BTREE_ID_reflink + : BTREE_ID_extents, k, pos, pick, opts, sectors, rbio); if (!promote) return NULL; @@ -1627,7 +1627,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, rbio->pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; @@ -1682,7 +1682,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, retry: bch2_trans_begin(&trans); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k, ret) { unsigned bytes, sectors, offset_into_extent; @@ -1801,7 +1801,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (crc_is_compressed(rbio->pick.crc)) return 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_EXTENTS, rbio->pos, + iter = bch2_trans_get_iter(trans, BTREE_ID_extents, rbio->pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); if ((ret = bkey_err(k))) @@ -2011,7 +2011,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans, reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + *offset_into_extent; - iter = bch2_trans_get_iter(trans, BTREE_ID_REFLINK, + iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, reflink_offset), BTREE_ITER_SLOTS); k = bch2_btree_iter_peek_slot(iter); @@ -2319,7 +2319,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) retry: bch2_trans_begin(&trans); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode, rbio->bio.bi_iter.bi_sector), BTREE_ITER_SLOTS); while (1) { diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 6241ff0c129f..1db2c2d6b970 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -99,8 +99,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { - return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?: - __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK); + return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_extents) ?: + __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_reflink); } static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 2343f41715ef..dfe7f05f39e9 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -578,7 +578,7 @@ peek: if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - if (btree_id == BTREE_ID_EXTENTS && + if (btree_id == BTREE_ID_extents && cur_inum != k.k->p.inode) { struct bch_inode_unpacked inode; @@ -668,8 +668,8 @@ int bch2_move_data(struct bch_fs *c, id++) { stats->btree_id = id; - if (id != BTREE_ID_EXTENTS && - id != BTREE_ID_REFLINK) + if (id != BTREE_ID_extents && + id != BTREE_ID_reflink) continue; ret = __bch2_move_data(c, &ctxt, rate, wp, diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index d53b6dccd161..a6c734efe328 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -30,6 +30,13 @@ const char * const bch2_sb_compat[] = { NULL }; +const char * const bch2_btree_ids[] = { +#define x(name, ...) 
#name, + BCH_BTREE_IDS() +#undef x + NULL +}; + const char * const bch2_csum_opts[] = { "none", "crc32c", diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index d2b3549a33af..38d78ca46c9c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -11,6 +11,7 @@ extern const char * const bch2_error_actions[]; extern const char * const bch2_sb_features[]; extern const char * const bch2_sb_compat[]; +extern const char * const bch2_btree_ids[]; extern const char * const bch2_csum_opts[]; extern const char * const bch2_compression_opts[]; extern const char * const bch2_str_hash_types[]; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index d3032a46e7f3..041da982d051 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -363,7 +363,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_QUOTAS, POS(type, 0), + for_each_btree_key(&trans, iter, BTREE_ID_quotas, POS(type, 0), BTREE_ITER_PREFETCH, k, ret) { if (k.k->p.inode != type) break; @@ -435,7 +435,7 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_INODES, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { switch (k.k->type) { case KEY_TYPE_inode: @@ -526,7 +526,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) if (c->opts.usrquota) return -EINVAL; - ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, + ret = bch2_btree_delete_range(c, BTREE_ID_quotas, POS(QTYP_USR, 0), POS(QTYP_USR + 1, 0), NULL); @@ -538,7 +538,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) if (c->opts.grpquota) return -EINVAL; - ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, + ret = bch2_btree_delete_range(c, BTREE_ID_quotas, POS(QTYP_GRP, 0), POS(QTYP_GRP + 1, 0), NULL); @@ -550,7 +550,7 @@ static int bch2_quota_remove(struct super_block *sb, unsigned uflags) if (c->opts.prjquota) return -EINVAL; - ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS, + ret = bch2_btree_delete_range(c, BTREE_ID_quotas, POS(QTYP_PRJ, 0), POS(QTYP_PRJ + 1, 0), NULL); @@ -718,7 +718,7 @@ static int bch2_set_quota_trans(struct btree_trans *trans, struct bkey_s_c k; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p, + iter = bch2_trans_get_iter(trans, BTREE_ID_quotas, new_quota->k.p, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 4d7badcc568b..b68fcd1d19e4 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -33,7 +33,7 @@ static void drop_alloc_keys(struct journal_keys *keys) size_t src, dst; for (src = 0, dst = 0; src < keys->nr; src++) - if (keys->d[src].btree_id != BTREE_ID_ALLOC) + if (keys->d[src].btree_id != BTREE_ID_alloc) keys->d[dst++] = keys->d[src]; keys->nr = dst; @@ -554,7 +554,7 @@ static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k) struct btree_iter *iter; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, k->k.p, + iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, k->k.p, BTREE_ITER_CACHED| BTREE_ITER_CACHED_NOFILL| BTREE_ITER_INTENT); @@ -606,7 +606,7 @@ static int bch2_journal_replay(struct bch_fs *c, for_each_journal_key(keys, i) { cond_resched(); - if (!i->level && i->btree_id == BTREE_ID_ALLOC) { + if (!i->level && i->btree_id == BTREE_ID_alloc) { j->replay_journal_seq = keys.journal_seq_base + i->journal_seq; ret 
= bch2_alloc_replay_key(c, i->k); if (ret) @@ -645,7 +645,7 @@ static int bch2_journal_replay(struct bch_fs *c, for_each_journal_key(keys, i) { cond_resched(); - if (i->level || i->btree_id == BTREE_ID_ALLOC) + if (i->level || i->btree_id == BTREE_ID_alloc) continue; replay_now_at(j, keys.journal_seq_base + i->journal_seq); @@ -931,28 +931,28 @@ static int read_btree_roots(struct bch_fs *c) if (!r->alive) continue; - if (i == BTREE_ID_ALLOC && + if (i == BTREE_ID_alloc && c->opts.reconstruct_alloc) { c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); continue; } if (r->error) { - __fsck_err(c, i == BTREE_ID_ALLOC + __fsck_err(c, i == BTREE_ID_alloc ? FSCK_CAN_IGNORE : 0, "invalid btree root %s", bch2_btree_ids[i]); - if (i == BTREE_ID_ALLOC) + if (i == BTREE_ID_alloc) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); } ret = bch2_btree_root_read(c, i, &r->key, r->level); if (ret) { - __fsck_err(c, i == BTREE_ID_ALLOC + __fsck_err(c, i == BTREE_ID_alloc ? FSCK_CAN_IGNORE : 0, "error reading btree root %s", bch2_btree_ids[i]); - if (i == BTREE_ID_ALLOC) + if (i == BTREE_ID_alloc) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); } } @@ -1346,7 +1346,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_pack(c, &packed_inode, &root_inode); err = "error creating root directory"; - ret = bch2_btree_insert(c, BTREE_ID_INODES, + ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, NULL, 0); if (ret) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 930547de3309..a2cc078597f2 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -119,7 +119,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (orig->k.type == KEY_TYPE_inline_data) bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); - for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK, + for_each_btree_key(trans, reflink_iter, BTREE_ID_reflink, POS(0, c->reflink_hint), BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) { if (reflink_iter->pos.inode) { @@ -219,9 +219,9 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_src); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); - src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start, + src_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, src_start, BTREE_ITER_INTENT); - dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start, + dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start, BTREE_ITER_INTENT); while (1) { diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 776c026ac838..f843a3b34ba2 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -945,7 +945,7 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) for (entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); entry = vstruct_next(entry)) - bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); + bch2_bkey_renumber(BKEY_TYPE_btree, bkey_to_packed(entry->start), write); } int bch2_fs_mark_dirty(struct bch_fs *c) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index be6e66e0db71..de8e770ba300 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1436,7 +1436,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) for (i = 0; i < ca->mi.nbuckets; i++) { ret = bch2_btree_key_cache_flush(&trans, - BTREE_ID_ALLOC, POS(ca->dev_idx, i)); + BTREE_ID_alloc, POS(ca->dev_idx, i)); if (ret) break; } @@ -1445,7 +1445,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) if (ret) return ret; 
- return bch2_btree_delete_range(c, BTREE_ID_ALLOC, + return bch2_btree_delete_range(c, BTREE_ID_alloc, POS(ca->dev_idx, 0), POS(ca->dev_idx + 1, 0), NULL); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index c4d79096c53a..b9078adaa747 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -259,7 +259,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k, ret) + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, 0, k, ret) if (k.k->type == KEY_TYPE_extent) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 6d0f8e233e8b..f25a27f26202 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -13,12 +13,12 @@ static void delete_test_keys(struct bch_fs *c) { int ret; - ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS, + ret = bch2_btree_delete_range(c, BTREE_ID_extents, POS(0, 0), POS(0, U64_MAX), NULL); BUG_ON(ret); - ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, + ret = bch2_btree_delete_range(c, BTREE_ID_xattrs, POS(0, 0), POS(0, U64_MAX), NULL); BUG_ON(ret); @@ -37,7 +37,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); ret = bch2_btree_iter_traverse(iter); @@ -82,7 +82,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, k.k.p, + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); ret = bch2_btree_iter_traverse(iter); @@ -130,7 +130,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); k.k.p.offset = i; - ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, + ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, NULL, NULL, 0); if (ret) { bch_err(c, "insert error in test_iterate: %i", ret); @@ -142,7 +142,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret) { if (k.k->p.inode) break; @@ -184,7 +184,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) k.k.p.offset = i + 8; k.k.size = 8; - ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, + ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) { bch_err(c, "insert error in test_iterate_extents: %i", ret); @@ -196,7 +196,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, 0, k, ret) { BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; @@ -237,7 +237,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); k.k.p.offset = i * 2; - ret = bch2_btree_insert(c, BTREE_ID_XATTRS, &k.k_i, + ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, NULL, NULL, 0); if (ret) { bch_err(c, "insert error in test_iterate_slots: %i", ret); @@ -249,7 +249,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret) { if (k.k->p.inode) break; @@ -265,7 +265,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 
nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, BTREE_ITER_SLOTS, k, ret) { BUG_ON(k.k->p.offset != i); BUG_ON(bkey_deleted(k.k) != (i & 1)); @@ -300,7 +300,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) k.k.p.offset = i + 16; k.k.size = 8; - ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, + ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) { bch_err(c, "insert error in test_iterate_slots_extents: %i", ret); @@ -312,7 +312,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, 0, k, ret) { BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); @@ -326,7 +326,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN, BTREE_ITER_SLOTS, k, ret) { BUG_ON(bkey_deleted(k.k) != !(i % 16)); @@ -354,7 +354,7 @@ static int test_peek_end(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS_MIN, 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0); k = bch2_btree_iter_peek(iter); BUG_ON(k.k); @@ -374,7 +374,7 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, 0); k = bch2_btree_iter_peek(iter); BUG_ON(k.k); @@ -403,7 +403,7 @@ static int insert_test_extent(struct bch_fs *c, k.k_i.k.size = end - start; k.k_i.k.version.lo = test_version++; - ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &k.k_i, + ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) bch_err(c, "insert error in insert_test_extent: %i", ret); @@ -475,7 +475,7 @@ static int rand_insert(struct bch_fs *c, u64 nr) k.k.p.offset = test_rand(); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - __bch2_btree_insert(&trans, BTREE_ID_XATTRS, &k.k_i)); + __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i)); if (ret) { bch_err(c, "error in rand_insert: %i", ret); break; @@ -495,7 +495,7 @@ static int rand_lookup(struct bch_fs *c, u64 nr) u64 i; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS_MIN, 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0); for (i = 0; i < nr; i++) { bch2_btree_iter_set_pos(iter, POS(0, test_rand())); @@ -522,7 +522,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr) u64 i; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_XATTRS, POS_MIN, 0); + iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0); for (i = 0; i < nr; i++) { bch2_btree_iter_set_pos(iter, POS(0, test_rand())); @@ -561,7 +561,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) struct bkey_s_c k; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_XATTRS, pos, + iter = bch2_trans_get_iter(trans, BTREE_ID_xattrs, pos, BTREE_ITER_INTENT); k = bch2_btree_iter_peek(iter); ret = bkey_err(k); @@ -616,7 +616,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { insert.k.p = iter->pos; @@ -643,7 +643,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, 0, k, ret) + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret) ; bch2_trans_exit(&trans); return ret; @@ -658,7 +658,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, POS_MIN, + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, BTREE_ITER_INTENT, k, ret) { struct bkey_i_cookie u; @@ -679,7 +679,7 @@ static int seq_delete(struct bch_fs *c, u64 nr) { int ret; - ret = bch2_btree_delete_range(c, BTREE_ID_XATTRS, + ret = bch2_btree_delete_range(c, BTREE_ID_xattrs, POS(0, 0), POS(0, U64_MAX), NULL); if (ret) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c7840bb949a1..5555d45df54e 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -61,7 +61,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) } const struct bch_hash_desc bch2_xattr_hash_desc = { - .btree_id = BTREE_ID_XATTRS, + .btree_id = BTREE_ID_xattrs, .key_type = KEY_TYPE_xattr, .hash_key = xattr_hash_key, .hash_bkey = xattr_hash_bkey, @@ -279,7 +279,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_XATTRS, + for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS(inum, 0), 0, k, ret) { BUG_ON(k.k->p.inode < inum); -- cgit v1.2.3 From 5f0e4ae1c73efe9e4f74492df08202a5845bd19a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Mar 2021 17:52:42 -0500 Subject: bcachefs: Use __bch2_trans_do() in a few more places Minor cleanup, it was being open coded. 
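For reference, the retry idiom being replaced, and what the helper collapses it into, looks roughly like the sketch below. This is a reconstruction from the call sites in this patch, not the actual definition in btree_update.h: the helper begins the transaction, evaluates the operation expression, commits, and loops while the commit asks for a restart with -EINTR.

	/*
	 * Illustrative sketch only -- reconstructed from the call sites
	 * in this series; the real macro lives in btree_update.h.  _do is
	 * an expression, re-evaluated on every restart:
	 */
	#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do)	\
	({									\
		int _ret;							\
										\
		do {								\
			bch2_trans_begin(_trans);				\
			_ret = (_do) ?:						\
			       bch2_trans_commit(_trans, _disk_res,		\
						 _journal_seq, _flags);		\
		} while (_ret == -EINTR);					\
										\
		_ret;								\
	})

Because the operation is re-evaluated on each pass, callers can hand the whole bch2_link_trans()/bch2_unlink_trans()/bch2_rename_trans() call in directly, which is exactly what the hunks below do.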
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 52 +++++++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 2d5e00a42b3e..a168d09ffd37 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -416,16 +416,12 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - do { - bch2_trans_begin(&trans); - ret = bch2_link_trans(&trans, + ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, + BTREE_INSERT_NOUNLOCK, + bch2_link_trans(&trans, dir->v.i_ino, inode->v.i_ino, &dir_u, &inode_u, - &dentry->d_name) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK); - } while (ret == -EINTR); + &dentry->d_name)); if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); @@ -472,17 +468,12 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); - do { - bch2_trans_begin(&trans); - - ret = bch2_unlink_trans(&trans, + ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL, + bch2_unlink_trans(&trans, dir->v.i_ino, &dir_u, - &inode_u, &dentry->d_name) ?: - bch2_trans_commit(&trans, NULL, - &dir->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| - BTREE_INSERT_NOFAIL); - } while (ret == -EINTR); + &inode_u, &dentry->d_name)); if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); @@ -599,21 +590,16 @@ static int bch2_rename2(struct mnt_idmap *idmap, goto err; } -retry: - bch2_trans_begin(&trans); - ret = bch2_rename_trans(&trans, - src_dir->v.i_ino, &src_dir_u, - dst_dir->v.i_ino, &dst_dir_u, - &src_inode_u, - &dst_inode_u, - &src_dentry->d_name, - &dst_dentry->d_name, - mode) ?: - bch2_trans_commit(&trans, NULL, - &journal_seq, - BTREE_INSERT_NOUNLOCK); - if (ret == -EINTR) - goto retry; + ret = __bch2_trans_do(&trans, NULL, &journal_seq, + BTREE_INSERT_NOUNLOCK, + bch2_rename_trans(&trans, + src_dir->v.i_ino, &src_dir_u, + dst_dir->v.i_ino, &dst_dir_u, + &src_inode_u, + &dst_inode_u, + &src_dentry->d_name, + &dst_dentry->d_name, + mode)); if (unlikely(ret)) goto err; -- cgit v1.2.3 From 07bca3bd1e5423b2d6fe8c7085af3e92b31c461f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 2 Mar 2021 18:35:30 -0500 Subject: bcachefs: Kill ei_str_hash Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 15 +++++++++------ fs/bcachefs/acl.h | 4 ++-- fs/bcachefs/fs-ioctl.c | 4 ++-- fs/bcachefs/fs.c | 7 +++---- fs/bcachefs/fs.h | 2 -- fs/bcachefs/xattr.c | 19 ++++++++++--------- 6 files changed, 26 insertions(+), 25 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index acc1d03c79e4..3879815bcede 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -217,6 +217,7 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c_xattr xattr; @@ -227,7 +228,7 @@ retry: bch2_trans_begin(&trans); iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, - &inode->ei_str_hash, inode->v.i_ino, + &hash, inode->v.i_ino, &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (IS_ERR(iter)) { @@ -290,6 
+291,7 @@ int bch2_set_acl(struct mnt_idmap *idmap, struct btree_trans trans; struct btree_iter *inode_iter; struct bch_inode_unpacked inode_u; + struct bch_hash_info hash_info; struct posix_acl *acl; umode_t mode; int ret; @@ -314,9 +316,9 @@ retry: goto err; } - ret = bch2_set_acl_trans(&trans, &inode_u, - &inode->ei_str_hash, - acl, type); + hash_info = bch2_hash_info_init(c, &inode_u); + + ret = bch2_set_acl_trans(&trans, &inode_u, &hash_info, acl, type); if (ret) goto btree_err; @@ -345,10 +347,11 @@ err: } int bch2_acl_chmod(struct btree_trans *trans, - struct bch_inode_info *inode, + struct bch_inode_unpacked *inode, umode_t mode, struct posix_acl **new_acl) { + struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode); struct btree_iter *iter; struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; @@ -356,7 +359,7 @@ int bch2_acl_chmod(struct btree_trans *trans, int ret = 0; iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, - &inode->ei_str_hash, inode->v.i_ino, + &hash_info, inode->bi_inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (IS_ERR(iter)) diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 73739e38e2d5..f11eb9d4592c 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -33,7 +33,7 @@ int bch2_set_acl_trans(struct btree_trans *, const struct bch_hash_info *, struct posix_acl *, int); int bch2_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); -int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *, +int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *, umode_t, struct posix_acl **); #else @@ -47,7 +47,7 @@ static inline int bch2_set_acl_trans(struct btree_trans *trans, } static inline int bch2_acl_chmod(struct btree_trans *trans, - struct bch_inode_info *inode, + struct bch_inode_unpacked *inode, umode_t mode, struct posix_acl **new_acl) { diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index f6773783b958..09a9567b402c 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -183,6 +183,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, struct bch_inode_info *src, const char __user *name) { + struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode); struct bch_inode_info *dst; struct inode *vinode = NULL; char *kname = NULL; @@ -202,8 +203,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, qstr.name = kname; ret = -ENOENT; - inum = bch2_dirent_lookup(c, src->v.i_ino, - &src->ei_str_hash, + inum = bch2_dirent_lookup(c, src->v.i_ino, &hash, &qstr); if (!inum) goto err1; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a168d09ffd37..ef8505da7391 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -370,11 +370,11 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); struct inode *vinode = NULL; u64 inum; - inum = bch2_dirent_lookup(c, dir->v.i_ino, - &dir->ei_str_hash, + inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash, &dentry->d_name); if (inum) @@ -723,7 +723,7 @@ retry: bch2_setattr_copy(idmap, inode, &inode_u, attr); if (attr->ia_valid & ATTR_MODE) { - ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl); + ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl); if (ret) goto btree_err; } @@ -1150,7 +1150,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->ei_flags = 0; inode->ei_journal_seq = 0; 
inode->ei_quota_reserved = 0; - inode->ei_str_hash = bch2_hash_info_init(c, bi); inode->ei_qid = bch_qid(bi); inode->v.i_mapping->a_ops = &bch_address_space_operations; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 8c2796aa7abf..f3072780af51 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -44,8 +44,6 @@ struct bch_inode_info { struct mutex ei_quota_lock; struct bch_qid ei_qid; - struct bch_hash_info ei_str_hash; - /* copy of inode in btree: */ struct bch_inode_unpacked ei_inode; }; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 5555d45df54e..5692b47eb3c9 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -121,6 +121,7 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, const char *name, void *buffer, size_t size, int type) { + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c_xattr xattr; @@ -128,8 +129,8 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, bch2_trans_init(&trans, c, 0, 0); - iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, - &inode->ei_str_hash, inode->v.i_ino, + iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &hash, + inode->v.i_ino, &X_SEARCH(type, name, strlen(name)), 0); if (IS_ERR(iter)) { @@ -239,7 +240,7 @@ static int bch2_xattr_emit(struct dentry *dentry, } static int bch2_xattr_list_bcachefs(struct bch_fs *c, - struct bch_inode_info *inode, + struct bch_inode_unpacked *inode, struct xattr_buf *buf, bool all) { @@ -249,12 +250,12 @@ static int bch2_xattr_list_bcachefs(struct bch_fs *c, u64 v; for (id = 0; id < Inode_opt_nr; id++) { - v = bch2_inode_opt_get(&inode->ei_inode, id); + v = bch2_inode_opt_get(inode, id); if (!v) continue; if (!all && - !(inode->ei_inode.bi_fields_set & (1 << id))) + !(inode->bi_fields_set & (1 << id))) continue; ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], @@ -298,11 +299,11 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret) return ret; - ret = bch2_xattr_list_bcachefs(c, inode, &buf, false); + ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false); if (ret) return ret; - ret = bch2_xattr_list_bcachefs(c, inode, &buf, true); + ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); if (ret) return ret; @@ -327,10 +328,10 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, - bch2_xattr_set(&trans, inode->v.i_ino, - &inode->ei_str_hash, + bch2_xattr_set(&trans, inode->v.i_ino, &hash, name, value, size, handler->flags, flags)); } -- cgit v1.2.3 From 5ff75ccbbc3f262158e5bf02c639539a4da93a43 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 14 Mar 2021 21:30:08 -0400 Subject: bcachefs: Fix read retry path for indirect extents In the read path, for retry of indirect extents to work we need to differentiate between the location in the btree the read was for, vs. the location where we found the data. This patch adds that plumbing to bch_read_bio. 
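Concretely: a read that lands on a reflink_p key is redirected into the reflink btree, so the position the read was issued at and the position the data was found at no longer coincide. A sketch of the new bch_read_bio fields (names match the io_types.h hunk later in this patch; the comments here are illustrative):

	struct bpos	read_pos;	/* pos the read was issued at, in the
					 * extents btree -- error reporting and
					 * retries of the request use this */
	enum btree_id	data_btree;	/* BTREE_ID_extents, or BTREE_ID_reflink
					 * when a reflink_p key was resolved */
	struct bpos	data_pos;	/* where the data was actually found --
					 * I/O retries and crc narrowing need
					 * this one */

With that plumbing, bch2_read_retry_nodecode() reopens its iterator at (data_btree, read_pos) and __bch2_rbio_narrow_crcs() at (data_btree, data_pos), where both previously assumed the extents btree.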
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 6 ++++-- fs/bcachefs/fs.c | 4 +++- fs/bcachefs/io.c | 46 +++++++++++++++++++++++++--------------------- fs/bcachefs/io.h | 23 +++++++++++++---------- fs/bcachefs/io_types.h | 14 ++++++++++++-- fs/bcachefs/move.c | 8 +++++--- 6 files changed, 62 insertions(+), 39 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 4ccc9318a924..8584b90a3df9 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -788,6 +788,7 @@ retry: while (1) { struct bkey_s_c k; unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; bch2_btree_iter_set_pos(iter, POS(inum, rbio->bio.bi_iter.bi_sector)); @@ -803,7 +804,7 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); - ret = bch2_read_indirect_extent(trans, + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, &sk); if (ret) break; @@ -827,7 +828,8 @@ retry: if (bkey_extent_is_allocation(k.k)) bch2_add_page_sectors(&rbio->bio, k); - bch2_read_extent(trans, rbio, k, offset_into_extent, flags); + bch2_read_extent(trans, rbio, iter->pos, + data_btree, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) break; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ef8505da7391..1fafd393912c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -908,6 +908,8 @@ retry: while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { + enum btree_id data_btree = BTREE_ID_extents; + if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { bch2_btree_iter_next(iter); @@ -920,7 +922,7 @@ retry: bch2_bkey_buf_reassemble(&cur, c, k); - ret = bch2_read_indirect_extent(&trans, + ret = bch2_read_indirect_extent(&trans, &data_btree, &offset_into_extent, &cur); if (ret) break; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index b402fc2e51d6..425502f7b1b8 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1627,8 +1627,8 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - rbio->pos, BTREE_ITER_SLOTS); + iter = bch2_trans_get_iter(&trans, rbio->data_btree, + rbio->read_pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; @@ -1642,14 +1642,17 @@ retry: if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, - rbio->pos.offset - + rbio->data_pos.offset - rbio->pick.crc.offset)) { /* extent we wanted to read no longer exists: */ rbio->hole = true; goto out; } - ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags); + ret = __bch2_read_extent(&trans, rbio, bvec_iter, + rbio->read_pos, + rbio->data_btree, + k, 0, failed, flags); if (ret == READ_RETRY) goto retry; if (ret) @@ -1671,7 +1674,7 @@ static void bch2_rbio_retry(struct work_struct *work) struct bch_fs *c = rbio->c; struct bvec_iter iter = rbio->bvec_iter; unsigned flags = rbio->flags; - u64 inode = rbio->pos.inode; + u64 inode = rbio->read_pos.inode; struct bch_io_failures failed = { .nr = 0 }; trace_read_retry(&rbio->bio); @@ -1719,7 +1722,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_read_bio *rbio) { struct bch_fs *c = rbio->c; - u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; + u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; struct bch_extent_crc_unpacked new_crc; struct btree_iter *iter = NULL; struct bkey_i *new; @@ -1729,7 +1732,7 @@ 
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (crc_is_compressed(rbio->pick.crc)) return 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_extents, rbio->pos, + iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); if ((ret = bkey_err(k))) @@ -1862,14 +1865,14 @@ csum_err: return; } - bch2_dev_inum_io_error(ca, rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector, + bch2_dev_inum_io_error(ca, rbio->read_pos.inode, (u64) rbio->bvec_iter.bi_sector, "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)", rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, csum.hi, csum.lo, crc.csum_type); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); return; decompression_err: - bch_err_inum_ratelimited(c, rbio->pos.inode, + bch_err_inum_ratelimited(c, rbio->read_pos.inode, "decompression error"); bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); return; @@ -1892,13 +1895,9 @@ static void bch2_read_endio(struct bio *bio) if (!rbio->split) rbio->bio.bi_end_io = rbio->end_io; - /* - * XXX: rbio->pos is not what we want here when reading from indirect - * extents - */ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, - rbio->pos.inode, - rbio->pos.offset, + rbio->read_pos.inode, + rbio->read_pos.offset, "data read error: %s", bch2_blk_status_to_str(bio->bi_status))) { bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); @@ -1963,7 +1962,8 @@ err: } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - struct bvec_iter iter, struct bkey_s_c k, + struct bvec_iter iter, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, unsigned offset_into_extent, struct bch_io_failures *failed, unsigned flags) { @@ -1973,7 +1973,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bch_dev *ca; struct promote_op *promote = NULL; bool bounce = false, read_full = false, narrow_crcs = false; - struct bpos pos = bkey_start_pos(k.k); + struct bpos data_pos = bkey_start_pos(k.k); int pick_ret; if (bkey_extent_is_inline_data(k.k)) { @@ -2049,7 +2049,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, pick.crc.offset || offset_into_extent)); - pos.offset += offset_into_extent; + data_pos.offset += offset_into_extent; pick.ptr.offset += pick.crc.offset + offset_into_extent; offset_into_extent = 0; @@ -2123,7 +2123,9 @@ get_bio: /* XXX: only initialize this if needed */ rbio->devs_have = bch2_bkey_devs(k); rbio->pick = pick; - rbio->pos = pos; + rbio->read_pos = read_pos; + rbio->data_btree = data_btree; + rbio->data_pos = data_pos; rbio->version = k.k->version; rbio->promote = promote; INIT_WORK(&rbio->work, NULL); @@ -2249,6 +2251,7 @@ retry: BTREE_ITER_SLOTS); while (1) { unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; bch2_btree_iter_set_pos(iter, POS(inode, bvec_iter.bi_sector)); @@ -2264,7 +2267,7 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); - ret = bch2_read_indirect_extent(&trans, + ret = bch2_read_indirect_extent(&trans, &data_btree, &offset_into_extent, &sk); if (ret) goto err; @@ -2289,7 +2292,8 @@ retry: if (bvec_iter.bi_size == bytes) flags |= BCH_READ_LAST_FRAGMENT; - ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, + ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos, + data_btree, k, offset_into_extent, failed, flags); switch (ret) { case READ_RETRY: diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h index 1c0a444ea325..ccbd8c3e6642 100644 
--- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -121,12 +121,15 @@ int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, struct bkey_buf *); static inline int bch2_read_indirect_extent(struct btree_trans *trans, + enum btree_id *data_btree, unsigned *offset_into_extent, struct bkey_buf *k) { - return k->k->k.type == KEY_TYPE_reflink_p - ? __bch2_read_indirect_extent(trans, offset_into_extent, k) - : 0; + if (k->k->k.type != KEY_TYPE_reflink_p) + return 0; + + *data_btree = BTREE_ID_reflink; + return __bch2_read_indirect_extent(trans, offset_into_extent, k); } enum bch_read_flags { @@ -143,17 +146,17 @@ enum bch_read_flags { }; int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, - struct bvec_iter, struct bkey_s_c, unsigned, + struct bvec_iter, struct bpos, enum btree_id, + struct bkey_s_c, unsigned, struct bch_io_failures *, unsigned); static inline void bch2_read_extent(struct btree_trans *trans, - struct bch_read_bio *rbio, - struct bkey_s_c k, - unsigned offset_into_extent, - unsigned flags) + struct bch_read_bio *rbio, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + unsigned offset_into_extent, unsigned flags) { - __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k, - offset_into_extent, NULL, flags); + __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, + data_btree, k, offset_into_extent, NULL, flags); } void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, diff --git a/fs/bcachefs/io_types.h b/fs/bcachefs/io_types.h index 65969eeac253..99b4b4c4a53b 100644 --- a/fs/bcachefs/io_types.h +++ b/fs/bcachefs/io_types.h @@ -58,8 +58,18 @@ struct bch_read_bio { struct bch_devs_list devs_have; struct extent_ptr_decoded pick; - /* start pos of data we read (may not be pos of data we want) */ - struct bpos pos; + + /* + * pos we read from - different from data_pos for indirect extents: + */ + struct bpos read_pos; + + /* + * start pos of data we read (may not be pos of data we want) - for + * promote, narrow extents paths: + */ + enum btree_id data_btree; + struct bpos data_pos; struct bversion version; struct promote_op *promote; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index dfe7f05f39e9..3ff31d25f396 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -208,9 +208,9 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) BUG_ON(!m->op.wbio.bio.bi_vcnt); m->ptr = rbio->pick.ptr; - m->offset = rbio->pos.offset - rbio->pick.crc.offset; + m->offset = rbio->data_pos.offset - rbio->pick.crc.offset; m->op.devs_have = rbio->devs_have; - m->op.pos = rbio->pos; + m->op.pos = rbio->data_pos; m->op.version = rbio->version; m->op.crc = rbio->pick.crc; m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; @@ -492,7 +492,9 @@ static int bch2_move_extent(struct btree_trans *trans, * ctxt when doing wakeup */ closure_get(&ctxt->cl); - bch2_read_extent(trans, &io->rbio, k, 0, + bch2_read_extent(trans, &io->rbio, + bkey_start_pos(k.k), + btree_id, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; -- cgit v1.2.3 From 50dc0f692a0dbe3e6a95d3f8e5c7e718bc9f021d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 19 Mar 2021 20:29:11 -0400 Subject: bcachefs: Require all btree iterators to be freed We keep running into occasional bugs with btree transaction iterators overflowing - this will make those bugs more visible. 
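The rule this patch enforces, assembled from the hunks below into one usage sketch (error handling elided): every iterator obtained from a transaction must be returned with bch2_trans_iter_put() before bch2_trans_exit(), which now reports any iterator still live.

	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);

	iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN,
				   BTREE_ITER_SLOTS);
	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	/* ... use k ... */

	bch2_trans_iter_put(&trans, iter);	/* now required before exit */
	ret = bch2_trans_exit(&trans) ?: ret;

In debug builds bch2_trans_exit() walks the transaction's live iterators and, if any were leaked, prints each one's btree ID and allocation site (iter->ip_allocated) and calls bch2_fatal_error() -- turning a silent iterator overflow into a loud failure.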
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 20 ++++++++++------- fs/bcachefs/alloc_background.c | 4 ++-- fs/bcachefs/btree_gc.c | 12 ++++++++--- fs/bcachefs/btree_io.c | 1 + fs/bcachefs/btree_iter.c | 17 +++++++++++++++ fs/bcachefs/debug.c | 4 ++++ fs/bcachefs/dirent.c | 3 +++ fs/bcachefs/ec.c | 4 +++- fs/bcachefs/extents.c | 2 ++ fs/bcachefs/fs-io.c | 49 +++++++++++++++++++++++------------------- fs/bcachefs/fs.c | 3 +++ fs/bcachefs/fsck.c | 3 ++- fs/bcachefs/inode.c | 1 + fs/bcachefs/io.c | 38 ++++++++++++++++---------------- fs/bcachefs/migrate.c | 9 ++++++-- fs/bcachefs/move.c | 3 +++ fs/bcachefs/quota.c | 7 +++++- fs/bcachefs/reflink.c | 26 +++++++++------------- fs/bcachefs/tests.c | 28 ++++++++++++++++++------ fs/bcachefs/xattr.c | 18 +++++++++------- 20 files changed, 163 insertions(+), 89 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 3879815bcede..afb9562be2b2 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -241,12 +241,12 @@ retry: } xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); - acl = bch2_acl_from_disk(xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); if (!IS_ERR(acl)) set_cached_acl(&inode->v, type, acl); + bch2_trans_iter_put(&trans, iter); out: bch2_trans_exit(&trans); return acl; @@ -313,7 +313,7 @@ retry: if (type == ACL_TYPE_ACCESS) { ret = posix_acl_update_mode(idmap, &inode->v, &mode, &acl); if (ret) - goto err; + goto btree_err; } hash_info = bch2_hash_info_init(c, &inode_u); @@ -330,6 +330,8 @@ retry: &inode->ei_journal_seq, BTREE_INSERT_NOUNLOCK); btree_err: + bch2_trans_iter_put(&trans, inode_iter); + if (ret == -EINTR) goto retry; if (unlikely(ret)) @@ -356,21 +358,22 @@ int bch2_acl_chmod(struct btree_trans *trans, struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; struct posix_acl *acl; - int ret = 0; + int ret; iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash_info, inode->bi_inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0; + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + return ret == -ENOENT ? 
0 : ret; xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); - acl = bch2_acl_from_disk(xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); - if (IS_ERR_OR_NULL(acl)) - return PTR_ERR(acl); + ret = PTR_ERR_OR_ZERO(acl); + if (ret || !acl) + goto err; ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) @@ -387,6 +390,7 @@ int bch2_acl_chmod(struct btree_trans *trans, *new_acl = acl; acl = NULL; err: + bch2_trans_iter_put(trans, iter); kfree(acl); return ret; } diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index add04dcb849b..e2200cedecca 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -385,7 +385,6 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags) int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -405,6 +404,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags) } } err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -926,7 +926,6 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS(ca->dev_idx, 0), BTREE_ITER_CACHED| @@ -942,6 +941,7 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) (!fifo_empty(&ca->free_inc) ? BTREE_INSERT_NOWAIT : 0)); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); /* If we used NOWAIT, don't return the error: */ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index e8cdc82d3451..a303cd376d4b 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -456,6 +456,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bch2_trans_cond_resched(&trans); } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) return ret; @@ -1212,6 +1214,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) bch2_btree_iter_next(iter); } + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1509,6 +1512,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) struct btree *b; bool kthread = (current->flags & PF_KTHREAD) != 0; unsigned i; + int ret = 0; /* Sliding window of adjacent btree nodes */ struct btree *merge[GC_MERGE_NODES]; @@ -1557,8 +1561,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) lock_seq[0] = merge[0]->c.lock.state.seq; if (kthread && kthread_should_stop()) { - bch2_trans_exit(&trans); - return -ESHUTDOWN; + ret = -ESHUTDOWN; + break; } bch2_trans_cond_resched(&trans); @@ -1573,7 +1577,9 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) memset(merge + 1, 0, (GC_MERGE_NODES - 1) * sizeof(merge[0])); } - return bch2_trans_exit(&trans); + bch2_trans_iter_put(&trans, iter); + + return bch2_trans_exit(&trans) ?: ret; } /** diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d547bfabf09f..7ec14cd8f02b 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1208,6 +1208,7 @@ retry: if (ret) goto err; out: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&k, c); bio_put(&wbio->wbio.bio); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 02a486e83881..00140ae50cb9 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -9,6 +9,7 @@ #include "btree_locking.h" #include 
"btree_update.h" #include "debug.h" +#include "error.h" #include "extents.h" #include "journal.h" #include "trace.h" @@ -2116,6 +2117,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, for (i = 0; i < ARRAY_SIZE(iter->l); i++) iter->l[i].b = NULL; iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; + iter->ip_allocated = _RET_IP_; return iter; } @@ -2224,6 +2226,8 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags) (void *) &trans->fs_usage_deltas->memset_start); } + bch2_trans_cond_resched(trans); + if (!(flags & TRANS_RESET_NOTRAVERSE)) bch2_btree_iter_traverse_all(trans); } @@ -2290,6 +2294,19 @@ int bch2_trans_exit(struct btree_trans *trans) bch2_trans_unlock(trans); #ifdef CONFIG_BCACHEFS_DEBUG + if (trans->iters_live) { + struct btree_iter *iter; + + bch_err(c, "btree iterators leaked!"); + trans_for_each_iter(trans, iter) + if (btree_iter_live(trans, iter)) + printk(KERN_ERR " btree %s allocated at %pS\n", + bch2_btree_ids[iter->btree_id], + (void *) iter->ip_allocated); + /* Be noisy about this: */ + bch2_fatal_error(c); + } + mutex_lock(&trans->c->btree_trans_lock); list_del(&trans->list); mutex_unlock(&trans->c->btree_trans_lock); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 3ac700dc72d5..2c2d58514c68 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -242,6 +242,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) break; } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; @@ -294,6 +296,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size) break; } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index b0625176ab35..592dd80cf963 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -321,6 +321,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, k = bch2_btree_iter_peek_slot(iter); inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); + bch2_trans_iter_put(&trans, iter); out: bch2_trans_exit(&trans); return inum; @@ -379,6 +380,8 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) break; ctx->pos = dirent.k->p.offset + 1; } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; return ret; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index f61d4c873a82..bdce37981c5c 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -873,6 +873,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, if (ret) break; } + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1663,12 +1664,13 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0); k = bch2_btree_iter_prev(iter); if (!IS_ERR_OR_NULL(k.k)) idx = k.k->p.offset + 1; + + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 3fe9ef50f5c0..8cf45b7b9459 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -687,6 +687,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, break; } } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return ret; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 332795eb9ae8..8891207c46a9 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c 
@@ -866,7 +866,6 @@ void bch2_readahead(struct readahead_control *ractl) BUG_ON(ret); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, BTREE_ITER_SLOTS); @@ -895,6 +894,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); kfree(readpages_iter.pages); } @@ -918,6 +918,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, bchfs_read(&trans, iter, rbio, inum, NULL); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); } @@ -2155,6 +2156,7 @@ static inline int range_has_data(struct bch_fs *c, break; } } + bch2_trans_iter_put(&trans, iter); return bch2_trans_exit(&trans) ?: ret; } @@ -2325,6 +2327,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) bch2_trans_init(&trans, c, 0, 0); iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); ret = PTR_ERR_OR_ZERO(iter); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); if (ret) @@ -2459,14 +2462,11 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct btree_iter *src, *dst, *del; loff_t shift, new_size; u64 src_start; - int ret; + int ret = 0; if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; - bch2_bkey_buf_init(©); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); - /* * We need i_mutex to keep the page cache consistent with the extents * btree, and the btree consistent with i_size - we don't need outside @@ -2522,13 +2522,15 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, goto err; } + bch2_bkey_buf_init(©); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); src = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); dst = bch2_trans_copy_iter(&trans, src); del = bch2_trans_copy_iter(&trans, src); - while (1) { + while (ret == 0 || ret == -EINTR) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; @@ -2542,7 +2544,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ? 
bch2_btree_iter_peek_prev(src) : bch2_btree_iter_peek(src); if ((ret = bkey_err(k))) - goto bkey_err; + continue; if (!k.k || k.k->p.inode != inode->v.i_ino) break; @@ -2562,7 +2564,7 @@ reassemble: ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); if (ret) - goto bkey_err; + continue; if (bkey_cmp(atomic_end, copy.k->k.p)) { if (insert) { @@ -2605,18 +2607,18 @@ reassemble: &inode->ei_journal_seq, BTREE_INSERT_NOFAIL); bch2_disk_reservation_put(c, &disk_res); -bkey_err: + if (!ret) bch2_btree_iter_set_pos(src, next_pos); - - if (ret == -EINTR) - ret = 0; - if (ret) - goto err; - - bch2_trans_cond_resched(&trans); } - bch2_trans_unlock(&trans); + bch2_trans_iter_put(&trans, del); + bch2_trans_iter_put(&trans, dst); + bch2_trans_iter_put(&trans, src); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(©, c); + + if (ret) + goto err; if (!insert) { i_size_write(&inode->v, new_size); @@ -2626,8 +2628,6 @@ bkey_err: mutex_unlock(&inode->ei_update_lock); } err: - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(©, c); bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; @@ -2682,7 +2682,7 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); end_pos = POS(inode->v.i_ino, block_end >> 9); - while (bkey_cmp(iter->pos, end_pos) < 0) { + while (!ret && bkey_cmp(iter->pos, end_pos) < 0) { s64 i_sectors_delta = 0; struct disk_reservation disk_res = { 0 }; struct quota_res quota_res = { 0 }; @@ -2746,9 +2746,11 @@ bkey_err: bch2_disk_reservation_put(c, &disk_res); if (ret == -EINTR) ret = 0; - if (ret) - goto err; } + bch2_trans_iter_put(&trans, iter); + + if (ret) + goto err; /* * Do we need to extend the file? @@ -2770,6 +2772,7 @@ bkey_err: ret = PTR_ERR_OR_ZERO(inode_iter); } while (ret == -EINTR); + bch2_trans_iter_put(&trans, inode_iter); bch2_trans_unlock(&trans); if (ret) @@ -3015,6 +3018,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) } else if (k.k->p.offset >> 9 > isize) break; } + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -3118,6 +3122,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) offset = max(offset, bkey_start_offset(k.k) << 9); } } + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 1fafd393912c..3acda0389da8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -734,6 +734,8 @@ retry: BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); btree_err: + bch2_trans_iter_put(&trans, inode_iter); + if (ret == -EINTR) goto retry; if (unlikely(ret)) @@ -961,6 +963,7 @@ retry: ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 033d37891c60..f8e0b24d087a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1485,11 +1485,12 @@ int bch2_fsck_walk_inodes_only(struct bch_fs *c) BCH_INODE_I_SECTORS_DIRTY| BCH_INODE_UNLINKED)) { ret = check_inode(&trans, NULL, iter, inode, NULL); - BUG_ON(ret == -EINTR); if (ret) break; } } + bch2_trans_iter_put(&trans, iter); + BUG_ON(ret == -EINTR); return bch2_trans_exit(&trans) ?: ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index a3d2bae0a652..aec0fc9228a3 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -620,6 +620,7 @@ retry: ret = bch2_trans_commit(&trans, NULL, 
NULL, BTREE_INSERT_NOFAIL); + bch2_trans_iter_put(&trans, iter); err: if (ret == -EINTR) goto retry; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 425502f7b1b8..b841b3da2510 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -414,6 +414,8 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, ret = bch2_fpunch_at(&trans, iter, POS(inum, end), journal_seq, i_sectors_delta); + + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); if (ret == -EINTR) @@ -460,6 +462,7 @@ int bch2_write_index_default(struct bch_write_op *op) bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1659,6 +1662,7 @@ retry: goto err; out: bch2_rbio_done(rbio); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); return; @@ -2259,7 +2263,7 @@ retry: k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) - goto err; + break; offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); @@ -2270,7 +2274,7 @@ retry: ret = bch2_read_indirect_extent(&trans, &data_btree, &offset_into_extent, &sk); if (ret) - goto err; + break; k = bkey_i_to_s_c(sk.k); @@ -2295,12 +2299,8 @@ retry: ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos, data_btree, k, offset_into_extent, failed, flags); - switch (ret) { - case READ_RETRY: - goto retry; - case READ_ERR: - goto err; - }; + if (ret) + break; if (flags & BCH_READ_LAST_FRAGMENT) break; @@ -2308,19 +2308,19 @@ retry: swap(bvec_iter.bi_size, bytes); bio_advance_iter(&rbio->bio, &bvec_iter, bytes); } -out: - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - return; -err: - if (ret == -EINTR) + bch2_trans_iter_put(&trans, iter); + + if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) goto retry; - bch_err_inum_ratelimited(c, inode, - "read error %i from btree lookup", ret); - rbio->bio.bi_status = BLK_STS_IOERR; - bch2_rbio_done(rbio); - goto out; + if (ret) { + bch_err_inum_ratelimited(c, inode, + "read error %i from btree lookup", ret); + rbio->bio.bi_status = BLK_STS_IOERR; + bch2_rbio_done(rbio); + } + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); } void bch2_fs_io_exit(struct bch_fs *c) diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 1db2c2d6b970..4d8b4169923d 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -88,6 +88,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags if (ret) break; } + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&sk, c); @@ -135,20 +136,24 @@ retry: dev_idx, flags, true); if (ret) { bch_err(c, "Cannot drop device without losing data"); - goto err; + break; } ret = bch2_btree_node_update_key(c, iter, b, k.k); if (ret == -EINTR) { b = bch2_btree_iter_peek_node(iter); + ret = 0; goto retry; } if (ret) { bch_err(c, "Error updating btree node key: %i", ret); - goto err; + break; } } bch2_trans_iter_free(&trans, iter); + + if (ret) + goto err; } /* flush relevant btree updates */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 3ff31d25f396..f7b0764d9c98 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -195,6 +195,7 @@ nomatch: goto next; } out: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&_insert, c); bch2_bkey_buf_exit(&_new, c); @@ -641,6 +642,8 @@ next_nondata: bch2_trans_cond_resched(&trans); } out: + + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) 
?: ret; bch2_bkey_buf_exit(&sk, c); diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 8e272519ce0e..35b409e0f366 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -372,6 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) if (ret) break; } + bch2_trans_iter_put(&trans, iter); return bch2_trans_exit(&trans) ?: ret; } @@ -449,6 +450,8 @@ int bch2_fs_quota_read(struct bch_fs *c) KEY_TYPE_QUOTA_NOCHECK); } } + bch2_trans_iter_put(&trans, iter); + return bch2_trans_exit(&trans) ?: ret; } @@ -739,7 +742,9 @@ static int bch2_set_quota_trans(struct btree_trans *trans, if (qdq->d_fieldmask & QC_INO_HARD) new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit); - return bch2_trans_update(trans, iter, &new_quota->k_i, 0); + ret = bch2_trans_update(trans, iter, &new_quota->k_i, 0); + bch2_trans_iter_put(trans, iter); + return ret; } static int bch2_set_quota(struct super_block *sb, struct kqid qid, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index e9a6a5f639b4..0978ad92614c 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -223,20 +223,18 @@ s64 bch2_remap_range(struct bch_fs *c, dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start, BTREE_ITER_INTENT); - while (1) { + while (ret == 0 || ret == -EINTR) { bch2_trans_begin(&trans); - trans.mem_top = 0; - if (fatal_signal_pending(current)) { ret = -EINTR; - goto err; + break; } src_k = get_next_src(src_iter, src_end); ret = bkey_err(src_k); if (ret) - goto btree_err; + continue; src_done = bpos_min(src_iter->pos, src_end).offset - src_start.offset; @@ -245,8 +243,6 @@ s64 bch2_remap_range(struct bch_fs *c, if (bkey_cmp(dst_iter->pos, dst_want) < 0) { ret = bch2_fpunch_at(&trans, dst_iter, dst_want, journal_seq, i_sectors_delta); - if (ret) - goto btree_err; continue; } @@ -265,7 +261,7 @@ s64 bch2_remap_range(struct bch_fs *c, ret = bch2_make_extent_indirect(&trans, src_iter, new_src.k); if (ret) - goto btree_err; + continue; BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); } @@ -294,20 +290,16 @@ s64 bch2_remap_range(struct bch_fs *c, NULL, journal_seq, new_i_size, i_sectors_delta); if (ret) - goto btree_err; + continue; dst_done = dst_iter->pos.offset - dst_start.offset; src_want = POS(src_start.inode, src_start.offset + dst_done); bch2_btree_iter_set_pos(src_iter, src_want); -btree_err: - if (ret == -EINTR) - ret = 0; - if (ret) - goto err; } + bch2_trans_iter_put(&trans, dst_iter); + bch2_trans_iter_put(&trans, src_iter); - BUG_ON(bkey_cmp(dst_iter->pos, dst_end)); -err: + BUG_ON(!ret && bkey_cmp(dst_iter->pos, dst_end)); BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); dst_done = dst_iter->pos.offset - dst_start.offset; @@ -329,6 +321,8 @@ err: ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, journal_seq, 0); } + + bch2_trans_iter_put(&trans, inode_iter); } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index f25a27f26202..286587a118fe 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -67,6 +67,7 @@ static int test_delete(struct bch_fs *c, u64 nr) goto err; } err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -106,6 +107,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) goto err; } err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -113,7 +115,7 @@ err: static int test_iterate(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter 
*iter; + struct btree_iter *iter = NULL; struct bkey_s_c k; u64 i; int ret = 0; @@ -159,6 +161,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) BUG_ON(i); err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -166,7 +169,7 @@ err: static int test_iterate_extents(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter *iter = NULL; struct bkey_s_c k; u64 i; int ret = 0; @@ -213,6 +216,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) BUG_ON(i); err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -257,7 +261,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) BUG_ON(k.k->p.offset != i); i += 2; } - bch2_trans_iter_free(&trans, iter); + bch2_trans_iter_put(&trans, iter); BUG_ON(i != nr * 2); @@ -274,6 +278,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) if (i == nr * 2) break; } + bch2_trans_iter_put(&trans, iter); err: bch2_trans_exit(&trans); return ret; @@ -318,7 +323,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) BUG_ON(k.k->size != 8); i += 16; } - bch2_trans_iter_free(&trans, iter); + bch2_trans_iter_put(&trans, iter); BUG_ON(i != nr); @@ -337,6 +342,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) if (i == nr) break; } + bch2_trans_iter_put(&trans, iter); err: bch2_trans_exit(&trans); return 0; @@ -362,6 +368,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr) k = bch2_btree_iter_peek(iter); BUG_ON(k.k); + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return 0; } @@ -382,6 +390,8 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) k = bch2_btree_iter_peek(iter); BUG_ON(k.k); + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return 0; } @@ -508,7 +518,7 @@ static int rand_lookup(struct bch_fs *c, u64 nr) } } - bch2_trans_iter_free(&trans, iter); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -549,7 +559,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr) } } - bch2_trans_iter_free(&trans, iter); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -630,6 +640,8 @@ static int seq_insert(struct bch_fs *c, u64 nr) if (++i == nr) break; } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return ret; } @@ -645,6 +657,8 @@ static int seq_lookup(struct bch_fs *c, u64 nr) for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret) ; + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return ret; } @@ -671,6 +685,8 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) break; } } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return ret; } diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 5692b47eb3c9..f18a795620d8 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -133,12 +133,9 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, inode->v.i_ino, &X_SEARCH(type, name, strlen(name)), 0); - if (IS_ERR(iter)) { - bch2_trans_exit(&trans); - BUG_ON(PTR_ERR(iter) == -EINTR); - - return PTR_ERR(iter) == -ENOENT ? 
-ENODATA : PTR_ERR(iter); - } + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + goto err; xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); ret = le16_to_cpu(xattr.v->x_val_len); @@ -148,9 +145,12 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, else memcpy(buffer, xattr_val(xattr.v), ret); } - + bch2_trans_iter_put(&trans, iter); +err: bch2_trans_exit(&trans); - return ret; + + BUG_ON(ret == -EINTR); + return ret == -ENOENT ? -ENODATA : ret; } int bch2_xattr_set(struct btree_trans *trans, u64 inum, @@ -294,6 +294,8 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret) break; } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) -- cgit v1.2.3 From e0ba3b6429a4b5995b06dc46afdf4d3530d156bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 21 Mar 2021 16:55:25 -0400 Subject: bcachefs: Replace bch2_btree_iter_next() calls with bch2_btree_iter_advance The way btree iterators work internally has been changing, particularly with the iter->real_pos changes, and bch2_btree_iter_next() is no longer hyper optimized - it's just advance followed by peek, so it's more efficient to just call advance where we're not using the return value of bch2_btree_iter_next(). Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 2 +- fs/bcachefs/btree_iter.c | 16 ++++++++-------- fs/bcachefs/btree_iter.h | 4 ++-- fs/bcachefs/debug.c | 2 +- fs/bcachefs/ec.c | 4 ++-- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 8 ++++---- fs/bcachefs/migrate.c | 2 +- fs/bcachefs/move.c | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index f75562bf8e21..483360fbda18 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1208,7 +1208,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) } } - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); } bch2_trans_iter_put(&trans, iter); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 073157f5fbed..d6ef08b27858 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1501,7 +1501,7 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); } -inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) +inline bool bch2_btree_iter_advance(struct btree_iter *iter) { struct bpos pos = iter->k.p; bool ret = bkey_cmp(pos, POS_MAX) != 0; @@ -1512,7 +1512,7 @@ inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) return ret; } -inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter) +inline bool bch2_btree_iter_rewind(struct btree_iter *iter) { struct bpos pos = bkey_start_pos(&iter->k); bool ret = bkey_cmp(pos, POS_MIN) != 0; @@ -1637,7 +1637,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) */ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) { - if (!bch2_btree_iter_advance_pos(iter)) + if (!bch2_btree_iter_advance(iter)) return bkey_s_c_null; return bch2_btree_iter_peek(iter); @@ -1691,7 +1691,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter) k = __bch2_btree_iter_peek_with_updates(iter); if (k.k && bkey_deleted(k.k)) { - if (!bch2_btree_iter_advance_pos(iter)) + if (!bch2_btree_iter_advance(iter)) return bkey_s_c_null; continue; } @@ -1716,7 +1716,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct 
btree_iter *iter) struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter) { - if (!bch2_btree_iter_advance_pos(iter)) + if (!bch2_btree_iter_advance(iter)) return bkey_s_c_null; return bch2_btree_iter_peek_with_updates(iter); @@ -1793,7 +1793,7 @@ no_key: */ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) { - if (!bch2_btree_iter_rewind_pos(iter)) + if (!bch2_btree_iter_rewind(iter)) return bkey_s_c_null; return bch2_btree_iter_peek_prev(iter); @@ -1885,7 +1885,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) { - if (!bch2_btree_iter_advance_pos(iter)) + if (!bch2_btree_iter_advance(iter)) return bkey_s_c_null; return bch2_btree_iter_peek_slot(iter); @@ -1893,7 +1893,7 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter) struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) { - if (!bch2_btree_iter_rewind_pos(iter)) + if (!bch2_btree_iter_rewind(iter)) return bkey_s_c_null; return bch2_btree_iter_peek_slot(iter); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c839bfe6ffa4..1276d8aaf652 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -175,8 +175,8 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); -bool bch2_btree_iter_advance_pos(struct btree_iter *); -bool bch2_btree_iter_rewind_pos(struct btree_iter *); +bool bch2_btree_iter_advance(struct btree_iter *); +bool bch2_btree_iter_rewind(struct btree_iter *); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); /* Sort order for locking btree iterators: */ diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 2c2d58514c68..8b837ac69d74 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -356,7 +356,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (err) break; - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); i->from = iter->pos; err = flush_buf(i); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index bdce37981c5c..370f9e6916f3 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -842,13 +842,13 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct bch_extent_ptr *ptr, *ec_ptr = NULL; if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); continue; } block = bkey_matches_stripe(&s->key.v, k); if (block < 0) { - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); continue; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3acda0389da8..77db405e3418 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -914,7 +914,7 @@ retry: if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); continue; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f8e0b24d087a..ffb30ef7ef00 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -547,7 +547,7 @@ retry: i_sectors += k.k->size; bch2_bkey_buf_reassemble(&prev, c, k); - bch2_btree_iter_advance_pos(iter); + bch2_btree_iter_advance(iter); } fsck_err: if (ret == -EINTR) @@ -703,7 +703,7 @@ retry: } - bch2_btree_iter_advance_pos(iter); + bch2_btree_iter_advance(iter); } hash_stop_chain(&trans, &h); @@ -762,7 +762,7 @@ retry: if (ret) break; - bch2_btree_iter_advance_pos(iter); + bch2_btree_iter_advance(iter); } fsck_err: if (ret == -EINTR) @@ -1389,7 +1389,7 @@ peek_nlinks: link = 
genradix_iter_peek(&nlinks_iter, links); if (nlinks_pos == iter->pos.offset) genradix_iter_advance(&nlinks_iter, links); - bch2_btree_iter_advance_pos(iter); + bch2_btree_iter_advance(iter); bch2_trans_cond_resched(&trans); } fsck_err: diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 4d8b4169923d..ef69a19f494a 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -53,7 +53,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { if (!bch2_bkey_has_device(k, dev_idx)) { - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); continue; } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index f7b0764d9c98..87307670fd4a 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -638,7 +638,7 @@ next: atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k), &stats->sectors_seen); next_nondata: - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); bch2_trans_cond_resched(&trans); } out: -- cgit v1.2.3 From 050197b1c1df1cfee84523bf2183c8674e06d10f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 28 Apr 2021 19:36:12 -0400 Subject: bcachefs: Ensure that fpunch updates inode timestamps Fixes xfstests generic/059 Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 14 ++++++++++++++ fs/bcachefs/fs.c | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index a7cd85647354..fbf171a4c191 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2407,6 +2407,15 @@ err: /* fallocate: */ +static int inode_update_times_fn(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, void *p) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); + return 0; +} + static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) { struct bch_fs *c = inode->v.i_sb->s_fs_info; @@ -2444,6 +2453,11 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); } + + mutex_lock(&inode->ei_update_lock); + ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_MTIME|ATTR_CTIME) ?: ret; + mutex_unlock(&inode->ei_update_lock); err: bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 77db405e3418..67e9a354ad37 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -145,7 +145,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, struct bch_inode_unpacked inode_u; int ret; - bch2_trans_init(&trans, c, 0, 0); + bch2_trans_init(&trans, c, 0, 256); retry: bch2_trans_begin(&trans); -- cgit v1.2.3 From 595c1e9bab7fd5512250d0e297e50a549af59b1f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 28 Apr 2021 22:51:42 -0400 Subject: bcachefs: Fix time handling There were some overflows in the time conversion functions - fix this by converting tv_sec and tv_nsec separately. Also, set sb->time_min and sb->time_max. Fixes xfstest generic/258. 
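A note on the overflow being fixed here: a signed 64-bit count of nanoseconds covers only about ±292 years around the Unix epoch, so the old path - which multiplied the on-disk time by time_precision into a single s64 nanosecond value before calling ns_to_timespec64(), as the removed lines below show - overflows for timestamps the on-disk format can otherwise represent. Converting the seconds and sub-second parts separately never forms that intermediate. A minimal standalone sketch of the approach, in plain C with illustrative constants standing in for the real superblock fields:

    #include <stdint.h>

    /* Stand-ins for c->sb.time_units_per_sec / nsec_per_time_unit
     * (assuming 1ns on-disk time units): */
    #define TIME_UNITS_PER_SEC  1000000000LL
    #define NSEC_PER_TIME_UNIT  1LL

    struct ts { int64_t tv_sec; int32_t tv_nsec; };

    /* On-disk time -> timespec without ever building a full 64-bit
     * nanosecond count: */
    static struct ts fs_time_to_ts(int64_t t)
    {
        struct ts r;

        r.tv_sec  = t / TIME_UNITS_PER_SEC;
        r.tv_nsec = (int32_t) ((t % TIME_UNITS_PER_SEC) * NSEC_PER_TIME_UNIT);
        if (r.tv_nsec < 0) {            /* normalize negative remainders */
            r.tv_sec--;
            r.tv_nsec += 1000000000;
        }
        return r;
    }

(The kernel version in the diff below uses div_s64_rem(), the kernel helper for 64-bit division with remainder; the sketch's explicit normalization of negative remainders is an assumption for illustration, not something the patch itself does.)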
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 23 ++++++++++++++--------- fs/bcachefs/fs.c | 4 +++- fs/bcachefs/super-io.c | 10 ++++++++-- 3 files changed, 25 insertions(+), 12 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 323705f352de..c47e69931b8a 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -605,11 +605,13 @@ struct bch_fs { u64 time_base_lo; u32 time_base_hi; - u32 time_precision; + unsigned time_units_per_sec; + unsigned nsec_per_time_unit; u64 features; u64 compat; } sb; + struct bch_sb_handle disk_sb; unsigned short block_bits; /* ilog2(block_size) */ @@ -872,19 +874,22 @@ static inline unsigned block_bytes(const struct bch_fs *c) return c->opts.block_size << 9; } -static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time) +static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, s64 time) { - return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo); + struct timespec64 t; + s32 rem; + + time += c->sb.time_base_lo; + + t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem); + t.tv_nsec = rem * c->sb.nsec_per_time_unit; + return t; } static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts) { - s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo; - - if (c->sb.time_precision == 1) - return ns; - - return div_s64(ns, c->sb.time_precision); + return (ts.tv_sec * c->sb.time_units_per_sec + + (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo; } static inline s64 bch2_current_time(struct bch_fs *c) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 67e9a354ad37..b00f35201132 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1565,7 +1565,9 @@ got_sb: #endif sb->s_xattr = bch2_xattr_handlers; sb->s_magic = BCACHEFS_STATFS_MAGIC; - sb->s_time_gran = c->sb.time_precision; + sb->s_time_gran = c->sb.nsec_per_time_unit; + sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; + sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); c->vfs_sb = sb; strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e0de6f0c0cb4..4c7cea4cfc2b 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -373,9 +373,15 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.clean = BCH_SB_CLEAN(src); c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src); - c->sb.time_base_lo = le64_to_cpu(src->time_base_lo); + + c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision); + c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit; + + /* XXX this is wrong, we need a 96 or 128 bit integer type */ + c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo), + c->sb.nsec_per_time_unit); c->sb.time_base_hi = le32_to_cpu(src->time_base_hi); - c->sb.time_precision = le32_to_cpu(src->time_precision); + c->sb.features = le64_to_cpu(src->features[0]); c->sb.compat = le64_to_cpu(src->compat[0]); -- cgit v1.2.3 From ffcf9ec78c133fb85ff13d8119ff404e11820834 Mon Sep 17 00:00:00 2001 From: Stijn Tintel Date: Thu, 13 May 2021 23:08:47 +0300 Subject: bcachefs: avoid out-of-bounds in split_devs Calling mount with an empty source string causes an out-of-bounds error in split_devs. Check the length of the source string to avoid this. 
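For context on the failure mode - the exact out-of-bounds access inside split_devs() isn't visible in this hunk, so the following is a generic illustration rather than the actual crash site: any string parser that indexes relative to the end of its input reads out of bounds on an empty string, because the size_t arithmetic wraps:

    #include <string.h>

    /* Buggy: with s == "", strlen(s) is 0, and strlen(s) - 1 wraps
     * around to SIZE_MAX in size_t arithmetic - an out-of-bounds read: */
    char last_char(const char *s)
    {
        return s[strlen(s) - 1];
    }

    /* The shape of the fix in the hunk below: reject zero-length
     * input before any parsing happens: */
    char last_char_checked(const char *s)
    {
        if (strlen(s) == 0)
            return '\0';
        return s[strlen(s) - 1];
    }

A later patch in this series moves the check up into bch2_mount(), which can return -EINVAL directly instead of having split_devs() return NULL (which the caller reports as -ENOMEM).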
Signed-off-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b00f35201132..5eef67358cfb 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -32,6 +32,7 @@ #include #include #include +#include #include static struct kmem_cache *bch2_inode_cache; @@ -1324,6 +1325,9 @@ static char **split_devs(const char *_dev_name, unsigned *nr) char *dev_name = NULL, **devs = NULL, *s; size_t i, nr_devs = 0; + if (strlen(_dev_name) == 0) + return NULL; + dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) return NULL; -- cgit v1.2.3 From ed34341189478344eb54588ce73f190b86da4d5e Mon Sep 17 00:00:00 2001 From: Dan Robertson Date: Tue, 18 May 2021 20:36:20 -0400 Subject: bcachefs: statfs reports incorrect avail blocks The current implementation of bch_statfs does not scale the number of available blocks provided in f_bavail by the reserve factor. This causes an allocation of a file of the reported available size to fail. Signed-off-by: Dan Robertson Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 7 ------- fs/bcachefs/buckets.h | 7 +++++++ fs/bcachefs/fs.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index afee0594efae..b37cdf7279de 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -261,18 +261,11 @@ void bch2_fs_usage_to_text(struct printbuf *out, } } -#define RESERVE_FACTOR 6 - static u64 reserve_factor(u64 r) { return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); } -static u64 avail_factor(u64 r) -{ - return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1); -} - u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage) { return min(fs_usage->u.hidden + diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 7463e6420b14..04a2a9310cdd 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -294,6 +294,13 @@ static inline int bch2_disk_reservation_get(struct bch_fs *c, return bch2_disk_reservation_add(c, res, sectors * nr_replicas, flags); } +#define RESERVE_FACTOR 6 + +static inline u64 avail_factor(u64 r) +{ + return div_u64(r << RESERVE_FACTOR, (1 << RESERVE_FACTOR) + 1); +} + int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); void bch2_dev_buckets_free(struct bch_dev *); int bch2_dev_buckets_alloc(struct bch_fs *, struct bch_dev *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5eef67358cfb..9a595c205dbf 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1274,8 +1274,8 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = usage.capacity >> shift; - buf->f_bfree = (usage.capacity - usage.used) >> shift; - buf->f_bavail = buf->f_bfree; + buf->f_bfree = usage.free >> shift; + buf->f_bavail = avail_factor(usage.free) >> shift; buf->f_files = usage.nr_inodes + avail_inodes; buf->f_ffree = avail_inodes; -- cgit v1.2.3 From ddc7dd62f0971d5c46c155134c647e7d493b2045 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 27 May 2021 19:15:44 -0400 Subject: bcachefs: Don't use uuid in tracepoints %pU for printing out pointers to uuids doesn't work in perf trace Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/fs.c | 2 ++ fs/bcachefs/trace.h | 93 +++++++++++++++++++++++--------------------- 3 files changed, 45 
insertions(+), 51 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 2b0c9b1c841b..2ed795a58c81 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -568,6 +568,7 @@ struct bch_fs { int minor; struct device *chardev; struct super_block *vfs_sb; + dev_t dev; char name[40]; /* ro/rw, add/remove/resize devices: */ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 9a595c205dbf..b1b7ee722f2c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1591,6 +1591,8 @@ got_sb: break; } + c->dev = sb->s_dev; + #ifdef CONFIG_BCACHEFS_POSIX_ACL if (c->opts.acl) sb->s_flags |= SB_POSIXACL; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 1f62d82624bd..d447b79bd6ee 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -49,14 +49,14 @@ DECLARE_EVENT_CLASS(bch_fs, TP_ARGS(c), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; ), - TP_printk("%pU", __entry->uuid) + TP_printk("%d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) ); DECLARE_EVENT_CLASS(bio, @@ -131,7 +131,7 @@ TRACE_EVENT(journal_reclaim_start, btree_key_cache_dirty, btree_key_cache_total), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) __field(u64, min_nr ) __field(u64, prereserved ) __field(u64, prereserved_total ) @@ -142,7 +142,7 @@ TRACE_EVENT(journal_reclaim_start, ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; __entry->min_nr = min_nr; __entry->prereserved = prereserved; __entry->prereserved_total = prereserved_total; @@ -152,8 +152,8 @@ TRACE_EVENT(journal_reclaim_start, __entry->btree_key_cache_total = btree_key_cache_total; ), - TP_printk("%pU min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu", - __entry->uuid, + TP_printk("%d,%d min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->min_nr, __entry->prereserved, __entry->prereserved_total, @@ -168,16 +168,18 @@ TRACE_EVENT(journal_reclaim_finish, TP_ARGS(c, nr_flushed), TP_STRUCT__entry( - __array(char, uuid, 16 ) - __field(u64, nr_flushed ) + __field(dev_t, dev ) + __field(u64, nr_flushed ) ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - __entry->nr_flushed = nr_flushed; + __entry->dev = c->dev; + __entry->nr_flushed = nr_flushed; ), - TP_printk("%pU flushed %llu", __entry->uuid, __entry->nr_flushed) + TP_printk("%d%d flushed %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nr_flushed) ); /* bset.c: */ @@ -194,7 +196,7 @@ DECLARE_EVENT_CLASS(btree_node, TP_ARGS(c, b), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) __field(u8, level ) __field(u8, id ) __field(u64, inode ) @@ -202,15 +204,16 @@ DECLARE_EVENT_CLASS(btree_node, ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; __entry->level = b->c.level; __entry->id = b->c.btree_id; __entry->inode = b->key.k.p.inode; __entry->offset = b->key.k.p.offset; ), - TP_printk("%pU %u id %u %llu:%llu", - __entry->uuid, __entry->level, __entry->id, + TP_printk("%d,%d %u id %u %llu:%llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->level, __entry->id, __entry->inode, __entry->offset) ); @@ -254,32 +257,17 @@ DEFINE_EVENT(btree_node, btree_node_reap, TP_ARGS(c, b) ); -DECLARE_EVENT_CLASS(btree_node_cannibalize_lock, - TP_PROTO(struct bch_fs *c), - TP_ARGS(c), - - 
TP_STRUCT__entry( - __array(char, uuid, 16 ) - ), - - TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); - ), - - TP_printk("%pU", __entry->uuid) -); - -DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock_fail, +DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock_fail, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize_lock, +DEFINE_EVENT(bch_fs, btree_node_cannibalize_lock, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); -DEFINE_EVENT(btree_node_cannibalize_lock, btree_node_cannibalize, +DEFINE_EVENT(bch_fs, btree_node_cannibalize, TP_PROTO(struct bch_fs *c), TP_ARGS(c) ); @@ -294,18 +282,19 @@ TRACE_EVENT(btree_reserve_get_fail, TP_ARGS(c, required, cl), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) __field(size_t, required ) __field(struct closure *, cl ) ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; __entry->required = required; __entry->cl = cl; ), - TP_printk("%pU required %zu by %p", __entry->uuid, + TP_printk("%d,%d required %zu by %p", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->required, __entry->cl) ); @@ -483,19 +472,20 @@ TRACE_EVENT(move_data, TP_ARGS(c, sectors_moved, keys_moved), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) __field(u64, sectors_moved ) __field(u64, keys_moved ) ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; __entry->sectors_moved = sectors_moved; __entry->keys_moved = keys_moved; ), - TP_printk("%pU sectors_moved %llu keys_moved %llu", - __entry->uuid, __entry->sectors_moved, __entry->keys_moved) + TP_printk("%d,%d sectors_moved %llu keys_moved %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->sectors_moved, __entry->keys_moved) ); TRACE_EVENT(copygc, @@ -507,7 +497,7 @@ TRACE_EVENT(copygc, buckets_moved, buckets_not_moved), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) __field(u64, sectors_moved ) __field(u64, sectors_not_moved ) __field(u64, buckets_moved ) @@ -515,17 +505,17 @@ TRACE_EVENT(copygc, ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; __entry->sectors_moved = sectors_moved; __entry->sectors_not_moved = sectors_not_moved; __entry->buckets_moved = buckets_moved; __entry->buckets_not_moved = buckets_moved; ), - TP_printk("%pU sectors moved %llu remain %llu buckets moved %llu remain %llu", - __entry->uuid, - __entry->sectors_moved, __entry->sectors_not_moved, - __entry->buckets_moved, __entry->buckets_not_moved) + TP_printk("%d,%d sectors moved %llu remain %llu buckets moved %llu remain %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->sectors_moved, __entry->sectors_not_moved, + __entry->buckets_moved, __entry->buckets_not_moved) ); TRACE_EVENT(copygc_wait, @@ -534,19 +524,20 @@ TRACE_EVENT(copygc_wait, TP_ARGS(c, wait_amount, until), TP_STRUCT__entry( - __array(char, uuid, 16 ) + __field(dev_t, dev ) __field(u64, wait_amount ) __field(u64, until ) ), TP_fast_assign( - memcpy(__entry->uuid, c->sb.user_uuid.b, 16); + __entry->dev = c->dev; __entry->wait_amount = wait_amount; __entry->until = until; ), - TP_printk("%pU waiting for %llu sectors until %llu", - __entry->uuid, __entry->wait_amount, __entry->until) + TP_printk("%d,%u waiting for %llu sectors until %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->wait_amount, __entry->until) ); TRACE_EVENT(trans_get_iter, -- cgit v1.2.3 From f7beb4ca04521f26d9ac2e9bee1bfbf99c55fabb 
Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Jun 2021 23:31:42 -0400 Subject: bcachefs: Preallocate transaction mem This helps avoid transaction restarts. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 4 ++-- fs/bcachefs/fs.c | 2 +- fs/bcachefs/inode.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 08746a6cbc31..132ff0497b39 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2531,7 +2531,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, } bch2_bkey_buf_init(©); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); src = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); @@ -2651,7 +2651,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; int ret = 0; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, start_sector), diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b1b7ee722f2c..99885fb97aa2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -146,7 +146,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, struct bch_inode_unpacked inode_u; int ret; - bch2_trans_init(&trans, c, 0, 256); + bch2_trans_init(&trans, c, 0, 512); retry: bch2_trans_begin(&trans); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 18b568887144..c65bfee1897e 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -579,7 +579,7 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c, 0, 0); + bch2_trans_init(&trans, c, 0, 1024); /* * If this was a directory, there shouldn't be any real dirents left - -- cgit v1.2.3 From 044c8c9e05bc87cdc610eb320e8fdc694f3866df Mon Sep 17 00:00:00 2001 From: Dan Robertson Date: Thu, 10 Jun 2021 07:52:42 -0400 Subject: bcachefs: mount: fix null deref with null devname - Fix null deref on mount when given a null device name. - Move the dev_name checks to return EINVAL when it is invalid. Signed-off-by: Dan Robertson Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 99885fb97aa2..efb467316756 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1325,9 +1325,6 @@ static char **split_devs(const char *_dev_name, unsigned *nr) char *dev_name = NULL, **devs = NULL, *s; size_t i, nr_devs = 0; - if (strlen(_dev_name) == 0) - return NULL; - dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) return NULL; @@ -1503,6 +1500,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (ret) return ERR_PTR(ret); + if (!dev_name || strlen(dev_name) == 0) + return ERR_PTR(-EINVAL); + devs = split_devs(dev_name, &nr_devs); if (!devs) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 68a507a2e8cdc9b90599bb5d220a696abdc54838 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 14 Jun 2021 22:29:54 -0400 Subject: bcachefs: fix truncate with ATTR_MODE After the v5.12 rebase, we started oopsing when truncate was passed ATTR_MODE, due to not passing mnt_userns to setattr_copy(). 
This refactors things so that truncate/extend finish by using bch2_setattr_nonsize(), which solves the problem. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 43 +++++++++++++++++++++++-------------------- fs/bcachefs/fs-io.h | 3 ++- fs/bcachefs/fs.c | 11 +++++++---- fs/bcachefs/fs.h | 4 ++++ 4 files changed, 36 insertions(+), 25 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 0ffc3971d1b2..a25c3b70ef74 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2252,11 +2252,11 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) from, round_up(from, PAGE_SIZE)); } -static int bch2_extend(struct bch_inode_info *inode, +static int bch2_extend(struct mnt_idmap *idmap, + struct bch_inode_info *inode, struct bch_inode_unpacked *inode_u, struct iattr *iattr) { - struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; int ret; @@ -2270,25 +2270,15 @@ static int bch2_extend(struct bch_inode_info *inode, return ret; truncate_setsize(&inode->v, iattr->ia_size); - /* ATTR_MODE will never be set here, ns argument isn't needed: */ - setattr_copy(NULL, &inode->v, iattr); - - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, inode->v.i_size, - ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - return ret; + return bch2_setattr_nonsize(idmap, inode, iattr); } static int bch2_truncate_finish_fn(struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { - struct bch_fs *c = inode->v.i_sb->s_fs_info; - bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; - bi->bi_mtime = bi->bi_ctime = bch2_current_time(c); return 0; } @@ -2302,7 +2292,8 @@ static int bch2_truncate_start_fn(struct bch_inode_info *inode, return 0; } -int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) +int bch2_truncate(struct mnt_idmap *idmap, + struct bch_inode_info *inode, struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; @@ -2313,6 +2304,18 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) s64 i_sectors_delta = 0; int ret = 0; + /* + * Don't update timestamps if we're not doing anything: + */ + if (iattr->ia_size == inode->v.i_size) + return 0; + + if (!(iattr->ia_valid & ATTR_MTIME)) + ktime_get_coarse_real_ts64(&iattr->ia_mtime); + if (!(iattr->ia_valid & ATTR_CTIME)) + ktime_get_coarse_real_ts64(&iattr->ia_ctime); + iattr->ia_valid |= ATTR_MTIME|ATTR_CTIME; + inode_dio_wait(&inode->v); bch2_pagecache_block_get(&inode->ei_pagecache_lock); @@ -2342,10 +2345,12 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) inode->v.i_size < inode_u.bi_size); if (iattr->ia_size > inode->v.i_size) { - ret = bch2_extend(inode, &inode_u, iattr); + ret = bch2_extend(idmap, inode, &inode_u, iattr); goto err; } + iattr->ia_valid &= ~ATTR_SIZE; + ret = bch2_truncate_page(inode, iattr->ia_size); if (unlikely(ret)) goto err; @@ -2389,13 +2394,11 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) if (unlikely(ret)) goto err; - /* ATTR_MODE will never be set here, ns argument isn't needed: */ - setattr_copy(NULL, &inode->v, iattr); - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, - ATTR_MTIME|ATTR_CTIME); + ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); mutex_unlock(&inode->ei_update_lock); + + ret = bch2_setattr_nonsize(idmap, inode, iattr); err: 
bch2_pagecache_block_put(&inode->ei_pagecache_lock); return ret; diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index 2a2df58a46bb..64b16b44e25a 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -31,7 +31,8 @@ ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); int bch2_fsync(struct file *, loff_t, loff_t, int); -int bch2_truncate(struct bch_inode_info *, struct iattr *); +int bch2_truncate(struct mnt_idmap *, + struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index efb467316756..71e738b98967 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -662,6 +662,9 @@ static void bch2_setattr_copy(struct mnt_idmap *idmap, if (ia_valid & ATTR_GID) bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid); + if (ia_valid & ATTR_SIZE) + bi->bi_size = attr->ia_size; + if (ia_valid & ATTR_ATIME) bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime); if (ia_valid & ATTR_MTIME) @@ -682,9 +685,9 @@ static void bch2_setattr_copy(struct mnt_idmap *idmap, } } -static int bch2_setattr_nonsize(struct mnt_idmap *idmap, - struct bch_inode_info *inode, - struct iattr *attr) +int bch2_setattr_nonsize(struct mnt_idmap *idmap, + struct bch_inode_info *inode, + struct iattr *attr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; @@ -808,7 +811,7 @@ static int bch2_setattr(struct mnt_idmap *idmap, return ret; return iattr->ia_valid & ATTR_SIZE - ? bch2_truncate(inode, iattr) + ? bch2_truncate(idmap, inode, iattr) : bch2_setattr_nonsize(idmap, inode, iattr); } diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index f3072780af51..c08a828d66cd 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -166,6 +166,10 @@ void bch2_inode_update_after_write(struct bch_fs *, int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, inode_set_fn, void *, unsigned); +int bch2_setattr_nonsize(struct mnt_idmap *, + struct bch_inode_info *, + struct iattr *); + void bch2_vfs_exit(void); int bch2_vfs_init(void); -- cgit v1.2.3 From e8e9607f3c1bb927002b7582b68d36c7eb3e92e2 Mon Sep 17 00:00:00 2001 From: Dan Robertson Date: Wed, 23 Jun 2021 19:25:00 -0400 Subject: bcachefs: statfs bfree and bavail should be the same The value of f_bfree and f_bavail should be the same. The value of f_bfree is not currently scaled by the availability factor. Signed-off-by: Dan Robertson Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 71e738b98967..1b0d63219c3b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1277,8 +1277,8 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = usage.capacity >> shift; - buf->f_bfree = usage.free >> shift; - buf->f_bavail = avail_factor(usage.free) >> shift; + buf->f_bfree = avail_factor(usage.free) >> shift; + buf->f_bavail = buf->f_bfree; buf->f_files = usage.nr_inodes + avail_inodes; buf->f_ffree = avail_inodes; -- cgit v1.2.3 From 47924527e643e6160c6726669b90cad8aeb6d977 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Sep 2023 23:35:02 -0400 Subject: Revert "bcachefs: statfs bfree and bavail should be the same" This reverts commit 664f9847bec525d396d62d2db094ca9020289ae0. 
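To put a number on the statfs back-and-forth: with RESERVE_FACTOR = 6, avail_factor() scales by 2^6 / (2^6 + 1) = 64/65, holding back roughly 1.5% of free space for internal reservations. A standalone check of the arithmetic, in plain C mirroring the avail_factor() shown in the buckets.h hunk above (div_u64() replaced by ordinary division for userspace):

    #include <stdint.h>
    #include <stdio.h>

    #define RESERVE_FACTOR 6

    static uint64_t avail_factor(uint64_t r)
    {
        /* r * 2^6 / (2^6 + 1), i.e. r * 64/65 */
        return (r << RESERVE_FACTOR) / ((1ULL << RESERVE_FACTOR) + 1);
    }

    int main(void)
    {
        uint64_t free_sectors = 1000000;

        /* prints 984615: about 1.5% withheld from f_bavail */
        printf("%llu\n", (unsigned long long) avail_factor(free_sectors));
        return 0;
    }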
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 1b0d63219c3b..71e738b98967 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1277,8 +1277,8 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = BCACHEFS_STATFS_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = usage.capacity >> shift; - buf->f_bfree = avail_factor(usage.free) >> shift; - buf->f_bavail = buf->f_bfree; + buf->f_bfree = usage.free >> shift; + buf->f_bavail = avail_factor(usage.free) >> shift; buf->f_files = usage.nr_inodes + avail_inodes; buf->f_ffree = avail_inodes; -- cgit v1.2.3 From 700c25b32a776a70849c025d898ba1a7431279e4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 24 Jul 2021 20:24:10 -0400 Subject: bcachefs: Use bch2_trans_begin() more consistently Upcoming patch will require that a transaction restart is always immediately followed by bch2_trans_begin(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/fs-io.c | 4 ++++ fs/bcachefs/fs.c | 2 ++ fs/bcachefs/io.c | 17 +++++++++-------- fs/bcachefs/move.c | 7 ++++--- fs/bcachefs/reflink.c | 4 ++-- 6 files changed, 22 insertions(+), 14 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index bab135fae0b0..b5f35a419004 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -114,7 +114,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, _ret = (_do); \ if (_ret != -EINTR) \ break; \ - bch2_trans_reset(_trans, 0); \ + bch2_trans_begin(_trans); \ } \ \ _ret; \ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 1a06f77101ab..30e5acd2e97c 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -784,6 +784,8 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_init(&sk); retry: + bch2_trans_begin(trans); + while (1) { struct bkey_s_c k; unsigned bytes, sectors, offset_into_extent; @@ -2541,6 +2543,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct bpos atomic_end; unsigned trigger_flags = 0; + bch2_trans_begin(&trans); + k = insert ? 
bch2_btree_iter_peek_prev(src) : bch2_btree_iter_peek(src); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 71e738b98967..b2d6e80bbb78 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -911,6 +911,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(ei->v.i_ino, start >> 9), 0); retry: + bch2_trans_begin(&trans); + while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 5ff8fea80fba..f293876e0bbc 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -376,14 +376,13 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k; int ret = 0, ret2 = 0; - while ((k = bch2_btree_iter_peek(iter)).k && + while ((bch2_trans_begin(trans), + (k = bch2_btree_iter_peek(iter)).k) && bkey_cmp(iter->pos, end) < 0) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; - bch2_trans_begin(trans); - ret = bkey_err(k); if (ret) goto btree_err; @@ -2278,12 +2277,13 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS); +retry: + bch2_trans_begin(&trans); + while (1) { unsigned bytes, sectors, offset_into_extent; enum btree_id data_btree = BTREE_ID_extents; @@ -2339,19 +2339,20 @@ retry: swap(bvec_iter.bi_size, bytes); bio_advance_iter(&rbio->bio, &bvec_iter, bytes); } - bch2_trans_iter_put(&trans, iter); if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) goto retry; + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); + if (ret) { bch_err_inum_ratelimited(c, inode, "read error %i from btree lookup", ret); rbio->bio.bi_status = BLK_STS_IOERR; bch2_rbio_done(rbio); } - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); } void bch2_fs_io_exit(struct bch_fs *c) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7dea6637ae58..9a423a3e4570 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -83,7 +83,7 @@ int bch2_migrate_index_update(struct bch_write_op *op) bool extending = false, should_check_enospc; s64 i_sectors_delta = 0, disk_sectors_delta = 0; - bch2_trans_reset(&trans, 0); + bch2_trans_begin(&trans); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); @@ -597,6 +597,8 @@ static int __bch2_move_data(struct bch_fs *c, } } while (delay); + bch2_trans_begin(&trans); + k = bch2_btree_iter_peek(iter); stats->pos = iter->pos; @@ -652,8 +654,7 @@ static int __bch2_move_data(struct bch_fs *c, data_cmd, data_opts); if (ret2) { if (ret2 == -EINTR) { - bch2_trans_reset(&trans, 0); - bch2_trans_cond_resched(&trans); + bch2_trans_begin(&trans); continue; } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 8b168246ca38..3d9c5c5b0eba 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -305,12 +305,12 @@ s64 bch2_remap_range(struct bch_fs *c, dst_done = dst_iter->pos.offset - dst_start.offset; new_i_size = min(dst_iter->pos.offset << 9, new_i_size); - bch2_trans_begin(&trans); - do { struct bch_inode_unpacked inode_u; struct btree_iter *inode_iter; + bch2_trans_begin(&trans); + inode_iter = bch2_inode_peek(&trans, &inode_u, dst_start.inode, BTREE_ITER_INTENT); ret2 = PTR_ERR_OR_ZERO(inode_iter); -- cgit v1.2.3 From 
1a488e73067c65086191a63fe61e57692383fb27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 27 Jul 2021 22:15:04 -0400 Subject: bcachefs: Kill BTREE_INSERT_NOUNLOCK With the recent transaction restart changes, it's no longer needed - all transaction commits have BTREE_INSERT_NOUNLOCK semantics. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 3 +-- fs/bcachefs/btree_key_cache.c | 1 - fs/bcachefs/btree_update.h | 6 ------ fs/bcachefs/btree_update_interior.c | 3 +-- fs/bcachefs/btree_update_leaf.c | 4 ++-- fs/bcachefs/fs.c | 12 +++--------- fs/bcachefs/fsck.c | 3 +-- fs/bcachefs/quota.c | 2 +- 8 files changed, 9 insertions(+), 25 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index d3e2e24758ba..39ac6d2e178d 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -336,8 +336,7 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK); + &inode->ei_journal_seq, 0); btree_err: bch2_trans_iter_put(&trans, inode_iter); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 8fb18ad2e1ae..ac8f40810d7a 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -406,7 +406,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index a1f2d9822821..217b52e1a168 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -15,7 +15,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); enum btree_insert_flags { - __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOCHECK_RW, __BTREE_INSERT_LAZY_RW, @@ -29,11 +28,6 @@ enum btree_insert_flags { __BCH_HASH_SET_MUST_REPLACE, }; -/* - * Don't drop locks _after_ successfully updating btree: - */ -#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK) - /* Don't check for -ENOSPC: */ #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 2e8697196ac9..2594738f3d53 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1927,8 +1927,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BTREE_INSERT_NOFAIL| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_JOURNAL_RESERVED| - BTREE_INSERT_NOUNLOCK); + BTREE_INSERT_JOURNAL_RESERVED); if (ret) goto err; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 7ce94340f817..882b9da8fe61 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -524,8 +524,8 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree if (u64s_delta > 0) return 0; - return bch2_foreground_maybe_merge(trans, iter, iter->level, - trans->flags & ~BTREE_INSERT_NOUNLOCK); + return bch2_foreground_maybe_merge(trans, iter, + iter->level, trans->flags); } /* diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b2d6e80bbb78..f6c058540712 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -157,7 +157,6 @@ retry: bch2_inode_write(&trans, iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, 
&inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); /* @@ -295,8 +294,7 @@ retry: if (unlikely(ret)) goto err_before_quota; - ret = bch2_trans_commit(&trans, NULL, &journal_seq, - BTREE_INSERT_NOUNLOCK); + ret = bch2_trans_commit(&trans, NULL, &journal_seq, 0); if (unlikely(ret)) { bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); @@ -417,8 +415,7 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK, + ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0, bch2_link_trans(&trans, dir->v.i_ino, inode->v.i_ino, &dir_u, &inode_u, @@ -470,7 +467,6 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) bch2_trans_init(&trans, c, 4, 1024); ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, dir->v.i_ino, &dir_u, @@ -591,8 +587,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, goto err; } - ret = __bch2_trans_do(&trans, NULL, &journal_seq, - BTREE_INSERT_NOUNLOCK, + ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0, bch2_rename_trans(&trans, src_dir->v.i_ino, &src_dir_u, dst_dir->v.i_ino, &dst_dir_u, @@ -735,7 +730,6 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); btree_err: bch2_trans_iter_put(&trans, inode_iter); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 63d42542c194..36eba46d566e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -803,8 +803,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __write_inode(trans, &target, target_snapshot) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOUNLOCK); + BTREE_INSERT_LAZY_RW); if (ret) return ret; return -EINTR; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 35b409e0f366..7861781a4a7f 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -760,7 +760,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bkey_quota_init(&new_quota.k_i); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, + ret = bch2_trans_do(c, NULL, NULL, 0, bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); -- cgit v1.2.3 From 67e0dd8f0d8b4bf09098c4692abcb43a20089dff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Aug 2021 15:18:31 -0400 Subject: bcachefs: btree_path This splits btree_iter into two components: btree_iter is now the externally visible component, and it points to a btree_path, which is now reference counted. This means we no longer have to clone iterators up front if they might be mutated - btree_path can be shared by multiple iterators, and cloned if an iterator would mutate a shared btree_path. This will help us use iterators more efficiently, as well as slimming down the main long-lived state in btree_trans, and significantly cleaning up the logic for iterator lifetimes.
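A rough sketch of the new split (abridged and illustrative only - field
names are a subset and not the exact definitions in btree_types.h):

	/* Shared, reference-counted view of a position in a btree: */
	struct btree_path {
		u8			ref;	/* iterators pointing here */
		bool			cached;
		enum btree_id		btree_id;
		struct bpos		pos;
		unsigned		level, locks_want;
		struct btree_path_level	l[BTREE_MAX_DEPTH];
	};

	/* Thin, externally visible handle, owned by a single user: */
	struct btree_iter {
		struct btree_trans	*trans;
		struct btree_path	*path;	/* cloned before a mutation
						 * if path->ref > 1 */
		unsigned		flags;
		struct bpos		pos;
		struct bkey		k;
	};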
Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 41 +- fs/bcachefs/alloc_background.c | 55 +- fs/bcachefs/bcachefs.h | 8 +- fs/bcachefs/bset.c | 4 +- fs/bcachefs/btree_cache.c | 36 +- fs/bcachefs/btree_cache.h | 7 +- fs/bcachefs/btree_gc.c | 36 +- fs/bcachefs/btree_iter.c | 2068 ++++++++++++++++++----------------- fs/bcachefs/btree_iter.h | 265 ++--- fs/bcachefs/btree_key_cache.c | 135 ++- fs/bcachefs/btree_key_cache.h | 5 +- fs/bcachefs/btree_locking.h | 117 +- fs/bcachefs/btree_types.h | 94 +- fs/bcachefs/btree_update.h | 13 +- fs/bcachefs/btree_update_interior.c | 217 ++-- fs/bcachefs/btree_update_interior.h | 20 +- fs/bcachefs/btree_update_leaf.c | 216 ++-- fs/bcachefs/buckets.c | 177 +-- fs/bcachefs/buckets.h | 6 +- fs/bcachefs/debug.c | 32 +- fs/bcachefs/dirent.c | 77 +- fs/bcachefs/dirent.h | 3 +- fs/bcachefs/ec.c | 76 +- fs/bcachefs/extent_update.c | 10 +- fs/bcachefs/extents.c | 4 +- fs/bcachefs/fs-common.c | 113 +- fs/bcachefs/fs-io.c | 82 +- fs/bcachefs/fs.c | 44 +- fs/bcachefs/fsck.c | 159 +-- fs/bcachefs/inode.c | 61 +- fs/bcachefs/inode.h | 8 +- fs/bcachefs/io.c | 98 +- fs/bcachefs/journal_seq_blacklist.c | 4 +- fs/bcachefs/migrate.c | 26 +- fs/bcachefs/move.c | 62 +- fs/bcachefs/quota.c | 20 +- fs/bcachefs/recovery.c | 34 +- fs/bcachefs/reflink.c | 76 +- fs/bcachefs/str_hash.h | 65 +- fs/bcachefs/super.c | 8 +- fs/bcachefs/sysfs.c | 3 +- fs/bcachefs/tests.c | 189 ++-- fs/bcachefs/trace.h | 4 +- fs/bcachefs/xattr.c | 26 +- 44 files changed, 2385 insertions(+), 2419 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 39ac6d2e178d..93b78e4e6e0d 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -219,7 +219,7 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter = { NULL }; struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; struct bkey_s_c k; @@ -229,20 +229,19 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, retry: bch2_trans_begin(&trans); - iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, + ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc, &hash, inode->v.i_ino, &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); - if (IS_ERR(iter)) { - if (PTR_ERR(iter) == -EINTR) + if (ret) { + if (ret == -EINTR) goto retry; - - if (PTR_ERR(iter) != -ENOENT) - acl = ERR_CAST(iter); + if (ret != -ENOENT) + acl = ERR_PTR(ret); goto out; } - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) { acl = ERR_PTR(ret); @@ -255,8 +254,8 @@ retry: if (!IS_ERR(acl)) set_cached_acl(&inode->v, type, acl); - bch2_trans_iter_put(&trans, iter); out: + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return acl; } @@ -298,7 +297,7 @@ int bch2_set_acl(struct mnt_idmap *idmap, struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; - struct btree_iter *inode_iter; + struct btree_iter inode_iter = { NULL }; struct bch_inode_unpacked inode_u; struct bch_hash_info hash_info; struct posix_acl *acl; @@ -311,9 +310,8 @@ retry: bch2_trans_begin(&trans); acl = _acl; - inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino, + BTREE_ITER_INTENT); if (ret) goto 
btree_err; @@ -334,11 +332,11 @@ retry: inode_u.bi_ctime = bch2_current_time(c); inode_u.bi_mode = mode; - ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: + ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, 0); btree_err: - bch2_trans_iter_put(&trans, inode_iter); + bch2_trans_iter_exit(&trans, &inode_iter); if (ret == -EINTR) goto retry; @@ -362,22 +360,21 @@ int bch2_acl_chmod(struct btree_trans *trans, struct posix_acl **new_acl) { struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode); - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; struct posix_acl *acl; struct bkey_s_c k; int ret; - iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, + ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash_info, inode->bi_inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(iter); if (ret) return ret == -ENOENT ? 0 : ret; - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); xattr = bkey_s_c_to_xattr(k); if (ret) goto err; @@ -398,12 +395,12 @@ int bch2_acl_chmod(struct btree_trans *trans, goto err; } - new->k.p = iter->pos; - ret = bch2_trans_update(trans, iter, &new->k_i, 0); + new->k.p = iter.pos; + ret = bch2_trans_update(trans, &iter, &new->k_i, 0); *new_acl = acl; acl = NULL; err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); if (!IS_ERR_OR_NULL(acl)) kfree(acl); return ret; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 932a8176dff7..54fbfb22d671 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -353,32 +353,32 @@ err: int bch2_alloc_write(struct bch_fs *c, unsigned flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bch_dev *ca; unsigned i; int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); for_each_member_device(ca, c, i) { - bch2_btree_iter_set_pos(iter, + bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, ca->mi.first_bucket)); - while (iter->pos.offset < ca->mi.nbuckets) { + while (iter.pos.offset < ca->mi.nbuckets) { bch2_trans_cond_resched(&trans); - ret = bch2_alloc_write_key(&trans, iter, flags); + ret = bch2_alloc_write_key(&trans, &iter, flags); if (ret) { percpu_ref_put(&ca->ref); goto err; } - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); } } err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -390,18 +390,18 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, dev); - struct btree_iter *iter; + struct btree_iter iter; struct bucket *g; struct bkey_alloc_buf *a; struct bkey_alloc_unpacked u; u64 *time, now; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, POS(dev, bucket_nr), - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(dev, bucket_nr), + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto out; @@ -412,7 +412,7 @@ 
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, percpu_down_read(&c->mark_lock); g = bucket(ca, bucket_nr); - u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); + u = alloc_mem_to_key(&iter, g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); time = rw == READ ? &u.read_time : &u.write_time; @@ -423,10 +423,10 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev, *time = now; bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, iter, &a->k, 0) ?: + ret = bch2_trans_update(trans, &iter, &a->k, 0) ?: bch2_trans_commit(trans, NULL, NULL, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -695,27 +695,28 @@ static int bucket_invalidate_btree(struct btree_trans *trans, struct bkey_alloc_unpacked u; struct bucket *g; struct bucket_mark m; - struct btree_iter *iter = - bch2_trans_get_iter(trans, BTREE_ID_alloc, - POS(ca->dev_idx, b), - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); + struct btree_iter iter; int ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + POS(ca->dev_idx, b), + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + a = bch2_trans_kmalloc(trans, sizeof(*a)); ret = PTR_ERR_OR_ZERO(a); if (ret) goto err; - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto err; percpu_down_read(&c->mark_lock); g = bucket(ca, b); m = READ_ONCE(g->mark); - u = alloc_mem_to_key(iter, g, m); + u = alloc_mem_to_key(&iter, g, m); percpu_up_read(&c->mark_lock); u.gen++; @@ -726,10 +727,10 @@ static int bucket_invalidate_btree(struct btree_trans *trans, u.write_time = atomic64_read(&c->io_clock[WRITE].now); bch2_alloc_pack(c, a, u); - ret = bch2_trans_update(trans, iter, &a->k, + ret = bch2_trans_update(trans, &iter, &a->k, BTREE_TRIGGER_BUCKET_INVALIDATE); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index e2aac1da18ae..114ae77a8a02 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -558,8 +558,8 @@ struct journal_keys { u64 journal_seq_base; }; -struct btree_iter_buf { - struct btree_iter *iter; +struct btree_path_buf { + struct btree_path *path; }; #define REPLICAS_DELTA_LIST_MAX (1U << 16) @@ -667,9 +667,9 @@ struct bch_fs { /* btree_iter.c: */ struct mutex btree_trans_lock; struct list_head btree_trans_list; - mempool_t btree_iters_pool; + mempool_t btree_paths_pool; mempool_t btree_trans_mem_pool; - struct btree_iter_buf __percpu *btree_iters_bufs; + struct btree_path_buf __percpu *btree_paths_bufs; struct srcu_struct btree_trans_barrier; diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 9484f28be6de..2e0ad3a4fa67 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -185,9 +185,11 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter, return; /* Verify no duplicates: */ - btree_node_iter_for_each(iter, set) + btree_node_iter_for_each(iter, set) { + BUG_ON(set->k > set->end); btree_node_iter_for_each(iter, s2) BUG_ON(set != s2 && set->end == s2->end); + } /* Verify that set->end is correct: */ btree_node_iter_for_each(iter, set) { diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index c94ed4da1ca4..d45218d5fd35 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -641,7 +641,7 @@ err: /* Slowpath, don't want it inlined into btree_iter_traverse() */ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, 
struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level, @@ -657,7 +657,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * Parent node must be locked, else we could read in a btree node that's * been freed: */ - if (trans && !bch2_btree_node_relock(trans, iter, level + 1)) { + if (trans && !bch2_btree_node_relock(trans, path, level + 1)) { btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -699,7 +699,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, if (trans && (!bch2_trans_relock(trans) || - !bch2_btree_iter_relock_intent(trans, iter))) { + !bch2_btree_path_relock_intent(trans, path))) { BUG_ON(!trans->restarted); return ERR_PTR(-EINTR); } @@ -763,7 +763,7 @@ static inline void btree_check_header(struct bch_fs *c, struct btree *b) * The btree node will have either a read or a write lock held, depending on * the @write parameter. */ -struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_iter *iter, +struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) @@ -788,7 +788,7 @@ retry: * else we could read in a btree node from disk that's been * freed: */ - b = bch2_btree_node_fill(c, trans, iter, k, iter->btree_id, + b = bch2_btree_node_fill(c, trans, path, k, path->btree_id, level, lock_type, true); /* We raced and found the btree node in the cache */ @@ -827,10 +827,10 @@ lock_node: * the parent was modified, when the pointer to the node we want * was removed - and we'll bail out: */ - if (btree_node_read_locked(iter, level + 1)) - btree_node_unlock(iter, level + 1); + if (btree_node_read_locked(path, level + 1)) + btree_node_unlock(path, level + 1); - if (!btree_node_lock(trans, iter, b, k->k.p, level, lock_type, + if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type, lock_node_check_fn, (void *) k, trace_ip)) { if (!trans->restarted) goto retry; @@ -841,13 +841,13 @@ lock_node: b->c.level != level || race_fault())) { six_unlock_type(&b->c.lock, lock_type); - if (bch2_btree_node_relock(trans, iter, level + 1)) + if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; trace_trans_restart_btree_node_reused(trans->ip, trace_ip, - iter->btree_id, - &iter->real_pos); + path->btree_id, + &path->pos); btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -862,12 +862,12 @@ lock_node: bch2_btree_node_wait_on_read(b); /* - * should_be_locked is not set on this iterator yet, so we need - * to relock it specifically: + * should_be_locked is not set on this path yet, so we need to + * relock it specifically: */ if (trans && (!bch2_trans_relock(trans) || - !bch2_btree_iter_relock_intent(trans, iter))) { + !bch2_btree_path_relock_intent(trans, path))) { BUG_ON(!trans->restarted); return ERR_PTR(-EINTR); } @@ -895,7 +895,7 @@ lock_node: return ERR_PTR(-EIO); } - EBUG_ON(b->c.btree_id != iter->btree_id); + EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); @@ -986,21 +986,21 @@ out: int bch2_btree_node_prefetch(struct bch_fs *c, struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level) { struct btree_cache *bc = &c->btree_cache; struct btree *b; - BUG_ON(trans && !btree_node_locked(iter, level + 1)); + BUG_ON(trans && !btree_node_locked(path, level + 1)); 
BUG_ON(level >= BTREE_MAX_DEPTH); b = btree_cache_find(bc, k); if (b) return 0; - b = bch2_btree_node_fill(c, trans, iter, k, btree_id, + b = bch2_btree_node_fill(c, trans, path, k, btree_id, level, SIX_LOCK_read, false); return PTR_ERR_OR_ZERO(b); } diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 3b671cf0056d..2f6e0ea87616 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -20,16 +20,15 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); -struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_iter *, +struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *, const struct bkey_i *, unsigned, enum six_lock_type, unsigned long); struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, enum btree_id, unsigned, bool); -int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, - struct btree_iter *, const struct bkey_i *, - enum btree_id, unsigned); +int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *, + const struct bkey_i *, enum btree_id, unsigned); void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 4a3f39a619a1..66367ab9f20a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -775,7 +775,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bool initial, bool metadata_only) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; unsigned depth = metadata_only ? 1 : bch2_expensive_debug_checks ? 0 @@ -800,13 +800,13 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, if (!initial) { if (max_stale > 64) - bch2_btree_node_rewrite(&trans, iter, + bch2_btree_node_rewrite(&trans, &iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); else if (!bch2_btree_gc_rewrite_disabled && (bch2_btree_gc_always_rewrite || max_stale > 16)) - bch2_btree_node_rewrite(&trans, iter, + bch2_btree_node_rewrite(&trans, &iter, b->data->keys.seq, BTREE_INSERT_NOWAIT| BTREE_INSERT_GC_LOCK_HELD); @@ -814,7 +814,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bch2_trans_cond_resched(&trans); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -1414,7 +1414,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, bool metadata_only) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; size_t idx = 0; @@ -1480,7 +1480,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, } } fsck_err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); out: genradix_free(&c->reflink_gc_table); @@ -1512,7 +1512,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, bool metadata_only) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; int ret; @@ -1547,7 +1547,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, r->size = k.k->size; r->refcount = 0; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return 0; @@ -1722,7 +1722,7 @@ static bool gc_btree_gens_key(struct 
bch_fs *c, struct bkey_s_c k) static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_buf sk; int ret = 0, commit_err = 0; @@ -1730,13 +1730,13 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, - BTREE_ITER_PREFETCH| - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS); + bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN, + BTREE_ITER_PREFETCH| + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS); while ((bch2_trans_begin(&trans), - k = bch2_btree_iter_peek(iter)).k) { + k = bch2_btree_iter_peek(&iter)).k) { ret = bkey_err(k); if (ret == -EINTR) @@ -1744,7 +1744,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) if (ret) break; - c->gc_gens_pos = iter->pos; + c->gc_gens_pos = iter.pos; if (gc_btree_gens_key(c, k) && !commit_err) { bch2_bkey_buf_reassemble(&sk, c, k); @@ -1752,7 +1752,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) commit_err = - bch2_trans_update(&trans, iter, sk.k, 0) ?: + bch2_trans_update(&trans, &iter, sk.k, 0) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOWAIT| BTREE_INSERT_NOFAIL); @@ -1762,9 +1762,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) } } - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 7acec1e6db3d..06379f3e40a6 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -17,21 +17,30 @@ #include -static void btree_iter_set_search_pos(struct btree_iter *, struct bpos); -static inline void btree_trans_sort_iters(struct btree_trans *); -static struct btree_iter *btree_iter_child_alloc(struct btree_trans *, - struct btree_iter *, unsigned long); -static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *, - struct btree_iter *); -static void btree_iter_copy(struct btree_trans *, struct btree_iter *, struct btree_iter *); +static inline void btree_trans_sort_paths(struct btree_trans *); -static inline int btree_iter_cmp(const struct btree_iter *l, - const struct btree_iter *r) +static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); +static inline void btree_path_list_add(struct btree_trans *, struct btree_path *, + struct btree_path *); + +static struct btree_path *btree_path_alloc(struct btree_trans *, struct btree_path *); + +static inline int __btree_path_cmp(const struct btree_path *l, + enum btree_id r_btree_id, + bool r_cached, + struct bpos r_pos, + unsigned r_level) { - return cmp_int(l->btree_id, r->btree_id) ?: - -cmp_int(l->cached, r->cached) ?: - bkey_cmp(l->real_pos, r->real_pos) ?: - -cmp_int(l->level, r->level); + return cmp_int(l->btree_id, r_btree_id) ?: + cmp_int(l->cached, r_cached) ?: + bpos_cmp(l->pos, r_pos) ?: + -cmp_int(l->level, r_level); +} + +static inline int btree_path_cmp(const struct btree_path *l, + const struct btree_path *r) +{ + return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level); } static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) @@ -60,10 +69,10 @@ static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos return p; } 
-static inline bool is_btree_node(struct btree_iter *iter, unsigned l) +static inline bool is_btree_node(struct btree_path *path, unsigned l) { return l < BTREE_MAX_DEPTH && - (unsigned long) iter->l[l].b >= 128; + (unsigned long) path->l[l].b >= 128; } static inline struct bpos btree_iter_search_key(struct btree_iter *iter) @@ -76,42 +85,42 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter) return pos; } -static inline bool btree_iter_pos_before_node(struct btree_iter *iter, +static inline bool btree_path_pos_before_node(struct btree_path *path, struct btree *b) { - return bpos_cmp(iter->real_pos, b->data->min_key) < 0; + return bpos_cmp(path->pos, b->data->min_key) < 0; } -static inline bool btree_iter_pos_after_node(struct btree_iter *iter, +static inline bool btree_path_pos_after_node(struct btree_path *path, struct btree *b) { - return bpos_cmp(b->key.k.p, iter->real_pos) < 0; + return bpos_cmp(b->key.k.p, path->pos) < 0; } -static inline bool btree_iter_pos_in_node(struct btree_iter *iter, +static inline bool btree_path_pos_in_node(struct btree_path *path, struct btree *b) { - return iter->btree_id == b->c.btree_id && - !btree_iter_pos_before_node(iter, b) && - !btree_iter_pos_after_node(iter, b); + return path->btree_id == b->c.btree_id && + !btree_path_pos_before_node(path, b) && + !btree_path_pos_after_node(path, b); } /* Btree node locking: */ void bch2_btree_node_unlock_write(struct btree_trans *trans, - struct btree_iter *iter, struct btree *b) + struct btree_path *path, struct btree *b) { - bch2_btree_node_unlock_write_inlined(trans, iter, b); + bch2_btree_node_unlock_write_inlined(trans, path, b); } void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) { - struct btree_iter *iter; + struct btree_path *linked; unsigned readers = 0; - trans_for_each_iter(trans, iter) - if (iter->l[b->c.level].b == b && - btree_node_read_locked(iter, b->c.level)) + trans_for_each_path(trans, linked) + if (linked->l[b->c.level].b == b && + btree_node_read_locked(linked, b->c.level)) readers++; /* @@ -136,21 +145,21 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b) } bool __bch2_btree_node_relock(struct btree_trans *trans, - struct btree_iter *iter, unsigned level) + struct btree_path *path, unsigned level) { - struct btree *b = btree_iter_node(iter, level); - int want = __btree_lock_want(iter, level); + struct btree *b = btree_path_node(path, level); + int want = __btree_lock_want(path, level); - if (!is_btree_node(iter, level)) + if (!is_btree_node(path, level)) return false; if (race_fault()) return false; - if (six_relock_type(&b->c.lock, want, iter->l[level].lock_seq) || - (btree_node_lock_seq_matches(iter, b, level) && + if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || + (btree_node_lock_seq_matches(path, b, level) && btree_node_lock_increment(trans, b, level, want))) { - mark_btree_node_locked(iter, level, want); + mark_btree_node_locked(path, level, want); return true; } else { return false; @@ -158,88 +167,88 @@ bool __bch2_btree_node_relock(struct btree_trans *trans, } static bool bch2_btree_node_upgrade(struct btree_trans *trans, - struct btree_iter *iter, unsigned level) + struct btree_path *path, unsigned level) { - struct btree *b = iter->l[level].b; + struct btree *b = path->l[level].b; - EBUG_ON(btree_lock_want(iter, level) != BTREE_NODE_INTENT_LOCKED); + EBUG_ON(btree_lock_want(path, level) != BTREE_NODE_INTENT_LOCKED); - if (!is_btree_node(iter, level)) + if (!is_btree_node(path, 
level)) return false; - if (btree_node_intent_locked(iter, level)) + if (btree_node_intent_locked(path, level)) return true; if (race_fault()) return false; - if (btree_node_locked(iter, level) + if (btree_node_locked(path, level) ? six_lock_tryupgrade(&b->c.lock) - : six_relock_type(&b->c.lock, SIX_LOCK_intent, iter->l[level].lock_seq)) + : six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq)) goto success; - if (btree_node_lock_seq_matches(iter, b, level) && + if (btree_node_lock_seq_matches(path, b, level) && btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) { - btree_node_unlock(iter, level); + btree_node_unlock(path, level); goto success; } return false; success: - mark_btree_node_intent_locked(iter, level); + mark_btree_node_intent_locked(path, level); return true; } -static inline bool btree_iter_get_locks(struct btree_trans *trans, - struct btree_iter *iter, +static inline bool btree_path_get_locks(struct btree_trans *trans, + struct btree_path *path, bool upgrade, unsigned long trace_ip) { - unsigned l = iter->level; + unsigned l = path->level; int fail_idx = -1; do { - if (!btree_iter_node(iter, l)) + if (!btree_path_node(path, l)) break; if (!(upgrade - ? bch2_btree_node_upgrade(trans, iter, l) - : bch2_btree_node_relock(trans, iter, l))) { + ? bch2_btree_node_upgrade(trans, path, l) + : bch2_btree_node_relock(trans, path, l))) { (upgrade ? trace_node_upgrade_fail : trace_node_relock_fail)(trans->ip, trace_ip, - iter->cached, - iter->btree_id, &iter->real_pos, - l, iter->l[l].lock_seq, - is_btree_node(iter, l) + path->cached, + path->btree_id, &path->pos, + l, path->l[l].lock_seq, + is_btree_node(path, l) ? 0 - : (unsigned long) iter->l[l].b, - is_btree_node(iter, l) - ? iter->l[l].b->c.lock.state.seq + : (unsigned long) path->l[l].b, + is_btree_node(path, l) + ? 
path->l[l].b->c.lock.state.seq : 0); fail_idx = l; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); } l++; - } while (l < iter->locks_want); + } while (l < path->locks_want); /* * When we fail to get a lock, we have to ensure that any child nodes - * can't be relocked so bch2_btree_iter_traverse has to walk back up to + * can't be relocked so bch2_btree_path_traverse has to walk back up to * the node that we failed to relock: */ while (fail_idx >= 0) { - btree_node_unlock(iter, fail_idx); - iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; + btree_node_unlock(path, fail_idx); + path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS; --fail_idx; } - if (iter->uptodate == BTREE_ITER_NEED_RELOCK) - iter->uptodate = BTREE_ITER_UPTODATE; + if (path->uptodate == BTREE_ITER_NEED_RELOCK) + path->uptodate = BTREE_ITER_UPTODATE; bch2_trans_verify_locks(trans); - return iter->uptodate < BTREE_ITER_NEED_RELOCK; + return path->uptodate < BTREE_ITER_NEED_RELOCK; } static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, @@ -252,19 +261,20 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, /* Slowpath: */ bool __bch2_btree_node_lock(struct btree_trans *trans, - struct btree_iter *iter, - struct btree *b, struct bpos pos, unsigned level, + struct btree_path *path, + struct btree *b, + struct bpos pos, unsigned level, enum six_lock_type type, six_lock_should_sleep_fn should_sleep_fn, void *p, unsigned long ip) { - struct btree_iter *linked, *deadlock_iter = NULL; + struct btree_path *linked, *deadlock_path = NULL; u64 start_time = local_clock(); unsigned reason = 9; bool ret; /* Check if it's safe to block: */ - trans_for_each_iter(trans, linked) { + trans_for_each_path(trans, linked) { if (!linked->nodes_locked) continue; @@ -282,25 +292,25 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, */ if (type == SIX_LOCK_intent && linked->nodes_locked != linked->nodes_intent_locked) { - deadlock_iter = linked; + deadlock_path = linked; reason = 1; } - if (linked->btree_id != iter->btree_id) { - if (linked->btree_id > iter->btree_id) { - deadlock_iter = linked; + if (linked->btree_id != path->btree_id) { + if (linked->btree_id > path->btree_id) { + deadlock_path = linked; reason = 3; } continue; } /* - * Within the same btree, cached iterators come before non - * cached iterators: + * Within the same btree, cached paths come before non + * cached paths: */ - if (linked->cached != iter->cached) { - if (iter->cached) { - deadlock_iter = linked; + if (linked->cached != path->cached) { + if (path->cached) { + deadlock_path = linked; reason = 4; } continue; @@ -308,11 +318,11 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, /* * Interior nodes must be locked before their descendants: if - * another iterator has possible descendants locked of the node + * another path has possible descendants locked of the node * we're about to lock, it must have the ancestors locked too: */ if (level > __fls(linked->nodes_locked)) { - deadlock_iter = linked; + deadlock_path = linked; reason = 5; } @@ -320,19 +330,19 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, if (btree_node_locked(linked, level) && bpos_cmp(pos, btree_node_pos((void *) linked->l[level].b, linked->cached)) <= 0) { - deadlock_iter = linked; + deadlock_path = linked; reason = 7; } } - if (unlikely(deadlock_iter)) { + if (unlikely(deadlock_path)) { trace_trans_restart_would_deadlock(trans->ip, ip, trans->in_traverse_all, reason, - 
deadlock_iter->btree_id, - deadlock_iter->cached, - &deadlock_iter->real_pos, - iter->btree_id, - iter->cached, + deadlock_path->btree_id, + deadlock_path->cached, + &deadlock_path->pos, + path->btree_id, + path->cached, &pos); btree_trans_restart(trans); return false; @@ -342,9 +352,9 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, return true; #ifdef CONFIG_BCACHEFS_DEBUG - trans->locking_iter_idx = iter->idx; + trans->locking_path_idx = path->idx; trans->locking_pos = pos; - trans->locking_btree_id = iter->btree_id; + trans->locking_btree_id = path->btree_id; trans->locking_level = level; trans->locking = b; #endif @@ -363,54 +373,57 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, /* Btree iterator locking: */ #ifdef CONFIG_BCACHEFS_DEBUG -static void bch2_btree_iter_verify_locks(struct btree_iter *iter) + +static void bch2_btree_path_verify_locks(struct btree_path *path) { unsigned l; - for (l = 0; btree_iter_node(iter, l); l++) { - if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && - !btree_node_locked(iter, l)) + for (l = 0; btree_path_node(path, l); l++) { + if (path->uptodate >= BTREE_ITER_NEED_RELOCK && + !btree_node_locked(path, l)) continue; - BUG_ON(btree_lock_want(iter, l) != - btree_node_locked_type(iter, l)); + BUG_ON(btree_lock_want(path, l) != + btree_node_locked_type(path, l)); } } void bch2_trans_verify_locks(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - bch2_btree_iter_verify_locks(iter); + trans_for_each_path(trans, path) + bch2_btree_path_verify_locks(path); } #else -static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} +static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} #endif +/* Btree path locking: */ + /* * Only for btree_cache.c - only relocks intent locks */ -bool bch2_btree_iter_relock_intent(struct btree_trans *trans, - struct btree_iter *iter) +bool bch2_btree_path_relock_intent(struct btree_trans *trans, + struct btree_path *path) { unsigned l; - for (l = iter->level; - l < iter->locks_want && btree_iter_node(iter, l); + for (l = path->level; + l < path->locks_want && btree_path_node(path, l); l++) { - if (!bch2_btree_node_relock(trans, iter, l)) { + if (!bch2_btree_node_relock(trans, path, l)) { trace_node_relock_fail(trans->ip, _RET_IP_, - iter->cached, - iter->btree_id, &iter->real_pos, - l, iter->l[l].lock_seq, - is_btree_node(iter, l) + path->cached, + path->btree_id, &path->pos, + l, path->l[l].lock_seq, + is_btree_node(path, l) ? 0 - : (unsigned long) iter->l[l].b, - is_btree_node(iter, l) - ? iter->l[l].b->c.lock.state.seq + : (unsigned long) path->l[l].b, + is_btree_node(path, l) + ? 
path->l[l].b->c.lock.state.seq : 0); - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); btree_trans_restart(trans); return false; } @@ -420,27 +433,27 @@ bool bch2_btree_iter_relock_intent(struct btree_trans *trans, } __flatten -static bool bch2_btree_iter_relock(struct btree_trans *trans, - struct btree_iter *iter, unsigned long trace_ip) +static bool bch2_btree_path_relock(struct btree_trans *trans, + struct btree_path *path, unsigned long trace_ip) { - bool ret = btree_iter_get_locks(trans, iter, false, trace_ip); + bool ret = btree_path_get_locks(trans, path, false, trace_ip); if (!ret) btree_trans_restart(trans); return ret; } -bool __bch2_btree_iter_upgrade(struct btree_trans *trans, - struct btree_iter *iter, +bool __bch2_btree_path_upgrade(struct btree_trans *trans, + struct btree_path *path, unsigned new_locks_want) { - struct btree_iter *linked; + struct btree_path *linked; - EBUG_ON(iter->locks_want >= new_locks_want); + EBUG_ON(path->locks_want >= new_locks_want); - iter->locks_want = new_locks_want; + path->locks_want = new_locks_want; - if (btree_iter_get_locks(trans, iter, true, _THIS_IP_)) + if (btree_path_get_locks(trans, path, true, _THIS_IP_)) return true; /* @@ -448,7 +461,7 @@ bool __bch2_btree_iter_upgrade(struct btree_trans *trans, * iterators in the btree_trans here. * * On failure to upgrade the iterator, setting iter->locks_want and - * calling get_locks() is sufficient to make bch2_btree_iter_traverse() + * calling get_locks() is sufficient to make bch2_btree_path_traverse() * get the locks we want on transaction restart. * * But if this iterator was a clone, on transaction restart what we did @@ -460,75 +473,67 @@ bool __bch2_btree_iter_upgrade(struct btree_trans *trans, * * The code below used to be needed to ensure ancestor nodes get locked * before interior nodes - now that's handled by - * bch2_btree_iter_traverse_all(). + * bch2_btree_path_traverse_all(). 
*/ - trans_for_each_iter(trans, linked) - if (linked != iter && - linked->cached == iter->cached && - linked->btree_id == iter->btree_id && + trans_for_each_path(trans, linked) + if (linked != path && + linked->cached == path->cached && + linked->btree_id == path->btree_id && linked->locks_want < new_locks_want) { linked->locks_want = new_locks_want; - btree_iter_get_locks(trans, linked, true, _THIS_IP_); + btree_path_get_locks(trans, linked, true, _THIS_IP_); } - if (iter->should_be_locked) - btree_trans_restart(trans); return false; } -void __bch2_btree_iter_downgrade(struct btree_iter *iter, +void __bch2_btree_path_downgrade(struct btree_path *path, unsigned new_locks_want) { unsigned l; - EBUG_ON(iter->locks_want < new_locks_want); + EBUG_ON(path->locks_want < new_locks_want); - iter->locks_want = new_locks_want; + path->locks_want = new_locks_want; - while (iter->nodes_locked && - (l = __fls(iter->nodes_locked)) >= iter->locks_want) { - if (l > iter->level) { - btree_node_unlock(iter, l); + while (path->nodes_locked && + (l = __fls(path->nodes_locked)) >= path->locks_want) { + if (l > path->level) { + btree_node_unlock(path, l); } else { - if (btree_node_intent_locked(iter, l)) { - six_lock_downgrade(&iter->l[l].b->c.lock); - iter->nodes_intent_locked ^= 1 << l; + if (btree_node_intent_locked(path, l)) { + six_lock_downgrade(&path->l[l].b->c.lock); + path->nodes_intent_locked ^= 1 << l; } break; } } - bch2_btree_iter_verify_locks(iter); + bch2_btree_path_verify_locks(path); } void bch2_trans_downgrade(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - bch2_btree_iter_downgrade(iter); + trans_for_each_path(trans, path) + bch2_btree_path_downgrade(path); } /* Btree transaction locking: */ -static inline bool btree_iter_should_be_locked(struct btree_iter *iter) -{ - return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || - iter->should_be_locked; -} - bool bch2_trans_relock(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; if (unlikely(trans->restarted)) return false; - trans_for_each_iter(trans, iter) - if (btree_iter_should_be_locked(iter) && - !bch2_btree_iter_relock(trans, iter, _RET_IP_)) { + trans_for_each_path(trans, path) + if (path->should_be_locked && + !bch2_btree_path_relock(trans, path, _RET_IP_)) { trace_trans_restart_relock(trans->ip, _RET_IP_, - iter->btree_id, &iter->real_pos); + path->btree_id, &path->pos); BUG_ON(!trans->restarted); return false; } @@ -537,37 +542,37 @@ bool bch2_trans_relock(struct btree_trans *trans) void bch2_trans_unlock(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - __bch2_btree_iter_unlock(iter); + trans_for_each_path(trans, path) + __bch2_btree_path_unlock(path); } /* Btree iterator: */ #ifdef CONFIG_BCACHEFS_DEBUG -static void bch2_btree_iter_verify_cached(struct btree_trans *trans, - struct btree_iter *iter) +static void bch2_btree_path_verify_cached(struct btree_trans *trans, + struct btree_path *path) { struct bkey_cached *ck; - bool locked = btree_node_locked(iter, 0); + bool locked = btree_node_locked(path, 0); - if (!bch2_btree_node_relock(trans, iter, 0)) + if (!bch2_btree_node_relock(trans, path, 0)) return; - ck = (void *) iter->l[0].b; - BUG_ON(ck->key.btree_id != iter->btree_id || - bkey_cmp(ck->key.pos, iter->pos)); + ck = (void *) path->l[0].b; + BUG_ON(ck->key.btree_id != path->btree_id || + bkey_cmp(ck->key.pos, path->pos)); if (!locked) - 
btree_node_unlock(iter, 0); + btree_node_unlock(path, 0); } -static void bch2_btree_iter_verify_level(struct btree_trans *trans, - struct btree_iter *iter, unsigned level) +static void bch2_btree_path_verify_level(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - struct btree_iter_level *l; + struct btree_path_level *l; struct btree_node_iter tmp; bool locked; struct bkey_packed *p, *k; @@ -577,25 +582,23 @@ static void bch2_btree_iter_verify_level(struct btree_trans *trans, if (!bch2_debug_check_iterators) return; - l = &iter->l[level]; + l = &path->l[level]; tmp = l->iter; - locked = btree_node_locked(iter, level); + locked = btree_node_locked(path, level); - if (iter->cached) { + if (path->cached) { if (!level) - bch2_btree_iter_verify_cached(trans, iter); + bch2_btree_path_verify_cached(trans, path); return; } - BUG_ON(iter->level < iter->min_depth); - - if (!btree_iter_node(iter, level)) + if (!btree_path_node(path, level)) return; - if (!bch2_btree_node_relock(trans, iter, level)) + if (!bch2_btree_node_relock(trans, path, level)) return; - BUG_ON(!btree_iter_pos_in_node(iter, l->b)); + BUG_ON(!btree_path_pos_in_node(path, l->b)); bch2_btree_node_iter_verify(&l->iter, l->b); @@ -606,29 +609,29 @@ static void bch2_btree_iter_verify_level(struct btree_trans *trans, * For extents, the iterator may have skipped past deleted keys (but not * whiteouts) */ - p = level || btree_node_type_is_extents(iter->btree_id) + p = level || btree_node_type_is_extents(path->btree_id) ? bch2_btree_node_iter_prev(&tmp, l->b) : bch2_btree_node_iter_prev_all(&tmp, l->b); k = bch2_btree_node_iter_peek_all(&l->iter, l->b); - if (p && bkey_iter_pos_cmp(l->b, p, &iter->real_pos) >= 0) { + if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) { msg = "before"; goto err; } - if (k && bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) { + if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { msg = "after"; goto err; } if (!locked) - btree_node_unlock(iter, level); + btree_node_unlock(path, level); return; err: strcpy(buf2, "(none)"); strcpy(buf3, "(none)"); - bch2_bpos_to_text(&PBUF(buf1), iter->real_pos); + bch2_bpos_to_text(&PBUF(buf1), path->pos); if (p) { struct bkey uk = bkey_unpack_key(l->b, p); @@ -640,20 +643,51 @@ err: bch2_bkey_to_text(&PBUF(buf3), &uk); } - panic("iterator should be %s key at level %u:\n" - "iter pos %s\n" + panic("path should be %s key at level %u:\n" + "path pos %s\n" "prev key %s\n" "cur key %s\n", msg, level, buf1, buf2, buf3); } -static void bch2_btree_iter_verify(struct btree_iter *iter) +static void bch2_btree_path_verify(struct btree_trans *trans, + struct btree_path *path) { - struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; unsigned i; - EBUG_ON(iter->btree_id >= BTREE_ID_NR); + EBUG_ON(path->btree_id >= BTREE_ID_NR); + + for (i = 0; i < (!path->cached ? 
BTREE_MAX_DEPTH : 1); i++) { + if (!path->l[i].b) { + BUG_ON(c->btree_roots[path->btree_id].b->c.level > i); + break; + } + + bch2_btree_path_verify_level(trans, path, i); + } + + bch2_btree_path_verify_locks(path); +} + +void bch2_trans_verify_paths(struct btree_trans *trans) +{ + struct btree_path *path; + + if (!bch2_debug_check_iterators) + return; + + trans_for_each_path(trans, path) + bch2_btree_path_verify(trans, path); +} + +static void bch2_btree_iter_verify(struct btree_iter *iter) +{ + struct btree_trans *trans = iter->trans; + + BUG_ON(iter->btree_id >= BTREE_ID_NR); + + BUG_ON(!!(iter->flags & BTREE_ITER_CACHED) != iter->path->cached); BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && iter->pos.snapshot != iter->snapshot); @@ -665,16 +699,7 @@ static void bch2_btree_iter_verify(struct btree_iter *iter) (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && !btree_type_has_snapshots(iter->btree_id)); - for (i = 0; i < (!iter->cached ? BTREE_MAX_DEPTH : 1); i++) { - if (!iter->l[i].b) { - BUG_ON(c->btree_roots[iter->btree_id].b->c.level > i); - break; - } - - bch2_btree_iter_verify_level(trans, iter, i); - } - - bch2_btree_iter_verify_locks(iter); + bch2_btree_path_verify(trans, iter->path); } static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) @@ -686,26 +711,19 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) bkey_cmp(iter->pos, iter->k.p) > 0); } -void bch2_trans_verify_iters(struct btree_trans *trans, struct btree *b) -{ - struct btree_iter *iter; - - if (!bch2_debug_check_iterators) - return; - - trans_for_each_iter_with_node(trans, b, iter) - bch2_btree_iter_verify_level(trans, iter, b->c.level); -} - #else -static inline void bch2_btree_iter_verify_level(struct btree_trans *trans, - struct btree_iter *iter, unsigned l) {} +static inline void bch2_btree_path_verify_level(struct btree_trans *trans, + struct btree_path *path, unsigned l) {} +static inline void bch2_btree_path_verify(struct btree_trans *trans, + struct btree_path *path) {} static inline void bch2_btree_iter_verify(struct btree_iter *iter) {} static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {} #endif +/* Btree path: fixups after btree updates */ + static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, struct btree *b, struct bset_tree *t, @@ -723,38 +741,38 @@ static void btree_node_iter_set_set_pos(struct btree_node_iter *iter, bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t)); } -static void __bch2_btree_iter_fix_key_modified(struct btree_iter *iter, +static void __bch2_btree_path_fix_key_modified(struct btree_path *path, struct btree *b, struct bkey_packed *where) { - struct btree_iter_level *l = &iter->l[b->c.level]; + struct btree_path_level *l = &path->l[b->c.level]; if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b)) return; - if (bkey_iter_pos_cmp(l->b, where, &iter->real_pos) < 0) + if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0) bch2_btree_node_iter_advance(&l->iter, l->b); } -void bch2_btree_iter_fix_key_modified(struct btree_trans *trans, +void bch2_btree_path_fix_key_modified(struct btree_trans *trans, struct btree *b, struct bkey_packed *where) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter_with_node(trans, b, iter) { - __bch2_btree_iter_fix_key_modified(iter, b, where); - bch2_btree_iter_verify_level(trans, iter, b->c.level); + trans_for_each_path_with_node(trans, b, path) { + __bch2_btree_path_fix_key_modified(path, b, where); + bch2_btree_path_verify_level(trans, 
path, b->c.level); } } -static void __bch2_btree_node_iter_fix(struct btree_iter *iter, - struct btree *b, - struct btree_node_iter *node_iter, - struct bset_tree *t, - struct bkey_packed *where, - unsigned clobber_u64s, - unsigned new_u64s) +static void __bch2_btree_node_iter_fix(struct btree_path *path, + struct btree *b, + struct btree_node_iter *node_iter, + struct bset_tree *t, + struct bkey_packed *where, + unsigned clobber_u64s, + unsigned new_u64s) { const struct bkey_packed *end = btree_bkey_last(b, t); struct btree_node_iter_set *set; @@ -772,7 +790,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter, /* didn't find the bset in the iterator - might have to readd it: */ if (new_u64s && - bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) { + bkey_iter_pos_cmp(b, where, &path->pos) >= 0) { bch2_btree_node_iter_push(node_iter, b, where, end); goto fixup_done; } else { @@ -787,7 +805,7 @@ found: return; if (new_u64s && - bkey_iter_pos_cmp(b, where, &iter->real_pos) >= 0) { + bkey_iter_pos_cmp(b, where, &path->pos) >= 0) { set->k = offset; } else if (set->k < offset + clobber_u64s) { set->k = offset + new_u64s; @@ -814,7 +832,7 @@ fixup_done: if (!bch2_btree_node_iter_end(node_iter) && iter_current_key_modified && (b->c.level || - btree_node_type_is_extents(iter->btree_id))) { + btree_node_type_is_extents(path->btree_id))) { struct bset_tree *t; struct bkey_packed *k, *k2, *p; @@ -842,7 +860,7 @@ fixup_done: } void bch2_btree_node_iter_fix(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct btree_node_iter *node_iter, struct bkey_packed *where, @@ -850,26 +868,28 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, unsigned new_u64s) { struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where); - struct btree_iter *linked; + struct btree_path *linked; - if (node_iter != &iter->l[b->c.level].iter) { - __bch2_btree_node_iter_fix(iter, b, node_iter, t, + if (node_iter != &path->l[b->c.level].iter) { + __bch2_btree_node_iter_fix(path, b, node_iter, t, where, clobber_u64s, new_u64s); if (bch2_debug_check_iterators) bch2_btree_node_iter_verify(node_iter, b); } - trans_for_each_iter_with_node(trans, b, linked) { + trans_for_each_path_with_node(trans, b, linked) { __bch2_btree_node_iter_fix(linked, b, &linked->l[b->c.level].iter, t, where, clobber_u64s, new_u64s); - bch2_btree_iter_verify_level(trans, linked, b->c.level); + bch2_btree_path_verify_level(trans, linked, b->c.level); } } -static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, - struct btree_iter_level *l, +/* Btree path level: pointer to a particular btree node and node iter */ + +static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c, + struct btree_path_level *l, struct bkey *u, struct bkey_packed *k) { @@ -894,50 +914,54 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, * assertion here: */ if (bch2_debug_check_bkeys && !bkey_deleted(ret.k)) - bch2_bkey_debugcheck(iter->trans->c, l->b, ret); + bch2_bkey_debugcheck(c, l->b, ret); return ret; } -/* peek_all() doesn't skip deleted keys */ -static inline struct bkey_s_c btree_iter_level_peek_all(struct btree_iter *iter, - struct btree_iter_level *l) +static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c, + struct btree_path_level *l, + struct bkey *u) { - return __btree_iter_unpack(iter, l, &iter->k, + return __btree_iter_unpack(c, l, u, bch2_btree_node_iter_peek_all(&l->iter, l->b)); } -static inline struct bkey_s_c 
btree_iter_level_peek(struct btree_iter *iter, - struct btree_iter_level *l) +static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans, + struct btree_path *path, + struct btree_path_level *l, + struct bkey *u) { - struct bkey_s_c k = __btree_iter_unpack(iter, l, &iter->k, + struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, bch2_btree_node_iter_peek(&l->iter, l->b)); - iter->real_pos = k.k ? k.k->p : l->b->key.k.p; - iter->trans->iters_sorted = false; + path->pos = k.k ? k.k->p : l->b->key.k.p; + trans->paths_sorted = false; return k; } -static inline struct bkey_s_c btree_iter_level_prev(struct btree_iter *iter, - struct btree_iter_level *l) +static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans, + struct btree_path *path, + struct btree_path_level *l, + struct bkey *u) { - struct bkey_s_c k = __btree_iter_unpack(iter, l, &iter->k, + struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u, bch2_btree_node_iter_prev(&l->iter, l->b)); - iter->real_pos = k.k ? k.k->p : l->b->data->min_key; - iter->trans->iters_sorted = false; + path->pos = k.k ? k.k->p : l->b->data->min_key; + trans->paths_sorted = false; return k; } -static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, - struct btree_iter_level *l, +static inline bool btree_path_advance_to_pos(struct btree_path *path, + struct btree_path_level *l, int max_advance) { struct bkey_packed *k; int nr_advanced = 0; while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) && - bkey_iter_pos_cmp(l->b, k, &iter->real_pos) < 0) { + bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) { if (max_advance > 0 && nr_advanced >= max_advance) return false; @@ -951,10 +975,10 @@ static inline bool btree_iter_advance_to_pos(struct btree_iter *iter, /* * Verify that iterator for parent node points to child node: */ -static void btree_iter_verify_new_node(struct btree_trans *trans, - struct btree_iter *iter, struct btree *b) +static void btree_path_verify_new_node(struct btree_trans *trans, + struct btree_path *path, struct btree *b) { - struct btree_iter_level *l; + struct btree_path_level *l; unsigned plevel; bool parent_locked; struct bkey_packed *k; @@ -963,15 +987,15 @@ static void btree_iter_verify_new_node(struct btree_trans *trans, return; plevel = b->c.level + 1; - if (!btree_iter_node(iter, plevel)) + if (!btree_path_node(path, plevel)) return; - parent_locked = btree_node_locked(iter, plevel); + parent_locked = btree_node_locked(path, plevel); - if (!bch2_btree_node_relock(trans, iter, plevel)) + if (!bch2_btree_node_relock(trans, path, plevel)) return; - l = &iter->l[plevel]; + l = &path->l[plevel]; k = bch2_btree_node_iter_peek_all(&l->iter, l->b); if (!k || bkey_deleted(k) || @@ -983,7 +1007,7 @@ static void btree_iter_verify_new_node(struct btree_trans *trans, struct bkey uk = bkey_unpack_key(b, k); bch2_dump_btree_node(trans->c, l->b); - bch2_bpos_to_text(&PBUF(buf1), iter->real_pos); + bch2_bpos_to_text(&PBUF(buf1), path->pos); bch2_bkey_to_text(&PBUF(buf2), &uk); bch2_bpos_to_text(&PBUF(buf3), b->data->min_key); bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); @@ -991,20 +1015,20 @@ static void btree_iter_verify_new_node(struct btree_trans *trans, "iter pos %s %s\n" "iter key %s\n" "new node %s-%s\n", - bch2_btree_ids[iter->btree_id], buf1, + bch2_btree_ids[path->btree_id], buf1, buf2, buf3, buf4); } if (!parent_locked) - btree_node_unlock(iter, b->c.level + 1); + btree_node_unlock(path, b->c.level + 1); } -static inline void __btree_iter_level_init(struct btree_iter *iter, +static 
inline void __btree_path_level_init(struct btree_path *path, unsigned level) { - struct btree_iter_level *l = &iter->l[level]; + struct btree_path_level *l = &path->l[level]; - bch2_btree_node_iter_init(&l->iter, l->b, &iter->real_pos); + bch2_btree_node_iter_init(&l->iter, l->b, &path->pos); /* * Iterators to interior nodes should always be pointed at the first non @@ -1014,22 +1038,24 @@ static inline void __btree_iter_level_init(struct btree_iter *iter, bch2_btree_node_iter_peek(&l->iter, l->b); } -static inline void btree_iter_level_init(struct btree_trans *trans, - struct btree_iter *iter, +static inline void btree_path_level_init(struct btree_trans *trans, + struct btree_path *path, struct btree *b) { - BUG_ON(iter->cached); + BUG_ON(path->cached); - btree_iter_verify_new_node(trans, iter, b); + btree_path_verify_new_node(trans, path, b); - EBUG_ON(!btree_iter_pos_in_node(iter, b)); + EBUG_ON(!btree_path_pos_in_node(path, b)); EBUG_ON(b->c.lock.state.seq & 1); - iter->l[b->c.level].lock_seq = b->c.lock.state.seq; - iter->l[b->c.level].b = b; - __btree_iter_level_init(iter, b->c.level); + path->l[b->c.level].lock_seq = b->c.lock.state.seq; + path->l[b->c.level].b = b; + __btree_path_level_init(path, b->c.level); } +/* Btree path: fixups after btree node updates: */ + /* * A btree node is being replaced - update the iterator to point to the new * node: @@ -1037,37 +1063,37 @@ static inline void btree_iter_level_init(struct btree_trans *trans, void bch2_trans_node_add(struct btree_trans *trans, struct btree *b) { enum btree_node_locked_type t; - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - if (!iter->cached && - btree_iter_pos_in_node(iter, b)) { + trans_for_each_path(trans, path) + if (!path->cached && + btree_path_pos_in_node(path, b)) { /* - * bch2_trans_node_drop() has already been called - + * bch2_btree_path_node_drop() has already been called - * the old node we're replacing has already been * unlocked and the pointer invalidated */ - BUG_ON(btree_node_locked(iter, b->c.level)); + BUG_ON(btree_node_locked(path, b->c.level)); - t = btree_lock_want(iter, b->c.level); + t = btree_lock_want(path, b->c.level); if (t != BTREE_NODE_UNLOCKED) { six_lock_increment(&b->c.lock, (enum six_lock_type) t); - mark_btree_node_locked(iter, b->c.level, (enum six_lock_type) t); + mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t); } - btree_iter_level_init(trans, iter, b); + btree_path_level_init(trans, path, b); } } void bch2_trans_node_drop(struct btree_trans *trans, struct btree *b) { - struct btree_iter *iter; + struct btree_path *path; unsigned level = b->c.level; - trans_for_each_iter(trans, iter) - if (iter->l[level].b == b) { - btree_node_unlock(iter, level); - iter->l[level].b = BTREE_ITER_NO_NODE_DROP; + trans_for_each_path(trans, path) + if (path->l[level].b == b) { + btree_node_unlock(path, level); + path->l[level].b = BTREE_ITER_NO_NODE_DROP; } } @@ -1077,12 +1103,14 @@ void bch2_trans_node_drop(struct btree_trans *trans, struct btree *b) */ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter_with_node(trans, b, iter) - __btree_iter_level_init(iter, b->c.level); + trans_for_each_path_with_node(trans, b, path) + __btree_path_level_init(path, b->c.level); } +/* Btree path: traverse, set_pos: */ + static int lock_root_check_fn(struct six_lock *lock, void *p) { struct btree *b = container_of(lock, struct btree, c.lock); @@ -1091,38 
+1119,38 @@ static int lock_root_check_fn(struct six_lock *lock, void *p) return b == *rootp ? 0 : -1; } -static inline int btree_iter_lock_root(struct btree_trans *trans, - struct btree_iter *iter, +static inline int btree_path_lock_root(struct btree_trans *trans, + struct btree_path *path, unsigned depth_want, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; + struct btree *b, **rootp = &c->btree_roots[path->btree_id].b; enum six_lock_type lock_type; unsigned i; - EBUG_ON(iter->nodes_locked); + EBUG_ON(path->nodes_locked); while (1) { b = READ_ONCE(*rootp); - iter->level = READ_ONCE(b->c.level); + path->level = READ_ONCE(b->c.level); - if (unlikely(iter->level < depth_want)) { + if (unlikely(path->level < depth_want)) { /* * the root is at a lower depth than the depth we want: * got to the end of the btree, or we're walking nodes * greater than some depth and there are no nodes >= * that depth */ - iter->level = depth_want; - for (i = iter->level; i < BTREE_MAX_DEPTH; i++) - iter->l[i].b = NULL; + path->level = depth_want; + for (i = path->level; i < BTREE_MAX_DEPTH; i++) + path->l[i].b = NULL; return 1; } - lock_type = __btree_lock_want(iter, iter->level); - if (unlikely(!btree_node_lock(trans, iter, b, SPOS_MAX, - iter->level, lock_type, + lock_type = __btree_lock_want(path, path->level); + if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX, + path->level, lock_type, lock_root_check_fn, rootp, trace_ip))) { if (trans->restarted) @@ -1131,16 +1159,16 @@ static inline int btree_iter_lock_root(struct btree_trans *trans, } if (likely(b == READ_ONCE(*rootp) && - b->c.level == iter->level && + b->c.level == path->level && !race_fault())) { - for (i = 0; i < iter->level; i++) - iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; - iter->l[iter->level].b = b; - for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++) - iter->l[i].b = NULL; - - mark_btree_node_locked(iter, iter->level, lock_type); - btree_iter_level_init(trans, iter, b); + for (i = 0; i < path->level; i++) + path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT; + path->l[path->level].b = b; + for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++) + path->l[i].b = NULL; + + mark_btree_node_locked(path, path->level, lock_type); + btree_path_level_init(trans, path, b); return 0; } @@ -1149,23 +1177,23 @@ static inline int btree_iter_lock_root(struct btree_trans *trans, } noinline -static int btree_iter_prefetch(struct btree_trans *trans, struct btree_iter *iter) +static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; - struct btree_iter_level *l = &iter->l[iter->level]; + struct btree_path_level *l = path_l(path); struct btree_node_iter node_iter = l->iter; struct bkey_packed *k; struct bkey_buf tmp; unsigned nr = test_bit(BCH_FS_STARTED, &c->flags) - ? (iter->level > 1 ? 0 : 2) - : (iter->level > 1 ? 1 : 16); - bool was_locked = btree_node_locked(iter, iter->level); + ? (path->level > 1 ? 0 : 2) + : (path->level > 1 ? 
1 : 16); + bool was_locked = btree_node_locked(path, path->level); int ret = 0; bch2_bkey_buf_init(&tmp); while (nr && !ret) { - if (!bch2_btree_node_relock(trans, iter, iter->level)) + if (!bch2_btree_node_relock(trans, path, path->level)) break; bch2_btree_node_iter_advance(&node_iter, l->b); @@ -1174,27 +1202,27 @@ static int btree_iter_prefetch(struct btree_trans *trans, struct btree_iter *ite break; bch2_bkey_buf_unpack(&tmp, c, l->b, k); - ret = bch2_btree_node_prefetch(c, trans, iter, tmp.k, - iter->btree_id, iter->level - 1); + ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id, + path->level - 1); } if (!was_locked) - btree_node_unlock(iter, iter->level); + btree_node_unlock(path, path->level); bch2_bkey_buf_exit(&tmp, c); return ret; } static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned plevel, struct btree *b) { - struct btree_iter_level *l = &iter->l[plevel]; - bool locked = btree_node_locked(iter, plevel); + struct btree_path_level *l = &path->l[plevel]; + bool locked = btree_node_locked(path, plevel); struct bkey_packed *k; struct bch_btree_ptr_v2 *bp; - if (!bch2_btree_node_relock(trans, iter, plevel)) + if (!bch2_btree_node_relock(trans, path, plevel)) return; k = bch2_btree_node_iter_peek_all(&l->iter, l->b); @@ -1204,60 +1232,61 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans, bp->mem_ptr = (unsigned long)b; if (!locked) - btree_node_unlock(iter, plevel); + btree_node_unlock(path, plevel); } -static __always_inline int btree_iter_down(struct btree_trans *trans, - struct btree_iter *iter, +static __always_inline int btree_path_down(struct btree_trans *trans, + struct btree_path *path, + unsigned flags, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_iter_level *l = &iter->l[iter->level]; + struct btree_path_level *l = path_l(path); struct btree *b; - unsigned level = iter->level - 1; - enum six_lock_type lock_type = __btree_lock_want(iter, level); + unsigned level = path->level - 1; + enum six_lock_type lock_type = __btree_lock_want(path, level); struct bkey_buf tmp; int ret; - EBUG_ON(!btree_node_locked(iter, iter->level)); + EBUG_ON(!btree_node_locked(path, path->level)); bch2_bkey_buf_init(&tmp); bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip); + b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip); ret = PTR_ERR_OR_ZERO(b); if (unlikely(ret)) goto err; - mark_btree_node_locked(iter, level, lock_type); - btree_iter_level_init(trans, iter, b); + mark_btree_node_locked(path, level, lock_type); + btree_path_level_init(trans, path, b); if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 && unlikely(b != btree_node_mem_ptr(tmp.k))) - btree_node_mem_ptr_set(trans, iter, level + 1, b); + btree_node_mem_ptr_set(trans, path, level + 1, b); - if (iter->flags & BTREE_ITER_PREFETCH) - ret = btree_iter_prefetch(trans, iter); + if (flags & BTREE_ITER_PREFETCH) + ret = btree_path_prefetch(trans, path); - if (btree_node_read_locked(iter, level + 1)) - btree_node_unlock(iter, level + 1); - iter->level = level; + if (btree_node_read_locked(path, level + 1)) + btree_node_unlock(path, level + 1); + path->level = level; - bch2_btree_iter_verify_locks(iter); + bch2_btree_path_verify_locks(path); err: bch2_bkey_buf_exit(&tmp, c); return ret; } -static int btree_iter_traverse_one(struct btree_trans *, - struct btree_iter *, unsigned 
long); +static int btree_path_traverse_one(struct btree_trans *, struct btree_path *, + unsigned, unsigned long); -static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, +static int __btree_path_traverse_all(struct btree_trans *trans, int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; - struct btree_iter *iter, *prev = NULL; + struct btree_path *path, *prev = NULL; int i; if (trans->in_traverse_all) @@ -1267,21 +1296,21 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, retry_all: trans->restarted = false; - trans_for_each_iter(trans, iter) - iter->should_be_locked = false; + trans_for_each_path(trans, path) + path->should_be_locked = false; - btree_trans_sort_iters(trans); + btree_trans_sort_paths(trans); - trans_for_each_iter_inorder_reverse(trans, iter, i) { + trans_for_each_path_inorder_reverse(trans, path, i) { if (prev) { - if (iter->btree_id == prev->btree_id && - iter->locks_want < prev->locks_want) - __bch2_btree_iter_upgrade(trans, iter, prev->locks_want); - else if (!iter->locks_want && prev->locks_want) - __bch2_btree_iter_upgrade(trans, iter, 1); + if (path->btree_id == prev->btree_id && + path->locks_want < prev->locks_want) + __bch2_btree_path_upgrade(trans, path, prev->locks_want); + else if (!path->locks_want && prev->locks_want) + __bch2_btree_path_upgrade(trans, path, 1); } - prev = iter; + prev = path; } bch2_trans_unlock(trans); @@ -1308,27 +1337,27 @@ retry_all: /* Now, redo traversals in correct order: */ i = 0; while (i < trans->nr_sorted) { - iter = trans->iters + trans->sorted[i]; + path = trans->paths + trans->sorted[i]; - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); - ret = btree_iter_traverse_one(trans, iter, _THIS_IP_); + ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_); if (ret) goto retry_all; - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); - if (iter->nodes_locked) + if (path->nodes_locked) i++; } /* * BTREE_ITER_NEED_RELOCK is ok here - if we called bch2_trans_unlock() - * and relock(), relock() won't relock since iter->should_be_locked + * and relock(), relock() won't relock since path->should_be_locked * isn't set yet, which is all fine */ - trans_for_each_iter(trans, iter) - BUG_ON(iter->uptodate >= BTREE_ITER_NEED_TRAVERSE); + trans_for_each_path(trans, path) + BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE); out: bch2_btree_cache_cannibalize_unlock(c); @@ -1338,36 +1367,36 @@ out: return ret; } -static int bch2_btree_iter_traverse_all(struct btree_trans *trans) +static int bch2_btree_path_traverse_all(struct btree_trans *trans) { - return __btree_iter_traverse_all(trans, 0, _RET_IP_); + return __btree_path_traverse_all(trans, 0, _RET_IP_); } -static inline bool btree_iter_good_node(struct btree_trans *trans, - struct btree_iter *iter, +static inline bool btree_path_good_node(struct btree_trans *trans, + struct btree_path *path, unsigned l, int check_pos) { - if (!is_btree_node(iter, l) || - !bch2_btree_node_relock(trans, iter, l)) + if (!is_btree_node(path, l) || + !bch2_btree_node_relock(trans, path, l)) return false; - if (check_pos < 0 && btree_iter_pos_before_node(iter, iter->l[l].b)) + if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b)) return false; - if (check_pos > 0 && btree_iter_pos_after_node(iter, iter->l[l].b)) + if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b)) return false; return true; } -static 
inline unsigned btree_iter_up_until_good_node(struct btree_trans *trans, - struct btree_iter *iter, +static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, + struct btree_path *path, int check_pos) { - unsigned l = iter->level; + unsigned l = path->level; - while (btree_iter_node(iter, l) && - !btree_iter_good_node(trans, iter, l, check_pos)) { - btree_node_unlock(iter, l); - iter->l[l].b = BTREE_ITER_NO_NODE_UP; + while (btree_path_node(path, l) && + !btree_path_good_node(trans, path, l, check_pos)) { + btree_node_unlock(path, l); + path->l[l].b = BTREE_ITER_NO_NODE_UP; l++; } @@ -1383,53 +1412,54 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_trans *trans, * On error, caller (peek_node()/peek_key()) must return NULL; the error is * stashed in the iterator and returned from bch2_trans_exit(). */ -static int btree_iter_traverse_one(struct btree_trans *trans, - struct btree_iter *iter, +static int btree_path_traverse_one(struct btree_trans *trans, + struct btree_path *path, + unsigned flags, unsigned long trace_ip) { - unsigned l, depth_want = iter->level; + unsigned l, depth_want = path->level; int ret = 0; /* - * Ensure we obey iter->should_be_locked: if it's set, we can't unlock - * and re-traverse the iterator without a transaction restart: + * Ensure we obey path->should_be_locked: if it's set, we can't unlock + * and re-traverse the path without a transaction restart: */ - if (iter->should_be_locked) { - ret = bch2_btree_iter_relock(trans, iter, trace_ip) ? 0 : -EINTR; + if (path->should_be_locked) { + ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR; goto out; } - if (iter->cached) { - ret = bch2_btree_iter_traverse_cached(trans, iter); + if (path->cached) { + ret = bch2_btree_path_traverse_cached(trans, path, flags); goto out; } - if (unlikely(iter->level >= BTREE_MAX_DEPTH)) + if (unlikely(path->level >= BTREE_MAX_DEPTH)) goto out; - iter->level = btree_iter_up_until_good_node(trans, iter, 0); + path->level = btree_path_up_until_good_node(trans, path, 0); /* If we need intent locks, take them too: */ - for (l = iter->level + 1; - l < iter->locks_want && btree_iter_node(iter, l); + for (l = path->level + 1; + l < path->locks_want && btree_path_node(path, l); l++) - if (!bch2_btree_node_relock(trans, iter, l)) - while (iter->level <= l) { - btree_node_unlock(iter, iter->level); - iter->l[iter->level].b = BTREE_ITER_NO_NODE_UP; - iter->level++; + if (!bch2_btree_node_relock(trans, path, l)) + while (path->level <= l) { + btree_node_unlock(path, path->level); + path->l[path->level].b = BTREE_ITER_NO_NODE_UP; + path->level++; } /* - * Note: iter->nodes[iter->level] may be temporarily NULL here - that + * Note: path->nodes[path->level] may be temporarily NULL here - that * would indicate to other code that we got to the end of the btree, * here it indicates that relocking the root failed - it's critical that - * btree_iter_lock_root() comes next and that it can't fail + * btree_path_lock_root() comes next and that it can't fail */ - while (iter->level > depth_want) { - ret = btree_iter_node(iter, iter->level) - ? btree_iter_down(trans, iter, trace_ip) - : btree_iter_lock_root(trans, iter, depth_want, trace_ip); + while (path->level > depth_want) { + ret = btree_path_node(path, path->level) + ? 
btree_path_down(trans, path, flags, trace_ip) + : btree_path_lock_root(trans, path, depth_want, trace_ip); if (unlikely(ret)) { if (ret == 1) { /* @@ -1440,74 +1470,405 @@ static int btree_iter_traverse_one(struct btree_trans *trans, goto out; } - __bch2_btree_iter_unlock(iter); - iter->level = depth_want; + __bch2_btree_path_unlock(path); + path->level = depth_want; - if (ret == -EIO) { - iter->flags |= BTREE_ITER_ERROR; - iter->l[iter->level].b = + if (ret == -EIO) + path->l[path->level].b = BTREE_ITER_NO_NODE_ERROR; - } else { - iter->l[iter->level].b = + else + path->l[path->level].b = BTREE_ITER_NO_NODE_DOWN; - } goto out; } } - iter->uptodate = BTREE_ITER_UPTODATE; + path->uptodate = BTREE_ITER_UPTODATE; out: BUG_ON((ret == -EINTR) != !!trans->restarted); trace_iter_traverse(trans->ip, trace_ip, - iter->cached, - iter->btree_id, &iter->real_pos, ret); - bch2_btree_iter_verify(iter); + path->cached, + path->btree_id, &path->pos, ret); + bch2_btree_path_verify(trans, path); return ret; } -static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) +static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long); + +int __must_check bch2_btree_path_traverse(struct btree_trans *trans, + struct btree_path *path, unsigned flags) { - struct btree_trans *trans = iter->trans; int ret; + if (path->uptodate < BTREE_ITER_NEED_RELOCK) + return 0; + ret = bch2_trans_cond_resched(trans) ?: - btree_iter_traverse_one(trans, iter, _RET_IP_); - if (unlikely(ret) && hweight64(trans->iters_linked) == 1) { - ret = __btree_iter_traverse_all(trans, ret, _RET_IP_); + btree_path_traverse_one(trans, path, flags, _RET_IP_); + if (unlikely(ret) && hweight64(trans->paths_allocated) == 1) { + ret = __btree_path_traverse_all(trans, ret, _RET_IP_); BUG_ON(ret == -EINTR); } return ret; } -/* - * Note: - * bch2_btree_iter_traverse() is for external users, btree_iter_traverse() is - * for internal btree iterator users - * - * bch2_btree_iter_traverse sets iter->real_pos to iter->pos, - * btree_iter_traverse() does not: - */ -static inline int __must_check -btree_iter_traverse(struct btree_iter *iter) +static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst, + struct btree_path *src) +{ + unsigned i, offset = offsetof(struct btree_path, pos); + + memcpy((void *) dst + offset, + (void *) src + offset, + sizeof(struct btree_path) - offset); + + for (i = 0; i < BTREE_MAX_DEPTH; i++) + if (btree_node_locked(dst, i)) + six_lock_increment(&dst->l[i].b->c.lock, + __btree_lock_want(dst, i)); + + trans->paths_sorted = false; +} + +struct btree_path * __must_check +__bch2_btree_path_make_mut(struct btree_trans *trans, + struct btree_path *path, bool intent) +{ + struct btree_path *new = btree_path_alloc(trans, path); + + btree_path_copy(trans, new, path); + __btree_path_get(new, intent); + __btree_path_put(path, intent); + path = new; + path->preserve = false; +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_allocated = _RET_IP_; +#endif + return path; +} + +static struct btree_path * __must_check +__bch2_btree_path_set_pos(struct btree_trans *trans, + struct btree_path *path, struct bpos new_pos, + bool intent, int cmp) +{ +#ifdef CONFIG_BCACHEFS_DEBUG + struct bpos old_pos = path->pos; +#endif + unsigned l = path->level; + + EBUG_ON(trans->restarted); + EBUG_ON(!path->ref); + + path = bch2_btree_path_make_mut(trans, path, intent); + + path->pos = new_pos; + path->should_be_locked = false; + trans->paths_sorted = false; + + if (unlikely(path->cached)) { + btree_node_unlock(path, 0); + 
path->l[0].b = BTREE_ITER_NO_NODE_CACHED; + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + goto out; + } + + l = btree_path_up_until_good_node(trans, path, cmp); + + if (btree_path_node(path, l)) { + /* + * We might have to skip over many keys, or just a few: try + * advancing the node iterator, and if we have to skip over too + * many keys just reinit it (or if we're rewinding, since that + * is expensive). + */ + if (cmp < 0 || + !btree_path_advance_to_pos(path, &path->l[l], 8)) + __btree_path_level_init(path, l); + + /* Don't leave it locked if we're not supposed to: */ + if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED) + btree_node_unlock(path, l); + } + + if (l != path->level) + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); +out: + bch2_btree_path_verify(trans, path); +#ifdef CONFIG_BCACHEFS_DEBUG + trace_path_set_pos(trans->ip, _RET_IP_, path->btree_id, + &old_pos, &new_pos, l); +#endif + return path; +} + +static inline struct btree_path * __must_check +btree_path_set_pos(struct btree_trans *trans, + struct btree_path *path, struct bpos new_pos, + bool intent) +{ + int cmp = bpos_cmp(new_pos, path->pos); + + return cmp + ? __bch2_btree_path_set_pos(trans, path, new_pos, intent, cmp) + : path; +} + +/* Btree path: main interface: */ + +static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path) +{ + struct btree_path *next; + + next = prev_btree_path(trans, path); + if (next && !btree_path_cmp(next, path)) + return next; + + next = next_btree_path(trans, path); + if (next && !btree_path_cmp(next, path)) + return next; + + return NULL; +} + +static bool have_node_at_pos(struct btree_trans *trans, struct btree_path *path) +{ + struct btree_path *next; + + next = prev_btree_path(trans, path); + if (next && path_l(next)->b == path_l(path)->b) + return true; + + next = next_btree_path(trans, path); + if (next && path_l(next)->b == path_l(path)->b) + return true; + + return false; +} + +static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path) { - return iter->uptodate >= BTREE_ITER_NEED_RELOCK - ? __bch2_btree_iter_traverse(iter) - : 0; + __bch2_btree_path_unlock(path); + btree_path_list_remove(trans, path); + trans->paths_allocated &= ~(1ULL << path->idx); } +void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) +{ + struct btree_path *dup; + + EBUG_ON(trans->paths + path->idx != path); + EBUG_ON(!path->ref); + + if (!__btree_path_put(path, intent)) + return; + + /* + * Perhaps instead we should check for duplicate paths in traverse_all: + */ + if (path->preserve && + (dup = have_path_at_pos(trans, path))) { + dup->preserve = true; + path->preserve = false; + } + + if (!path->preserve && + have_node_at_pos(trans, path)) + __bch2_path_free(trans, path); +} + +noinline __cold +void bch2_dump_trans_paths_updates(struct btree_trans *trans) +{ + struct btree_path *path; + struct btree_insert_entry *i; + unsigned idx; + char buf[300]; + + btree_trans_sort_paths(trans); + + trans_for_each_path_inorder(trans, path, idx) + printk(KERN_ERR "path: idx %u ref %u:%u%s btree %s pos %s %pS\n", + path->idx, path->ref, path->intent_ref, + path->preserve ? 
" preserve" : "", + bch2_btree_ids[path->btree_id], + (bch2_bpos_to_text(&PBUF(buf), path->pos), buf), +#ifdef CONFIG_BCACHEFS_DEBUG + (void *) path->ip_allocated +#else + NULL +#endif + ); + + trans_for_each_update(trans, i) + printk(KERN_ERR "update: btree %s %s %pS\n", + bch2_btree_ids[i->btree_id], + (bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)), buf), + (void *) i->ip_allocated); +} + +static struct btree_path *btree_path_alloc(struct btree_trans *trans, + struct btree_path *pos) +{ + struct btree_path *path; + unsigned idx; + + if (unlikely(trans->paths_allocated == + ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) { + bch2_dump_trans_paths_updates(trans); + panic("trans path oveflow\n"); + } + + idx = __ffs64(~trans->paths_allocated); + trans->paths_allocated |= 1ULL << idx; + + path = &trans->paths[idx]; + + path->idx = idx; + path->ref = 0; + path->intent_ref = 0; + path->nodes_locked = 0; + path->nodes_intent_locked = 0; + + btree_path_list_add(trans, pos, path); + return path; +} + +struct btree_path *bch2_path_get(struct btree_trans *trans, bool cached, + enum btree_id btree_id, struct bpos pos, + unsigned locks_want, unsigned level, + bool intent) +{ + struct btree_path *path, *best = NULL; + struct bpos pos_min = POS_MIN; + int i; + + BUG_ON(trans->restarted); + + trans_for_each_path(trans, path) { + if (path->cached != cached || + path->btree_id != btree_id || + path->level != level) + continue; + + if (best) { + int cmp = bkey_cmp(bpos_diff(best->pos, pos), + bpos_diff(path->pos, pos)); + + if (cmp < 0 || + ((cmp == 0 && (path->ref || path->preserve)))) + continue; + } + + best = path; + } + + if (best) { + __btree_path_get(best, intent); + path = btree_path_set_pos(trans, best, pos, intent); + path->preserve = true; + } else { + path = btree_path_alloc(trans, NULL); + + __btree_path_get(path, intent); + path->pos = pos; + path->btree_id = btree_id; + path->cached = cached; + path->preserve = true; + path->uptodate = BTREE_ITER_NEED_TRAVERSE; + path->should_be_locked = false; + path->level = level; + path->locks_want = locks_want; + path->nodes_locked = 0; + path->nodes_intent_locked = 0; + for (i = 0; i < ARRAY_SIZE(path->l); i++) + path->l[i].b = BTREE_ITER_NO_NODE_INIT; +#ifdef CONFIG_BCACHEFS_DEBUG + path->ip_allocated = _RET_IP_; +#endif + trans->paths_sorted = false; + } + + if (path->intent_ref) + locks_want = max(locks_want, level + 1); + + /* + * If the path has locks_want greater than requested, we don't downgrade + * it here - on transaction restart because btree node split needs to + * upgrade locks, we might be putting/getting the iterator again. + * Downgrading iterators only happens via bch2_trans_downgrade(), after + * a successful transaction commit. + */ + + locks_want = min(locks_want, BTREE_MAX_DEPTH); + if (locks_want > path->locks_want) { + path->locks_want = locks_want; + btree_path_get_locks(trans, path, true, _THIS_IP_); + } + + trace_trans_get_path(_RET_IP_, trans->ip, btree_id, + &pos, locks_want, path->uptodate, + best ? &best->pos : &pos_min, + best ? best->locks_want : U8_MAX, + best ? best->uptodate : U8_MAX); + + return path; +} + +inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u) +{ + + struct bkey_s_c k; + + BUG_ON(path->uptodate != BTREE_ITER_UPTODATE); + + if (!path->cached) { + struct btree_path_level *l = path_l(path); + struct bkey_packed *_k = + bch2_btree_node_iter_peek_all(&l->iter, l->b); + + k = _k ? 
bkey_disassemble(l->b, _k, u) : bkey_s_c_null; + + EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0); + + if (!k.k || bpos_cmp(path->pos, k.k->p)) + goto hole; + } else { + struct bkey_cached *ck = (void *) path->l[0].b; + + EBUG_ON(path->btree_id != ck->key.btree_id || + bkey_cmp(path->pos, ck->key.pos)); + + /* BTREE_ITER_CACHED_NOFILL? */ + if (unlikely(!ck->valid)) + goto hole; + + k = bkey_i_to_s_c(ck->k); + } + + return k; +hole: + bkey_init(u); + u->p = path->pos; + return (struct bkey_s_c) { u, NULL }; +} + +/* Btree iterators: */ + int __must_check bch2_btree_iter_traverse(struct btree_iter *iter) { int ret; - btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); + iter->path = btree_path_set_pos(iter->trans, iter->path, + btree_iter_search_key(iter), + iter->flags & BTREE_ITER_INTENT); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); if (ret) return ret; - iter->should_be_locked = true; + iter->path->should_be_locked = true; return 0; } @@ -1518,23 +1879,22 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter) struct btree *b = NULL; int ret; - EBUG_ON(iter->cached); + EBUG_ON(iter->path->cached); bch2_btree_iter_verify(iter); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags); if (ret) goto out; - b = btree_iter_node(iter, iter->level); + b = btree_path_node(iter->path, iter->path->level); if (!b) goto out; BUG_ON(bpos_cmp(b->key.k.p, iter->pos) < 0); bkey_init(&iter->k); - iter->k.p = iter->pos = iter->real_pos = b->key.k.p; - iter->trans->iters_sorted = false; - iter->should_be_locked = true; + iter->k.p = iter->pos = b->key.k.p; + iter->path->should_be_locked = true; out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -1544,29 +1904,31 @@ out: struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) { + struct btree_trans *trans = iter->trans; + struct btree_path *path = iter->path; struct btree *b = NULL; int ret; - EBUG_ON(iter->cached); + EBUG_ON(iter->path->cached); bch2_btree_iter_verify(iter); /* already got to end? */ - if (!btree_iter_node(iter, iter->level)) + if (!btree_path_node(path, path->level)) goto out; - bch2_trans_cond_resched(iter->trans); + bch2_trans_cond_resched(trans); - btree_node_unlock(iter, iter->level); - iter->l[iter->level].b = BTREE_ITER_NO_NODE_UP; - iter->level++; + btree_node_unlock(path, path->level); + path->l[path->level].b = BTREE_ITER_NO_NODE_UP; + path->level++; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - ret = btree_iter_traverse(iter); + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + ret = bch2_btree_path_traverse(trans, path, iter->flags); if (ret) goto out; /* got to end? 
*/ - b = btree_iter_node(iter, iter->level); + b = btree_path_node(path, path->level); if (!b) goto out; @@ -1575,28 +1937,29 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) * Haven't gotten to the end of the parent node: go back down to * the next child node */ - btree_iter_set_search_pos(iter, bpos_successor(iter->pos)); + path = iter->path = + btree_path_set_pos(trans, path, bpos_successor(iter->pos), + iter->flags & BTREE_ITER_INTENT); /* Unlock to avoid screwing up our lock invariants: */ - btree_node_unlock(iter, iter->level); + btree_node_unlock(path, path->level); - iter->level = iter->min_depth; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); + path->level = iter->min_depth; + btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); bch2_btree_iter_verify(iter); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(trans, path, iter->flags); if (ret) { b = NULL; goto out; } - b = iter->l[iter->level].b; + b = path->l[path->level].b; } bkey_init(&iter->k); - iter->k.p = iter->pos = iter->real_pos = b->key.k.p; - iter->trans->iters_sorted = false; - iter->should_be_locked = true; + iter->k.p = iter->pos = b->key.k.p; + iter->path->should_be_locked = true; out: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); @@ -1606,60 +1969,6 @@ out: /* Iterate across keys (in leaf nodes only) */ -static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos) -{ - struct btree_trans *trans = iter->trans; -#ifdef CONFIG_BCACHEFS_DEBUG - struct bpos old_pos = iter->real_pos; -#endif - int cmp = bpos_cmp(new_pos, iter->real_pos); - unsigned l = iter->level; - - EBUG_ON(trans->restarted); - - if (!cmp) - goto out; - - iter->real_pos = new_pos; - iter->should_be_locked = false; - trans->iters_sorted = false; - - if (unlikely(iter->cached)) { - btree_node_unlock(iter, 0); - iter->l[0].b = BTREE_ITER_NO_NODE_CACHED; - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - return; - } - - l = btree_iter_up_until_good_node(trans, iter, cmp); - - if (btree_iter_node(iter, l)) { - /* - * We might have to skip over many keys, or just a few: try - * advancing the node iterator, and if we have to skip over too - * many keys just reinit it (or if we're rewinding, since that - * is expensive). 
- */ - if (cmp < 0 || - !btree_iter_advance_to_pos(iter, &iter->l[l], 8)) - __btree_iter_level_init(iter, l); - - /* Don't leave it locked if we're not supposed to: */ - if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED) - btree_node_unlock(iter, l); - } -out: - if (l != iter->level) - btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); - - bch2_btree_iter_verify(iter); -#ifdef CONFIG_BCACHEFS_DEBUG - trace_iter_set_search_pos(trans->ip, _RET_IP_, - iter->btree_id, - &old_pos, &new_pos, l); -#endif -} - inline bool bch2_btree_iter_advance(struct btree_iter *iter) { struct bpos pos = iter->k.p; @@ -1684,7 +1993,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) return ret; } -static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *iter) +struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter) { struct btree_insert_entry *i; struct bkey_i *ret = NULL; @@ -1694,7 +2003,7 @@ static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *ite continue; if (i->btree_id > iter->btree_id) break; - if (bpos_cmp(i->k->k.p, iter->real_pos) < 0) + if (bpos_cmp(i->k->k.p, iter->path->pos) < 0) continue; if (!ret || bpos_cmp(i->k->k.p, ret->k.p) < 0) ret = i->k; @@ -1703,33 +2012,27 @@ static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *ite return ret; } -static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter) -{ - return iter->flags & BTREE_ITER_WITH_UPDATES - ? __btree_trans_peek_updates(iter) - : NULL; -} - /** * bch2_btree_iter_peek: returns first key greater than or equal to iterator's * current position */ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) { - struct btree_iter_level *l = &iter->l[0]; + struct btree_trans *trans = iter->trans; struct bpos search_key = btree_iter_search_key(iter); struct bkey_i *next_update; struct bkey_s_c k; - int ret; + int ret, cmp; - EBUG_ON(iter->cached || iter->level); + EBUG_ON(iter->path->cached || iter->path->level); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); while (1) { - btree_iter_set_search_pos(iter, search_key); + iter->path = btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_INTENT); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); @@ -1738,7 +2041,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) } next_update = btree_trans_peek_updates(iter); - k = btree_iter_level_peek_all(iter, l); + k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k); /* * In the btree, deleted keys sort before non deleted: */ if (k.k && bkey_deleted(k.k) && @@ -1750,7 +2053,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) if (next_update && bpos_cmp(next_update->k.p, - k.k ? k.k->p : l->b->key.k.p) <= 0) { + k.k ? 
k.k->p : iter->path->l[0].b->key.k.p) <= 0) { iter->k = next_update->k; k = bkey_i_to_s_c(next_update); } @@ -1761,13 +2064,12 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) /* Advance to next key: */ search_key = bkey_successor(iter, k.k->p); - } else if (likely(bpos_cmp(l->b->key.k.p, SPOS_MAX))) { + } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) { /* Advance to next leaf node: */ - search_key = bpos_successor(l->b->key.k.p); + search_key = bpos_successor(iter->path->l[0].b->key.k.p); } else { /* End of btree: */ bch2_btree_iter_set_pos(iter, SPOS_MAX); - iter->real_pos = SPOS_MAX; k = bkey_s_c_null; goto out; } @@ -1781,9 +2083,15 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) iter->pos = k.k->p; else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) iter->pos = bkey_start_pos(k.k); - iter->real_pos = k.k->p; + + cmp = bpos_cmp(k.k->p, iter->path->pos); + if (cmp) { + iter->path->pos = k.k->p; + trans->paths_sorted = false; + } out: - iter->should_be_locked = true; + iter->path->should_be_locked = true; + bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); return k; @@ -1807,20 +2115,21 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) */ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) { + struct btree_trans *trans = iter->trans; struct bpos search_key = iter->pos; - struct btree_iter_level *l = &iter->l[0]; struct bkey_s_c k; int ret; - EBUG_ON(iter->cached || iter->level); + EBUG_ON(iter->path->cached || iter->path->level); EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); while (1) { - btree_iter_set_search_pos(iter, search_key); + iter->path = btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_INTENT); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { /* ensure that iter->k is consistent with iter->pos: */ bch2_btree_iter_set_pos(iter, iter->pos); @@ -1828,18 +2137,20 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) goto out; } - k = btree_iter_level_peek(iter, l); + k = btree_path_level_peek(trans, iter->path, + &iter->path->l[0], &iter->k); if (!k.k || ((iter->flags & BTREE_ITER_IS_EXTENTS) ? 
bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0 : bkey_cmp(k.k->p, iter->pos) > 0)) - k = btree_iter_level_prev(iter, l); + k = btree_path_level_prev(trans, iter->path, + &iter->path->l[0], &iter->k); if (likely(k.k)) { break; - } else if (likely(bpos_cmp(l->b->data->min_key, POS_MIN))) { + } else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) { /* Advance to previous leaf node: */ - search_key = bpos_predecessor(l->b->data->min_key); + search_key = bpos_predecessor(iter->path->l[0].b->data->min_key); } else { /* Start of btree: */ bch2_btree_iter_set_pos(iter, POS_MIN); @@ -1854,9 +2165,11 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) if (bkey_cmp(k.k->p, iter->pos) < 0) iter->pos = k.k->p; out: - iter->should_be_locked = true; + iter->path->should_be_locked = true; + bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); + return k; } @@ -1879,7 +2192,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bkey_s_c k; int ret; - EBUG_ON(iter->level); + EBUG_ON(iter->path->level); bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); @@ -1893,9 +2206,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } search_key = btree_iter_search_key(iter); - btree_iter_set_search_pos(iter, search_key); + iter->path = btree_path_set_pos(trans, iter->path, search_key, + iter->flags & BTREE_ITER_INTENT); - ret = btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) return bkey_s_c_err(ret); @@ -1903,23 +2217,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bkey_i *next_update; next_update = btree_trans_peek_updates(iter); - - if (!iter->cached) { - k = btree_iter_level_peek_all(iter, &iter->l[0]); - EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, iter->pos) == 0); - } else { - struct bkey_cached *ck = (void *) iter->l[0].b; - EBUG_ON(iter->btree_id != ck->key.btree_id || - bkey_cmp(iter->pos, ck->key.pos)); - BUG_ON(!ck->valid); - - k = bkey_i_to_s_c(ck->k); - } - if (next_update && - (!k.k || bpos_cmp(next_update->k.p, k.k->p) <= 0)) { + !bpos_cmp(next_update->k.p, iter->pos)) { iter->k = next_update->k; k = bkey_i_to_s_c(next_update); + } else { + k = bch2_btree_path_peek_slot(iter->path, &iter->k); } if (!k.k || @@ -1934,14 +2237,16 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bpos next; if (iter->flags & BTREE_ITER_INTENT) { - struct btree_iter *child = - btree_iter_child_alloc(trans, iter, _THIS_IP_); + struct btree_iter iter2; - btree_iter_copy(trans, child, iter); - k = bch2_btree_iter_peek(child); + bch2_trans_copy_iter(&iter2, iter); + k = bch2_btree_iter_peek(&iter2); - if (k.k && !bkey_err(k)) - iter->k = child->k; + if (k.k && !bkey_err(k)) { + iter->k = iter2.k; + k.k = &iter->k; + } + bch2_trans_iter_exit(trans, &iter2); } else { struct bpos pos = iter->pos; @@ -1969,9 +2274,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } } + iter->path->should_be_locked = true; + bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(iter); - iter->should_be_locked = true; return k; } @@ -1992,47 +2298,26 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter) return bch2_btree_iter_peek_slot(iter); } -static inline void bch2_btree_iter_init(struct btree_trans *trans, - struct btree_iter *iter, enum btree_id btree_id) -{ - struct bch_fs *c = trans->c; - unsigned i; - - iter->trans = trans; - iter->uptodate = 
BTREE_ITER_NEED_TRAVERSE; - iter->btree_id = btree_id; - iter->real_pos = POS_MIN; - iter->level = 0; - iter->min_depth = 0; - iter->locks_want = 0; - iter->nodes_locked = 0; - iter->nodes_intent_locked = 0; - for (i = 0; i < ARRAY_SIZE(iter->l); i++) - iter->l[i].b = BTREE_ITER_NO_NODE_INIT; - - prefetch(c->btree_roots[btree_id].b); -} - /* new transactional stuff: */ #ifdef CONFIG_BCACHEFS_DEBUG static void btree_trans_verify_sorted_refs(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; unsigned i; - BUG_ON(trans->nr_sorted != hweight64(trans->iters_linked)); + BUG_ON(trans->nr_sorted != hweight64(trans->paths_allocated)); - trans_for_each_iter(trans, iter) { - BUG_ON(iter->sorted_idx >= trans->nr_sorted); - BUG_ON(trans->sorted[iter->sorted_idx] != iter->idx); + trans_for_each_path(trans, path) { + BUG_ON(path->sorted_idx >= trans->nr_sorted); + BUG_ON(trans->sorted[path->sorted_idx] != path->idx); } for (i = 0; i < trans->nr_sorted; i++) { unsigned idx = trans->sorted[i]; - EBUG_ON(!(trans->iters_linked & (1ULL << idx))); - BUG_ON(trans->iters[idx].sorted_idx != i); + EBUG_ON(!(trans->paths_allocated & (1ULL << idx))); + BUG_ON(trans->paths[idx].sorted_idx != i); } } #else @@ -2042,17 +2327,17 @@ static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {} static void btree_trans_verify_sorted(struct btree_trans *trans) { #ifdef CONFIG_BCACHEFS_DEBUG - struct btree_iter *iter, *prev = NULL; + struct btree_path *path, *prev = NULL; unsigned i; - trans_for_each_iter_inorder(trans, iter, i) { - BUG_ON(prev && btree_iter_cmp(prev, iter) > 0); - prev = iter; + trans_for_each_path_inorder(trans, path, i) { + BUG_ON(prev && btree_path_cmp(prev, path) > 0); + prev = path; } #endif } -static noinline void __btree_trans_sort_iters(struct btree_trans *trans) +static noinline void __btree_trans_sort_paths(struct btree_trans *trans) { int i, l = 0, r = trans->nr_sorted, inc = 1; bool swapped; @@ -2067,11 +2352,11 @@ static noinline void __btree_trans_sort_iters(struct btree_trans *trans) for (i = inc > 0 ? 
l : r - 2; i + 1 < r && i >= l; i += inc) { - if (btree_iter_cmp(trans->iters + trans->sorted[i], - trans->iters + trans->sorted[i + 1]) > 0) { + if (btree_path_cmp(trans->paths + trans->sorted[i], + trans->paths + trans->sorted[i + 1]) > 0) { swap(trans->sorted[i], trans->sorted[i + 1]); - trans->iters[trans->sorted[i]].sorted_idx = i; - trans->iters[trans->sorted[i + 1]].sorted_idx = i + 1; + trans->paths[trans->sorted[i]].sorted_idx = i; + trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1; swapped = true; } } @@ -2083,246 +2368,82 @@ static noinline void __btree_trans_sort_iters(struct btree_trans *trans) inc = -inc; } while (swapped); - trans->iters_sorted = true; + trans->paths_sorted = true; btree_trans_verify_sorted(trans); } -static inline void btree_trans_sort_iters(struct btree_trans *trans) +static inline void btree_trans_sort_paths(struct btree_trans *trans) { btree_trans_verify_sorted_refs(trans); - if (trans->iters_sorted) { + if (trans->paths_sorted) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) btree_trans_verify_sorted(trans); return; } - __btree_trans_sort_iters(trans); + __btree_trans_sort_paths(trans); } -static inline void btree_iter_list_remove(struct btree_trans *trans, - struct btree_iter *iter) +static inline void btree_path_list_remove(struct btree_trans *trans, + struct btree_path *path) { unsigned i; - EBUG_ON(iter->sorted_idx >= trans->nr_sorted); + EBUG_ON(path->sorted_idx >= trans->nr_sorted); #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS trans->nr_sorted--; - memmove_u64s_down_small(trans->sorted + iter->sorted_idx, - trans->sorted + iter->sorted_idx + 1, - DIV_ROUND_UP(trans->nr_sorted - iter->sorted_idx, 8)); + memmove_u64s_down_small(trans->sorted + path->sorted_idx, + trans->sorted + path->sorted_idx + 1, + DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); #else - array_remove_item(trans->sorted, trans->nr_sorted, iter->sorted_idx); + array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); #endif - for (i = iter->sorted_idx; i < trans->nr_sorted; i++) - trans->iters[trans->sorted[i]].sorted_idx = i; + for (i = path->sorted_idx; i < trans->nr_sorted; i++) + trans->paths[trans->sorted[i]].sorted_idx = i; - iter->sorted_idx = U8_MAX; + path->sorted_idx = U8_MAX; } -static inline void btree_iter_list_add(struct btree_trans *trans, - struct btree_iter *pos, - struct btree_iter *iter) +static inline void btree_path_list_add(struct btree_trans *trans, + struct btree_path *pos, + struct btree_path *path) { unsigned i; - iter->sorted_idx = pos ? pos->sorted_idx + 1 : 0; + path->sorted_idx = pos ? 
pos->sorted_idx + 1 : 0; #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - memmove_u64s_up_small(trans->sorted + iter->sorted_idx + 1, - trans->sorted + iter->sorted_idx, - DIV_ROUND_UP(trans->nr_sorted - iter->sorted_idx, 8)); + memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, + trans->sorted + path->sorted_idx, + DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); trans->nr_sorted++; - trans->sorted[iter->sorted_idx] = iter->idx; + trans->sorted[path->sorted_idx] = path->idx; #else - array_insert_item(trans->sorted, trans->nr_sorted, iter->sorted_idx, iter->idx); + array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx); #endif - for (i = iter->sorted_idx; i < trans->nr_sorted; i++) - trans->iters[trans->sorted[i]].sorted_idx = i; - - btree_trans_verify_sorted_refs(trans); -} - -static void btree_iter_child_free(struct btree_trans *trans, struct btree_iter *iter) -{ - struct btree_iter *child = btree_iter_child(trans, iter); - - if (child) { - bch2_trans_iter_free(trans, child); - iter->child_idx = U8_MAX; - } -} - -static struct btree_iter *btree_iter_child_alloc(struct btree_trans *trans, - struct btree_iter *iter, - unsigned long ip) -{ - struct btree_iter *child = btree_iter_child(trans, iter); - - if (!child) { - child = btree_trans_iter_alloc(trans, iter); - child->ip_allocated = ip; - iter->child_idx = child->idx; - - trans->iters_live |= 1ULL << child->idx; - trans->iters_touched |= 1ULL << child->idx; - } - - return child; -} - -static inline void __bch2_trans_iter_free(struct btree_trans *trans, - unsigned idx) -{ - btree_iter_child_free(trans, &trans->iters[idx]); - - btree_iter_list_remove(trans, &trans->iters[idx]); - - __bch2_btree_iter_unlock(&trans->iters[idx]); - trans->iters_linked &= ~(1ULL << idx); - trans->iters_live &= ~(1ULL << idx); - trans->iters_touched &= ~(1ULL << idx); - - btree_trans_verify_sorted_refs(trans); -} - -static bool have_iter_at_pos(struct btree_trans *trans, - struct btree_iter *iter) -{ - struct btree_iter *n; - - n = prev_btree_iter(trans, iter); - if (n && !btree_iter_cmp(n, iter)) - return true; - - n = next_btree_iter(trans, iter); - if (n && !btree_iter_cmp(n, iter)) - return true; - - return false; -} - -int bch2_trans_iter_put(struct btree_trans *trans, - struct btree_iter *iter) -{ - int ret; - - if (IS_ERR_OR_NULL(iter)) - return 0; - - BUG_ON(trans->iters + iter->idx != iter); - BUG_ON(!btree_iter_live(trans, iter)); - - ret = btree_iter_err(iter); - - if (!(iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) && - (!(trans->iters_touched & (1ULL << iter->idx)) || - have_iter_at_pos(trans, iter))) - __bch2_trans_iter_free(trans, iter->idx); - - trans->iters_live &= ~(1ULL << iter->idx); - return ret; -} - -int bch2_trans_iter_free(struct btree_trans *trans, - struct btree_iter *iter) -{ - if (IS_ERR_OR_NULL(iter)) - return 0; - - set_btree_iter_dontneed(trans, iter); - - return bch2_trans_iter_put(trans, iter); -} - -noinline __cold -void bch2_dump_trans_iters_updates(struct btree_trans *trans) -{ - struct btree_iter *iter; - struct btree_insert_entry *i; - unsigned idx; - char buf1[300], buf2[100]; - - btree_trans_sort_iters(trans); - - trans_for_each_iter_inorder(trans, iter, idx) - printk(KERN_ERR "iter: btree %s pos %s real_pos %s%s%s%s %pS\n", - bch2_btree_ids[iter->btree_id], - (bch2_bpos_to_text(&PBUF(buf1), iter->pos), buf1), - (bch2_bpos_to_text(&PBUF(buf2), iter->real_pos), buf2), - btree_iter_live(trans, iter) ? " live" : "", - (trans->iters_touched & (1ULL << iter->idx)) ? 
" touched" : "", - iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT ? " keep" : "", - (void *) iter->ip_allocated); - - trans_for_each_update(trans, i) - printk(KERN_ERR "update: btree %s %s %pS\n", - bch2_btree_ids[i->btree_id], - (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, bkey_i_to_s_c(i->k)), buf1), - (void *) i->ip_allocated); -} - -static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans, - struct btree_iter *pos) -{ - struct btree_iter *iter; - unsigned idx; + for (i = path->sorted_idx; i < trans->nr_sorted; i++) + trans->paths[trans->sorted[i]].sorted_idx = i; btree_trans_verify_sorted_refs(trans); - - if (unlikely(trans->iters_linked == - ~((~0ULL << 1) << (BTREE_ITER_MAX - 1)))) { - bch2_dump_trans_iters_updates(trans); - panic("trans iter oveflow\n"); - } - - idx = __ffs64(~trans->iters_linked); - iter = &trans->iters[idx]; - - iter->trans = trans; - iter->idx = idx; - iter->child_idx = U8_MAX; - iter->sorted_idx = U8_MAX; - iter->flags = 0; - iter->nodes_locked = 0; - iter->nodes_intent_locked = 0; - trans->iters_linked |= 1ULL << idx; - - btree_iter_list_add(trans, pos, iter); - return iter; } -static void btree_iter_copy(struct btree_trans *trans, struct btree_iter *dst, - struct btree_iter *src) +void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter) { - unsigned i, offset = offsetof(struct btree_iter, flags); - - __bch2_btree_iter_unlock(dst); - btree_iter_child_free(trans, dst); - - memcpy((void *) dst + offset, - (void *) src + offset, - sizeof(struct btree_iter) - offset); - - for (i = 0; i < BTREE_MAX_DEPTH; i++) - if (btree_node_locked(dst, i)) - six_lock_increment(&dst->l[i].b->c.lock, - __btree_lock_want(dst, i)); - - dst->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; - trans->iters_sorted = false; + if (iter->path) + bch2_path_put(trans, iter->path, + iter->flags & BTREE_ITER_INTENT); + iter->path = NULL; } -struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos, - unsigned locks_want, - unsigned depth, - unsigned flags) +static void __bch2_trans_iter_init(struct btree_trans *trans, + struct btree_iter *iter, + enum btree_id btree_id, struct bpos pos, + unsigned locks_want, + unsigned depth, + unsigned flags) { - struct btree_iter *iter, *best = NULL; - struct bpos real_pos, pos_min = POS_MIN; - EBUG_ON(trans->restarted); if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && @@ -2337,127 +2458,58 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, pos.snapshot = btree_type_has_snapshots(btree_id) ? 
U32_MAX : 0; - real_pos = pos; - - if ((flags & BTREE_ITER_IS_EXTENTS) && - bkey_cmp(pos, POS_MAX)) - real_pos = bpos_nosnap_successor(pos); - - trans_for_each_iter(trans, iter) { - if (iter->cached != (flags & BTREE_ITER_CACHED)) - continue; - - if (iter->btree_id != btree_id) - continue; - - if (best) { - int cmp = bkey_cmp(bpos_diff(best->real_pos, real_pos), - bpos_diff(iter->real_pos, real_pos)); - - if (cmp < 0 || - ((cmp == 0 && btree_iter_keep(trans, iter)))) - continue; - } - - best = iter; - } - - if (!best) { - iter = btree_trans_iter_alloc(trans, best); - bch2_btree_iter_init(trans, iter, btree_id); - } else if (btree_iter_keep(trans, best)) { - iter = btree_trans_iter_alloc(trans, best); - btree_iter_copy(trans, iter, best); - } else { - iter = best; - } - - trans->iters_live |= 1ULL << iter->idx; - trans->iters_touched |= 1ULL << iter->idx; - - iter->cached = flags & BTREE_ITER_CACHED; + iter->trans = trans; + iter->path = NULL; + iter->btree_id = btree_id; + iter->min_depth = depth; iter->flags = flags; iter->snapshot = pos.snapshot; + iter->pos = pos; + iter->k.type = KEY_TYPE_deleted; + iter->k.p = pos; + iter->k.size = 0; - /* - * If the iterator has locks_want greater than requested, we explicitly - * do not downgrade it here - on transaction restart because btree node - * split needs to upgrade locks, we might be putting/getting the - * iterator again. Downgrading iterators only happens via an explicit - * bch2_trans_downgrade(). - */ - - locks_want = min(locks_want, BTREE_MAX_DEPTH); - if (locks_want > iter->locks_want) { - iter->locks_want = locks_want; - btree_iter_get_locks(trans, iter, true, _THIS_IP_); - } - - while (iter->level != depth) { - btree_node_unlock(iter, iter->level); - iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; - iter->uptodate = BTREE_ITER_NEED_TRAVERSE; - if (iter->level < depth) - iter->level++; - else - iter->level--; - } - - iter->min_depth = depth; - - bch2_btree_iter_set_pos(iter, pos); - btree_iter_set_search_pos(iter, real_pos); - - trace_trans_get_iter(_RET_IP_, trans->ip, - btree_id, - &real_pos, locks_want, iter->uptodate, - best ? &best->real_pos : &pos_min, - best ? best->locks_want : U8_MAX, - best ? 
best->uptodate : U8_MAX); - - return iter; + iter->path = bch2_path_get(trans, + flags & BTREE_ITER_CACHED, + btree_id, + btree_iter_search_key(iter), + locks_want, + depth, + flags & BTREE_ITER_INTENT); } -struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, - enum btree_id btree_id, - struct bpos pos, - unsigned locks_want, - unsigned depth, - unsigned flags) +void bch2_trans_iter_init(struct btree_trans *trans, + struct btree_iter *iter, + unsigned btree_id, struct bpos pos, + unsigned flags) { - struct btree_iter *iter = - __bch2_trans_get_iter(trans, btree_id, pos, - locks_want, depth, - BTREE_ITER_NOT_EXTENTS| - __BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_ALL_SNAPSHOTS| - flags); - - BUG_ON(bkey_cmp(iter->pos, pos)); - BUG_ON(iter->locks_want != min(locks_want, BTREE_MAX_DEPTH)); - BUG_ON(iter->level != depth); - BUG_ON(iter->min_depth != depth); - iter->ip_allocated = _RET_IP_; - - return iter; + __bch2_trans_iter_init(trans, iter, btree_id, pos, + 0, 0, flags); } -struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, - struct btree_iter *src) +void bch2_trans_node_iter_init(struct btree_trans *trans, + struct btree_iter *iter, + enum btree_id btree_id, + struct bpos pos, + unsigned locks_want, + unsigned depth, + unsigned flags) { - struct btree_iter *iter; - - iter = btree_trans_iter_alloc(trans, src); - btree_iter_copy(trans, iter, src); - - trans->iters_live |= 1ULL << iter->idx; - /* - * We don't need to preserve this iter since it's cheap to copy it - * again - this will cause trans_iter_put() to free it right away: - */ - set_btree_iter_dontneed(trans, iter); + __bch2_trans_iter_init(trans, iter, btree_id, pos, locks_want, depth, + BTREE_ITER_NOT_EXTENTS| + __BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_ALL_SNAPSHOTS| + flags); + BUG_ON(iter->path->locks_want < min(locks_want, BTREE_MAX_DEPTH)); + BUG_ON(iter->path->level != depth); + BUG_ON(iter->min_depth != depth); +} - return iter; +void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src) +{ + *dst = *src; + if (src->path) + __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT); } void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) @@ -2498,20 +2550,6 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } -inline void bch2_trans_unlink_iters(struct btree_trans *trans) -{ - u64 iters = trans->iters_linked & - ~trans->iters_touched & - ~trans->iters_live; - - while (iters) { - unsigned idx = __ffs64(iters); - - iters &= ~(1ULL << idx); - __bch2_trans_iter_free(trans, idx); - } -} - /** * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset @@ -2522,17 +2560,11 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans) */ void bch2_trans_begin(struct btree_trans *trans) { - struct btree_iter *iter; - - trans_for_each_iter(trans, iter) - iter->flags &= ~BTREE_ITER_KEEP_UNTIL_COMMIT; + struct btree_insert_entry *i; + struct btree_path *path; - /* - * XXX: we shouldn't be doing this if the transaction was restarted, but - * currently we still overflow transaction iterators if we do that - * */ - bch2_trans_unlink_iters(trans); - trans->iters_touched &= trans->iters_live; + trans_for_each_update(trans, i) + __btree_path_put(i->path, true); trans->extra_journal_res = 0; trans->nr_updates = 0; @@ -2550,29 +2582,41 @@ void bch2_trans_begin(struct btree_trans *trans) (void *) &trans->fs_usage_deltas->memset_start); } + trans_for_each_path(trans, path) { + /* + * XXX: we probably shouldn't be 
doing this if the transaction + * was restarted, but currently we still overflow transaction + * iterators if we do that + */ + if (!path->ref && !path->preserve) + __bch2_path_free(trans, path); + else + path->preserve = path->should_be_locked = false; + } + bch2_trans_cond_resched(trans); if (trans->restarted) - bch2_btree_iter_traverse_all(trans); + bch2_btree_path_traverse_all(trans); trans->restarted = false; } -static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c) +static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) { - size_t iters_bytes = sizeof(struct btree_iter) * BTREE_ITER_MAX; + size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX; size_t updates_bytes = sizeof(struct btree_insert_entry) * BTREE_ITER_MAX; void *p = NULL; BUG_ON(trans->used_mempool); #ifdef __KERNEL__ - p = this_cpu_xchg(c->btree_iters_bufs->iter, NULL); + p = this_cpu_xchg(c->btree_paths_bufs->path , NULL); #endif if (!p) - p = mempool_alloc(&trans->c->btree_iters_pool, GFP_NOFS); + p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); - trans->iters = p; p += iters_bytes; + trans->paths = p; p += paths_bytes; trans->updates = p; p += updates_bytes; } @@ -2585,11 +2629,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, trans->c = c; trans->ip = _RET_IP_; - /* - * reallocating iterators currently completely breaks - * bch2_trans_iter_put(), we always allocate the max: - */ - bch2_trans_alloc_iters(trans, c); + bch2_trans_alloc_paths(trans, c); if (expected_mem_bytes) { expected_mem_bytes = roundup_pow_of_two(expected_mem_bytes); @@ -2613,54 +2653,63 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, #endif } +static void check_btree_paths_leaked(struct btree_trans *trans) +{ +#ifdef CONFIG_BCACHEFS_DEBUG + struct bch_fs *c = trans->c; + struct btree_path *path; + + trans_for_each_path(trans, path) + if (path->ref) + goto leaked; + return; +leaked: + bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip); + trans_for_each_path(trans, path) + if (path->ref) + printk(KERN_ERR " btree %s %pS\n", + bch2_btree_ids[path->btree_id], + (void *) path->ip_allocated); + /* Be noisy about this: */ + bch2_fatal_error(c); +#endif +} + int bch2_trans_exit(struct btree_trans *trans) __releases(&c->btree_trans_barrier) { + struct btree_insert_entry *i; struct bch_fs *c = trans->c; bch2_trans_unlock(trans); -#ifdef CONFIG_BCACHEFS_DEBUG - if (trans->iters_live) { - struct btree_iter *iter; - - trans_for_each_iter(trans, iter) - btree_iter_child_free(trans, iter); - } + trans_for_each_update(trans, i) + __btree_path_put(i->path, true); + trans->nr_updates = 0; - if (trans->iters_live) { - struct btree_iter *iter; - - bch_err(c, "btree iterators leaked!"); - trans_for_each_iter(trans, iter) - if (btree_iter_live(trans, iter)) - printk(KERN_ERR " btree %s allocated at %pS\n", - bch2_btree_ids[iter->btree_id], - (void *) iter->ip_allocated); - /* Be noisy about this: */ - bch2_fatal_error(c); - } + check_btree_paths_leaked(trans); - mutex_lock(&trans->c->btree_trans_lock); +#ifdef CONFIG_BCACHEFS_DEBUG + mutex_lock(&c->btree_trans_lock); list_del(&trans->list); - mutex_unlock(&trans->c->btree_trans_lock); + mutex_unlock(&c->btree_trans_lock); #endif srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); - bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres); + bch2_journal_preres_put(&c->journal, &trans->journal_preres); if (trans->fs_usage_deltas) { if (trans->fs_usage_deltas->size + 
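/*
 * The buffer handling above (bch2_trans_alloc_paths() and the matching
 * release in bch2_trans_exit()) is a two-level allocation scheme: grab a
 * per-cpu cached buffer with this_cpu_xchg() if one is parked there, and
 * fall back to a mempool otherwise. A minimal sketch of the idiom, using
 * hypothetical my_bufs/my_pool objects (these are not bcachefs names):
 *
 *      struct buf_cache { void *buf; };
 *      static struct buf_cache __percpu *my_bufs;
 *      static mempool_t my_pool;
 *
 *      static void *buf_get(void)
 *      {
 *              // Steal this cpu's cached buffer, leaving NULL behind:
 *              void *p = this_cpu_xchg(my_bufs->buf, NULL);
 *
 *              return p ?: mempool_alloc(&my_pool, GFP_NOFS);
 *      }
 *
 *      static void buf_put(void *p)
 *      {
 *              // Park the buffer on this cpu; free whatever was there:
 *              p = this_cpu_xchg(my_bufs->buf, p);
 *              if (p)
 *                      mempool_free(p, &my_pool);
 *      }
 *
 * The mempool guarantees forward progress under memory pressure; the
 * per-cpu slot keeps the common case allocation- and contention-free.
 */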
sizeof(trans->fs_usage_deltas) == REPLICAS_DELTA_LIST_MAX) mempool_free(trans->fs_usage_deltas, - &trans->c->replicas_delta_pool); + &c->replicas_delta_pool); else kfree(trans->fs_usage_deltas); } if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) - mempool_free(trans->mem, &trans->c->btree_trans_mem_pool); + mempool_free(trans->mem, &c->btree_trans_mem_pool); else kfree(trans->mem); @@ -2668,20 +2717,20 @@ int bch2_trans_exit(struct btree_trans *trans) /* * Userspace doesn't have a real percpu implementation: */ - trans->iters = this_cpu_xchg(c->btree_iters_bufs->iter, trans->iters); + trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths); #endif - if (trans->iters) - mempool_free(trans->iters, &trans->c->btree_iters_pool); + if (trans->paths) + mempool_free(trans->paths, &c->btree_paths_pool); trans->mem = (void *) 0x1; - trans->iters = (void *) 0x1; + trans->paths = (void *) 0x1; return trans->error ? -EIO : 0; } static void __maybe_unused -bch2_btree_iter_node_to_text(struct printbuf *out, +bch2_btree_path_node_to_text(struct printbuf *out, struct btree_bkey_cached_common *_b, bool cached) { @@ -2693,10 +2742,10 @@ bch2_btree_iter_node_to_text(struct printbuf *out, #ifdef CONFIG_BCACHEFS_DEBUG static bool trans_has_locks(struct btree_trans *trans) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - if (iter->nodes_locked) + trans_for_each_path(trans, path) + if (path->nodes_locked) return true; return false; } @@ -2706,7 +2755,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) { #ifdef CONFIG_BCACHEFS_DEBUG struct btree_trans *trans; - struct btree_iter *iter; + struct btree_path *path; struct btree *b; unsigned l; @@ -2717,24 +2766,24 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip); - trans_for_each_iter(trans, iter) { - if (!iter->nodes_locked) + trans_for_each_path(trans, path) { + if (!path->nodes_locked) continue; - pr_buf(out, " iter %u %c %s:", - iter->idx, - iter->cached ? 'c' : 'b', - bch2_btree_ids[iter->btree_id]); - bch2_bpos_to_text(out, iter->pos); + pr_buf(out, " path %u %c %s:", + path->idx, + path->cached ? 'c' : 'b', + bch2_btree_ids[path->btree_id]); + bch2_bpos_to_text(out, path->pos); pr_buf(out, "\n"); for (l = 0; l < BTREE_MAX_DEPTH; l++) { - if (btree_node_locked(iter, l)) { + if (btree_node_locked(path, l)) { pr_buf(out, " %s l=%u ", - btree_node_intent_locked(iter, l) ? "i" : "r", l); - bch2_btree_iter_node_to_text(out, - (void *) iter->l[l].b, - iter->cached); + btree_node_intent_locked(path, l) ? "i" : "r", l); + bch2_btree_path_node_to_text(out, + (void *) path->l[l].b, + path->cached); pr_buf(out, "\n"); } } @@ -2742,18 +2791,17 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) b = READ_ONCE(trans->locking); if (b) { - iter = &trans->iters[trans->locking_iter_idx]; - pr_buf(out, " locking iter %u %c l=%u %s:", - trans->locking_iter_idx, - iter->cached ? 'c' : 'b', + path = &trans->paths[trans->locking_path_idx]; + pr_buf(out, " locking path %u %c l=%u %s:", + trans->locking_path_idx, + path->cached ? 
'c' : 'b', trans->locking_level, bch2_btree_ids[trans->locking_btree_id]); bch2_bpos_to_text(out, trans->locking_pos); pr_buf(out, " node "); - bch2_btree_iter_node_to_text(out, - (void *) b, - iter->cached); + bch2_btree_path_node_to_text(out, + (void *) b, path->cached); pr_buf(out, "\n"); } } @@ -2764,7 +2812,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) void bch2_fs_btree_iter_exit(struct bch_fs *c) { mempool_exit(&c->btree_trans_mem_pool); - mempool_exit(&c->btree_iters_pool); + mempool_exit(&c->btree_paths_pool); cleanup_srcu_struct(&c->btree_trans_barrier); } @@ -2776,8 +2824,8 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) mutex_init(&c->btree_trans_lock); return init_srcu_struct(&c->btree_trans_barrier) ?: - mempool_init_kmalloc_pool(&c->btree_iters_pool, 1, - sizeof(struct btree_iter) * nr + + mempool_init_kmalloc_pool(&c->btree_paths_pool, 1, + sizeof(struct btree_path) * nr + sizeof(struct btree_insert_entry) * nr) ?: mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, BTREE_TRANS_MEM_MAX); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 4ba55e02d4b7..983d61122458 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -5,40 +5,49 @@ #include "bset.h" #include "btree_types.h" -static inline void btree_iter_set_dirty(struct btree_iter *iter, - enum btree_iter_uptodate u) +static inline void __btree_path_get(struct btree_path *path, bool intent) { - iter->uptodate = max_t(unsigned, iter->uptodate, u); + path->ref++; + path->intent_ref += intent; } -static inline struct btree *btree_iter_node(struct btree_iter *iter, +static inline bool __btree_path_put(struct btree_path *path, bool intent) +{ + EBUG_ON(!path->ref); + EBUG_ON(!path->intent_ref && intent); + path->intent_ref -= intent; + return --path->ref == 0; +} + +static inline void btree_path_set_dirty(struct btree_path *path, + enum btree_path_uptodate u) +{ + path->uptodate = max_t(unsigned, path->uptodate, u); +} + +static inline struct btree *btree_path_node(struct btree_path *path, unsigned level) { - return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL; + return level < BTREE_MAX_DEPTH ? path->l[level].b : NULL; } -static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter, +static inline bool btree_node_lock_seq_matches(const struct btree_path *path, const struct btree *b, unsigned level) { /* * We don't compare the low bits of the lock sequence numbers because - * @iter might have taken a write lock on @b, and we don't want to skip - * the linked iterator if the sequence numbers were equal before taking - * that write lock. The lock sequence number is incremented by taking - * and releasing write locks and is even when unlocked: + * @path might have taken a write lock on @b, and we don't want to skip + * the linked path if the sequence numbers were equal before taking that + * write lock. 
The lock sequence number is incremented by taking and + * releasing write locks and is even when unlocked: */ - return iter->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; + return path->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1; } -static inline struct btree *btree_node_parent(struct btree_iter *iter, +static inline struct btree *btree_node_parent(struct btree_path *path, struct btree *b) { - return btree_iter_node(iter, b->c.level + 1); -} - -static inline bool btree_trans_has_multiple_iters(const struct btree_trans *trans) -{ - return hweight64(trans->iters_linked) > 1; + return btree_path_node(path, b->c.level + 1); } static inline int btree_iter_err(const struct btree_iter *iter) @@ -46,102 +55,121 @@ static inline int btree_iter_err(const struct btree_iter *iter) return iter->flags & BTREE_ITER_ERROR ? -EIO : 0; } -/* Iterate over iters within a transaction: */ +/* Iterate over paths within a transaction: */ -static inline struct btree_iter * -__trans_next_iter(struct btree_trans *trans, unsigned idx) +static inline struct btree_path * +__trans_next_path(struct btree_trans *trans, unsigned idx) { u64 l; if (idx == BTREE_ITER_MAX) return NULL; - l = trans->iters_linked >> idx; + l = trans->paths_allocated >> idx; if (!l) return NULL; idx += __ffs64(l); EBUG_ON(idx >= BTREE_ITER_MAX); - EBUG_ON(trans->iters[idx].idx != idx); - return &trans->iters[idx]; + EBUG_ON(trans->paths[idx].idx != idx); + return &trans->paths[idx]; } -#define trans_for_each_iter(_trans, _iter) \ - for (_iter = __trans_next_iter((_trans), 0); \ - (_iter); \ - _iter = __trans_next_iter((_trans), (_iter)->idx + 1)) +#define trans_for_each_path(_trans, _path) \ + for (_path = __trans_next_path((_trans), 0); \ + (_path); \ + _path = __trans_next_path((_trans), (_path)->idx + 1)) -static inline struct btree_iter *next_btree_iter(struct btree_trans *trans, struct btree_iter *iter) +static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path) { - unsigned idx = iter ? iter->sorted_idx + 1 : 0; + unsigned idx = path ? path->sorted_idx + 1 : 0; EBUG_ON(idx > trans->nr_sorted); return idx < trans->nr_sorted - ? trans->iters + trans->sorted[idx] + ? trans->paths + trans->sorted[idx] : NULL; } -static inline struct btree_iter *prev_btree_iter(struct btree_trans *trans, struct btree_iter *iter) +static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path) { - unsigned idx = iter ? iter->sorted_idx : trans->nr_sorted; + unsigned idx = path ? path->sorted_idx : trans->nr_sorted; return idx - ? trans->iters + trans->sorted[idx - 1] + ? 
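/*
 * __trans_next_path() above walks a u64 allocation bitmap
 * (trans->paths_allocated) rather than a list: shift out the
 * already-visited low bits, then __ffs64() finds the next allocated slot
 * in O(1). A freestanding sketch of the same idiom (next_alloc_idx and
 * its parameters are illustrative names, not bcachefs code):
 *
 *      // Return the index of the next allocated slot >= idx, or -1.
 *      static int next_alloc_idx(u64 allocated, unsigned idx)
 *      {
 *              u64 l;
 *
 *              if (idx >= 64)
 *                      return -1;
 *              l = allocated >> idx;
 *              if (!l)
 *                      return -1;
 *              return idx + __ffs64(l);
 *      }
 *
 * Callers iterate with
 *      for (i = next_alloc_idx(m, 0); i >= 0; i = next_alloc_idx(m, i + 1))
 * which is exactly the shape of trans_for_each_path().
 */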
trans->paths + trans->sorted[idx - 1] : NULL; } -#define trans_for_each_iter_inorder(_trans, _iter, _i) \ +#define trans_for_each_path_inorder(_trans, _path, _i) \ for (_i = 0; \ - ((_iter) = (_trans)->iters + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\ + ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) < (_trans)->nr_sorted;\ _i++) -#define trans_for_each_iter_inorder_reverse(_trans, _iter, _i) \ +#define trans_for_each_path_inorder_reverse(_trans, _path, _i) \ for (_i = trans->nr_sorted - 1; \ - ((_iter) = (_trans)->iters + trans->sorted[_i]), (_i) >= 0;\ + ((_path) = (_trans)->paths + trans->sorted[_i]), (_i) >= 0;\ --_i) -static inline bool __iter_has_node(const struct btree_iter *iter, +static inline bool __path_has_node(const struct btree_path *path, const struct btree *b) { - return iter->l[b->c.level].b == b && - btree_node_lock_seq_matches(iter, b, b->c.level); + return path->l[b->c.level].b == b && + btree_node_lock_seq_matches(path, b, b->c.level); } -static inline struct btree_iter * -__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b, +static inline struct btree_path * +__trans_next_path_with_node(struct btree_trans *trans, struct btree *b, unsigned idx) { - struct btree_iter *iter = __trans_next_iter(trans, idx); + struct btree_path *path = __trans_next_path(trans, idx); + + while (path && !__path_has_node(path, b)) + path = __trans_next_path(trans, path->idx + 1); - while (iter && !__iter_has_node(iter, b)) - iter = __trans_next_iter(trans, iter->idx + 1); + return path; +} + +#define trans_for_each_path_with_node(_trans, _b, _path) \ + for (_path = __trans_next_path_with_node((_trans), (_b), 0); \ + (_path); \ + _path = __trans_next_path_with_node((_trans), (_b), \ + (_path)->idx + 1)) + +struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, + struct btree_path *, bool); - return iter; +static inline struct btree_path * __must_check +bch2_btree_path_make_mut(struct btree_trans *trans, + struct btree_path *path, bool intent) +{ + if (path->ref > 1 || path->preserve) + path = __bch2_btree_path_make_mut(trans, path, intent); + return path; } -#define trans_for_each_iter_with_node(_trans, _b, _iter) \ - for (_iter = __trans_next_iter_with_node((_trans), (_b), 0); \ - (_iter); \ - _iter = __trans_next_iter_with_node((_trans), (_b), \ - (_iter)->idx + 1)) +int __must_check bch2_btree_path_traverse(struct btree_trans *, + struct btree_path *, unsigned); +struct btree_path *bch2_path_get(struct btree_trans *, bool, enum btree_id, + struct bpos, unsigned, unsigned, bool); +inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *); #ifdef CONFIG_BCACHEFS_DEBUG -void bch2_trans_verify_iters(struct btree_trans *, struct btree *); +void bch2_trans_verify_paths(struct btree_trans *); void bch2_trans_verify_locks(struct btree_trans *); #else -static inline void bch2_trans_verify_iters(struct btree_trans *trans, - struct btree *b) {} -static inline void bch2_trans_verify_locks(struct btree_trans *iter) {} +static inline void bch2_trans_verify_paths(struct btree_trans *trans) {} +static inline void bch2_trans_verify_locks(struct btree_trans *trans) {} #endif -void bch2_btree_iter_fix_key_modified(struct btree_trans *trans, +void bch2_btree_path_fix_key_modified(struct btree_trans *trans, struct btree *, struct bkey_packed *); -void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_iter *, +void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *, struct btree *, struct btree_node_iter 
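/*
 * bch2_btree_path_make_mut() above implements copy-on-write for paths: a
 * path may be mutated in place only when the caller holds the sole
 * reference and no iterator has asked for it to be preserved; otherwise
 * the caller's reference is moved to a private clone first. A hedged
 * sketch of the rule (clone_path stands in for __bch2_btree_path_make_mut):
 *
 *      struct path *path_make_mut(struct path *p)
 *      {
 *              // Shared or pinned paths must not be repositioned in
 *              // place: other holders rely on their current position
 *              // and locks.
 *              if (p->ref > 1 || p->preserve)
 *                      p = clone_path(p);      // moves our ref to the copy
 *              return p;
 *      }
 *
 * This is what lets many iterators share one traversed path cheaply
 * while still allowing any one of them to reposition "its" path.
 */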
*, struct bkey_packed *, unsigned, unsigned); -bool bch2_btree_iter_relock_intent(struct btree_trans *, struct btree_iter *); +bool bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); + +void bch2_path_put(struct btree_trans *, struct btree_path *, bool); bool bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); @@ -154,28 +182,28 @@ static inline int btree_trans_restart(struct btree_trans *trans) return -EINTR; } -bool __bch2_btree_iter_upgrade(struct btree_trans *, - struct btree_iter *, unsigned); +bool __bch2_btree_path_upgrade(struct btree_trans *, + struct btree_path *, unsigned); -static inline bool bch2_btree_iter_upgrade(struct btree_trans *trans, - struct btree_iter *iter, +static inline bool bch2_btree_path_upgrade(struct btree_trans *trans, + struct btree_path *path, unsigned new_locks_want) { new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH); - return iter->locks_want < new_locks_want - ? __bch2_btree_iter_upgrade(trans, iter, new_locks_want) - : iter->uptodate == BTREE_ITER_UPTODATE; + return path->locks_want < new_locks_want + ? __bch2_btree_path_upgrade(trans, path, new_locks_want) + : path->uptodate == BTREE_ITER_UPTODATE; } -void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned); +void __bch2_btree_path_downgrade(struct btree_path *, unsigned); -static inline void bch2_btree_iter_downgrade(struct btree_iter *iter) +static inline void bch2_btree_path_downgrade(struct btree_path *path) { - unsigned new_locks_want = iter->level + !!(iter->flags & BTREE_ITER_INTENT); + unsigned new_locks_want = path->level + !!path->intent_ref; - if (iter->locks_want > new_locks_want) - __bch2_btree_iter_downgrade(iter, new_locks_want); + if (path->locks_want > new_locks_want) + __bch2_btree_path_downgrade(path, new_locks_want); } void bch2_trans_downgrade(struct btree_trans *); @@ -212,7 +240,8 @@ static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos iter->k.p.offset = iter->pos.offset = new_pos.offset; iter->k.p.snapshot = iter->pos.snapshot = new_pos.snapshot; iter->k.size = 0; - iter->should_be_locked = false; + if (iter->path->ref == 1) + iter->path->should_be_locked = false; } static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter) @@ -221,17 +250,6 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it iter->pos = bkey_start_pos(&iter->k); } -static inline struct btree_iter *idx_to_btree_iter(struct btree_trans *trans, unsigned idx) -{ - return idx != U8_MAX ? 
trans->iters + idx : NULL; -} - -static inline struct btree_iter *btree_iter_child(struct btree_trans *trans, - struct btree_iter *iter) -{ - return idx_to_btree_iter(trans, iter->child_idx); -} - /* * Unlocks before scheduling * Note: does not revalidate iterator @@ -249,11 +267,11 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans) #define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ _locks_want, _depth, _flags, _b) \ - for (iter = bch2_trans_get_node_iter((_trans), (_btree_id), \ + for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ _start, _locks_want, _depth, _flags), \ - _b = bch2_btree_iter_peek_node(_iter); \ + _b = bch2_btree_iter_peek_node(&(_iter)); \ (_b); \ - (_b) = bch2_btree_iter_next_node(_iter)) + (_b) = bch2_btree_iter_next_node(&(_iter))) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ _flags, _b) \ @@ -283,77 +301,36 @@ static inline int bkey_err(struct bkey_s_c k) #define for_each_btree_key(_trans, _iter, _btree_id, \ _start, _flags, _k, _ret) \ - for ((_iter) = bch2_trans_get_iter((_trans), (_btree_id), \ - (_start), (_flags)), \ - (_k) = __bch2_btree_iter_peek(_iter, _flags); \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)), \ + (_k) = __bch2_btree_iter_peek(&(_iter), _flags); \ !((_ret) = bkey_err(_k)) && (_k).k; \ - (_k) = __bch2_btree_iter_next(_iter, _flags)) + (_k) = __bch2_btree_iter_next(&(_iter), _flags)) #define for_each_btree_key_continue(_iter, _flags, _k, _ret) \ - for ((_k) = __bch2_btree_iter_peek(_iter, _flags); \ + for ((_k) = __bch2_btree_iter_peek(&(_iter), _flags); \ !((_ret) = bkey_err(_k)) && (_k).k; \ - (_k) = __bch2_btree_iter_next(_iter, _flags)) + (_k) = __bch2_btree_iter_next(&(_iter), _flags)) /* new multiple iterator interface: */ -void bch2_dump_trans_iters_updates(struct btree_trans *); - -int bch2_trans_iter_put(struct btree_trans *, struct btree_iter *); -int bch2_trans_iter_free(struct btree_trans *, struct btree_iter *); - -void bch2_trans_unlink_iters(struct btree_trans *); - -struct btree_iter *__bch2_trans_get_iter(struct btree_trans *, enum btree_id, - struct bpos, unsigned, - unsigned, unsigned); - -static inline struct btree_iter * -bch2_trans_get_iter(struct btree_trans *trans, enum btree_id btree_id, - struct bpos pos, unsigned flags) -{ - struct btree_iter *iter = - __bch2_trans_get_iter(trans, btree_id, pos, - (flags & BTREE_ITER_INTENT) != 0, 0, - flags); - iter->ip_allocated = _THIS_IP_; - return iter; -} - -struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *, - struct btree_iter *); -static inline struct btree_iter * -bch2_trans_copy_iter(struct btree_trans *trans, struct btree_iter *src) -{ - struct btree_iter *iter = - __bch2_trans_copy_iter(trans, src); - - iter->ip_allocated = _THIS_IP_; - return iter; -} - -struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *, - enum btree_id, struct bpos, - unsigned, unsigned, unsigned); - -static inline bool btree_iter_live(struct btree_trans *trans, struct btree_iter *iter) -{ - return (trans->iters_live & (1ULL << iter->idx)) != 0; -} +void bch2_dump_trans_paths_updates(struct btree_trans *); -static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter *iter) -{ - return btree_iter_live(trans, iter) || - (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); -} +void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *); +void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *, + unsigned, struct bpos, unsigned); +void 
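/*
 * With iterators now caller-owned structs, the for_each_btree_key()
 * pattern becomes: declare the iterator on the stack, let the macro
 * initialize it, and explicitly exit it afterwards. A usage sketch
 * (BTREE_ID_xattrs and the empty body are just for illustration):
 *
 *      struct btree_trans trans;
 *      struct btree_iter iter;
 *      struct bkey_s_c k;
 *      int ret;
 *
 *      bch2_trans_init(&trans, c, 0, 0);
 *
 *      for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret)
 *              ;       // inspect k here
 *
 *      bch2_trans_iter_exit(&trans, &iter);    // drops the path reference
 *      ret = bch2_trans_exit(&trans) ?: ret;
 *
 * There is no bch2_trans_iter_put() any more: the iterator itself is
 * plain caller memory, and only its path reference needs releasing.
 */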
bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *, + enum btree_id, struct bpos, + unsigned, unsigned, unsigned); +void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *); -static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter) +static inline void set_btree_iter_dontneed(struct btree_iter *iter) { - trans->iters_touched &= ~(1ULL << iter->idx); + iter->path->preserve = false; } -void bch2_trans_begin(struct btree_trans *); - void *bch2_trans_kmalloc(struct btree_trans *, size_t); +void bch2_trans_begin(struct btree_trans *); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); int bch2_trans_exit(struct btree_trans *); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 61210db57f56..9bdc2c3f21bf 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -197,23 +197,23 @@ btree_key_cache_create(struct btree_key_cache *c, } static int btree_key_cache_fill(struct btree_trans *trans, - struct btree_iter *ck_iter, + struct btree_path *ck_path, struct bkey_cached *ck) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; unsigned new_u64s = 0; struct bkey_i *new_k = NULL; int ret; - iter = bch2_trans_get_iter(trans, ck->key.btree_id, - ck->key.pos, BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, ck->key.btree_id, + ck->key.pos, BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; - if (!bch2_btree_node_relock(trans, ck_iter, 0)) { + if (!bch2_btree_node_relock(trans, ck_path, 0)) { trace_transaction_restart_ip(trans->ip, _THIS_IP_); ret = btree_trans_restart(trans); goto err; @@ -238,7 +238,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, * XXX: not allowed to be holding read locks when we take a write lock, * currently */ - bch2_btree_node_lock_write(trans, ck_iter, ck_iter->l[0].b); + bch2_btree_node_lock_write(trans, ck_path, ck_path->l[0].b); if (new_k) { kfree(ck->k); ck->u64s = new_u64s; @@ -247,62 +247,64 @@ static int btree_key_cache_fill(struct btree_trans *trans, bkey_reassemble(ck->k, k); ck->valid = true; - bch2_btree_node_unlock_write(trans, ck_iter, ck_iter->l[0].b); + bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b); /* We're not likely to need this iterator again: */ - set_btree_iter_dontneed(trans, iter); + set_btree_iter_dontneed(&iter); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } static int bkey_cached_check_fn(struct six_lock *lock, void *p) { struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock); - const struct btree_iter *iter = p; + const struct btree_path *path = p; - return ck->key.btree_id == iter->btree_id && - !bpos_cmp(ck->key.pos, iter->pos) ? 0 : -1; + return ck->key.btree_id == path->btree_id && + !bpos_cmp(ck->key.pos, path->pos) ? 
0 : -1; } __flatten -int bch2_btree_iter_traverse_cached(struct btree_trans *trans, struct btree_iter *iter) +int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, + unsigned flags) { struct bch_fs *c = trans->c; struct bkey_cached *ck; int ret = 0; - BUG_ON(iter->level); + BUG_ON(path->level); - iter->l[1].b = NULL; + path->l[1].b = NULL; - if (bch2_btree_node_relock(trans, iter, 0)) { - ck = (void *) iter->l[0].b; + if (bch2_btree_node_relock(trans, path, 0)) { + ck = (void *) path->l[0].b; goto fill; } retry: - ck = bch2_btree_key_cache_find(c, iter->btree_id, iter->pos); + ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) { - if (iter->flags & BTREE_ITER_CACHED_NOCREATE) { - iter->l[0].b = NULL; + if (flags & BTREE_ITER_CACHED_NOCREATE) { + path->l[0].b = NULL; return 0; } ck = btree_key_cache_create(&c->btree_key_cache, - iter->btree_id, iter->pos); + path->btree_id, path->pos); ret = PTR_ERR_OR_ZERO(ck); if (ret) goto err; if (!ck) goto retry; - mark_btree_node_locked(iter, 0, SIX_LOCK_intent); - iter->locks_want = 1; + mark_btree_node_locked(path, 0, SIX_LOCK_intent); + path->locks_want = 1; } else { - enum six_lock_type lock_want = __btree_lock_want(iter, 0); + enum six_lock_type lock_want = __btree_lock_want(path, 0); - if (!btree_node_lock(trans, iter, (void *) ck, iter->pos, 0, lock_want, - bkey_cached_check_fn, iter, _THIS_IP_)) { + if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0, + lock_want, + bkey_cached_check_fn, path, _THIS_IP_)) { if (!trans->restarted) goto retry; @@ -311,28 +313,27 @@ retry: goto err; } - if (ck->key.btree_id != iter->btree_id || - bpos_cmp(ck->key.pos, iter->pos)) { + if (ck->key.btree_id != path->btree_id || + bpos_cmp(ck->key.pos, path->pos)) { six_unlock_type(&ck->c.lock, lock_want); goto retry; } - mark_btree_node_locked(iter, 0, lock_want); + mark_btree_node_locked(path, 0, lock_want); } - iter->l[0].lock_seq = ck->c.lock.state.seq; - iter->l[0].b = (void *) ck; + path->l[0].lock_seq = ck->c.lock.state.seq; + path->l[0].b = (void *) ck; fill: - if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { - if (!iter->locks_want && - !!__bch2_btree_iter_upgrade(trans, iter, 1)) { + if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { + if (!path->locks_want && + !__bch2_btree_path_upgrade(trans, path, 1)) { trace_transaction_restart_ip(trans->ip, _THIS_IP_); - BUG_ON(!trans->restarted); - ret = -EINTR; + ret = btree_trans_restart(trans); goto err; } - ret = btree_key_cache_fill(trans, iter, ck); + ret = btree_key_cache_fill(trans, path, ck); if (ret) goto err; } @@ -340,22 +341,14 @@ fill: if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) set_bit(BKEY_CACHED_ACCESSED, &ck->flags); - iter->uptodate = BTREE_ITER_UPTODATE; - - if ((iter->flags & BTREE_ITER_INTENT) && - !bch2_btree_iter_upgrade(trans, iter, 1)) { - BUG_ON(!trans->restarted); - ret = -EINTR; - } - - BUG_ON(!ret && !btree_node_locked(iter, 0)); + path->uptodate = BTREE_ITER_UPTODATE; + BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0)); return ret; err: if (ret != -EINTR) { - btree_node_unlock(iter, 0); - iter->flags |= BTREE_ITER_ERROR; - iter->l[0].b = BTREE_ITER_NO_NODE_ERROR; + btree_node_unlock(path, 0); + path->l[0].b = BTREE_ITER_NO_NODE_ERROR; } return ret; } @@ -368,23 +361,23 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct journal *j = &c->journal; - struct btree_iter *c_iter = NULL, *b_iter = NULL; + struct btree_iter c_iter, b_iter; 
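/*
 * bch2_btree_path_traverse_cached() above is a classic lookup-or-create
 * retry loop: look the key up in the hash table; on miss, try to create
 * the entry (creation can itself race and return NULL, meaning "look up
 * again"); on hit, take the lock and then re-check the key, since the
 * entry may have been freed and reused while we slept. Skeleton of the
 * control flow (names abbreviated, error paths trimmed):
 *
 *      retry:
 *              ck = cache_find(c, id, pos);
 *              if (!ck) {
 *                      ck = cache_create(c, id, pos);  // returns locked
 *                      if (IS_ERR(ck))
 *                              goto err;
 *                      if (!ck)
 *                              goto retry;     // lost a race, look up again
 *              } else {
 *                      if (!lock(ck))
 *                              goto retry_or_restart;
 *                      if (ck->key.btree_id != id ||
 *                          bpos_cmp(ck->key.pos, pos)) {
 *                              unlock(ck);     // stale entry, reused under us
 *                              goto retry;
 *                      }
 *              }
 *
 * The re-check after locking is what makes the lockless lookup safe.
 */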
struct bkey_cached *ck = NULL; int ret; - b_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, - BTREE_ITER_SLOTS| - BTREE_ITER_INTENT); - c_iter = bch2_trans_get_iter(trans, key.btree_id, key.pos, - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_CACHED_NOCREATE| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(c_iter); + bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos, + BTREE_ITER_SLOTS| + BTREE_ITER_INTENT); + bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos, + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_CACHED_NOCREATE| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&c_iter); if (ret) goto out; - ck = (void *) c_iter->l[0].b; + ck = (void *) c_iter.path->l[0].b; if (!ck || (journal_seq && ck->journal.seq != journal_seq)) goto out; @@ -400,8 +393,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, * allocator/copygc depend on journal reclaim making progress, we need * to be using alloc reserves: * */ - ret = bch2_btree_iter_traverse(b_iter) ?: - bch2_trans_update(trans, b_iter, ck->k, + ret = bch2_btree_iter_traverse(&b_iter) ?: + bch2_trans_update(trans, &b_iter, ck->k, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, @@ -423,7 +416,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, bch2_journal_pin_drop(j, &ck->journal); bch2_journal_preres_put(j, &ck->res); - BUG_ON(!btree_node_locked(c_iter, 0)); + BUG_ON(!btree_node_locked(c_iter.path, 0)); if (!evict) { if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { @@ -432,10 +425,10 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, } } else { evict: - BUG_ON(!btree_node_intent_locked(c_iter, 0)); + BUG_ON(!btree_node_intent_locked(c_iter.path, 0)); - mark_btree_node_unlocked(c_iter, 0); - c_iter->l[0].b = NULL; + mark_btree_node_unlocked(c_iter.path, 0); + c_iter.path->l[0].b = NULL; six_lock_write(&ck->c.lock, NULL, NULL); @@ -451,8 +444,8 @@ evict: mutex_unlock(&c->btree_key_cache.lock); } out: - bch2_trans_iter_put(trans, b_iter); - bch2_trans_iter_put(trans, c_iter); + bch2_trans_iter_exit(trans, &b_iter); + bch2_trans_iter_exit(trans, &c_iter); return ret; } @@ -503,11 +496,11 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans, } bool bch2_btree_insert_key_cached(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct bkey_i *insert) { struct bch_fs *c = trans->c; - struct bkey_cached *ck = (void *) iter->l[0].b; + struct bkey_cached *ck = (void *) path->l[0].b; bool kick_reclaim = false; BUG_ON(insert->u64s > ck->u64s); diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index d890632e4425..0768ef3ca776 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -26,10 +26,11 @@ int bch2_btree_key_cache_journal_flush(struct journal *, struct bkey_cached * bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos); -int bch2_btree_iter_traverse_cached(struct btree_trans *, struct btree_iter *); +int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *, + unsigned); bool bch2_btree_insert_key_cached(struct btree_trans *, - struct btree_iter *, struct bkey_i *); + struct btree_path *, struct bkey_i *); int bch2_btree_key_cache_flush(struct btree_trans *, enum btree_id, struct bpos); #ifdef CONFIG_BCACHEFS_DEBUG diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index b490e4808631..d05689180c63 100644 --- a/fs/bcachefs/btree_locking.h +++ 
b/fs/bcachefs/btree_locking.h @@ -20,7 +20,7 @@ enum btree_node_locked_type { BTREE_NODE_INTENT_LOCKED = SIX_LOCK_intent, }; -static inline int btree_node_locked_type(struct btree_iter *iter, +static inline int btree_node_locked_type(struct btree_path *path, unsigned level) { /* @@ -29,35 +29,35 @@ static inline int btree_node_locked_type(struct btree_iter *iter, * branches: */ return BTREE_NODE_UNLOCKED + - ((iter->nodes_locked >> level) & 1) + - ((iter->nodes_intent_locked >> level) & 1); + ((path->nodes_locked >> level) & 1) + + ((path->nodes_intent_locked >> level) & 1); } -static inline bool btree_node_intent_locked(struct btree_iter *iter, +static inline bool btree_node_intent_locked(struct btree_path *path, unsigned level) { - return btree_node_locked_type(iter, level) == BTREE_NODE_INTENT_LOCKED; + return btree_node_locked_type(path, level) == BTREE_NODE_INTENT_LOCKED; } -static inline bool btree_node_read_locked(struct btree_iter *iter, +static inline bool btree_node_read_locked(struct btree_path *path, unsigned level) { - return btree_node_locked_type(iter, level) == BTREE_NODE_READ_LOCKED; + return btree_node_locked_type(path, level) == BTREE_NODE_READ_LOCKED; } -static inline bool btree_node_locked(struct btree_iter *iter, unsigned level) +static inline bool btree_node_locked(struct btree_path *path, unsigned level) { - return iter->nodes_locked & (1 << level); + return path->nodes_locked & (1 << level); } -static inline void mark_btree_node_unlocked(struct btree_iter *iter, +static inline void mark_btree_node_unlocked(struct btree_path *path, unsigned level) { - iter->nodes_locked &= ~(1 << level); - iter->nodes_intent_locked &= ~(1 << level); + path->nodes_locked &= ~(1 << level); + path->nodes_intent_locked &= ~(1 << level); } -static inline void mark_btree_node_locked(struct btree_iter *iter, +static inline void mark_btree_node_locked(struct btree_path *path, unsigned level, enum six_lock_type type) { @@ -65,52 +65,52 @@ static inline void mark_btree_node_locked(struct btree_iter *iter, BUILD_BUG_ON(SIX_LOCK_read != 0); BUILD_BUG_ON(SIX_LOCK_intent != 1); - iter->nodes_locked |= 1 << level; - iter->nodes_intent_locked |= type << level; + path->nodes_locked |= 1 << level; + path->nodes_intent_locked |= type << level; } -static inline void mark_btree_node_intent_locked(struct btree_iter *iter, +static inline void mark_btree_node_intent_locked(struct btree_path *path, unsigned level) { - mark_btree_node_locked(iter, level, SIX_LOCK_intent); + mark_btree_node_locked(path, level, SIX_LOCK_intent); } -static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level) +static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level) { - return level < iter->locks_want + return level < path->locks_want ? 
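/*
 * The lock-state encoding above is worth spelling out: each level's
 * state is derived from two bitmasks rather than stored as an enum.
 * nodes_locked has a bit set if the level is locked at all;
 * nodes_intent_locked has a bit set if that lock is an intent lock.
 * Summing the two bits yields BTREE_NODE_UNLOCKED (0),
 * BTREE_NODE_READ_LOCKED (1) or BTREE_NODE_INTENT_LOCKED (2) without a
 * branch. Self-contained illustration:
 *
 *      enum { UNLOCKED = 0, READ_LOCKED = 1, INTENT_LOCKED = 2 };
 *
 *      static int locked_type(unsigned locked, unsigned intent, unsigned l)
 *      {
 *              return ((locked >> l) & 1) + ((intent >> l) & 1);
 *      }
 *
 *      // locked = 0b10, intent = 0b00: level 1 is read locked
 *      //      locked_type(0x2, 0x0, 1) == READ_LOCKED
 *      // locked = 0b10, intent = 0b10: level 1 is intent locked
 *      //      locked_type(0x2, 0x2, 1) == INTENT_LOCKED
 *
 * The encoding relies on the invariant that an intent bit is never set
 * without the corresponding locked bit.
 */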
SIX_LOCK_intent : SIX_LOCK_read; } static inline enum btree_node_locked_type -btree_lock_want(struct btree_iter *iter, int level) +btree_lock_want(struct btree_path *path, int level) { - if (level < iter->level) + if (level < path->level) return BTREE_NODE_UNLOCKED; - if (level < iter->locks_want) + if (level < path->locks_want) return BTREE_NODE_INTENT_LOCKED; - if (level == iter->level) + if (level == path->level) return BTREE_NODE_READ_LOCKED; return BTREE_NODE_UNLOCKED; } -static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) +static inline void btree_node_unlock(struct btree_path *path, unsigned level) { - int lock_type = btree_node_locked_type(iter, level); + int lock_type = btree_node_locked_type(path, level); EBUG_ON(level >= BTREE_MAX_DEPTH); if (lock_type != BTREE_NODE_UNLOCKED) - six_unlock_type(&iter->l[level].b->c.lock, lock_type); - mark_btree_node_unlocked(iter, level); + six_unlock_type(&path->l[level].b->c.lock, lock_type); + mark_btree_node_unlocked(path, level); } -static inline void __bch2_btree_iter_unlock(struct btree_iter *iter) +static inline void __bch2_btree_path_unlock(struct btree_path *path) { - btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); + btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK); - while (iter->nodes_locked) - btree_node_unlock(iter, __ffs(iter->nodes_locked)); + while (path->nodes_locked) + btree_node_unlock(path, __ffs(path->nodes_locked)); } static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) @@ -154,11 +154,11 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, struct btree *b, unsigned level, enum btree_node_locked_type want) { - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - if (iter->l[level].b == b && - btree_node_locked_type(iter, level) >= want) { + trans_for_each_path(trans, path) + if (path->l[level].b == b && + btree_node_locked_type(path, level) >= want) { six_lock_increment(&b->c.lock, want); return true; } @@ -166,38 +166,39 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, return false; } -bool __bch2_btree_node_lock(struct btree_trans *, struct btree_iter *, +bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, struct btree *, struct bpos, unsigned, - enum six_lock_type, six_lock_should_sleep_fn, - void *, unsigned long); + enum six_lock_type, + six_lock_should_sleep_fn, void *, + unsigned long); static inline bool btree_node_lock(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct bpos pos, unsigned level, enum six_lock_type type, six_lock_should_sleep_fn should_sleep_fn, void *p, unsigned long ip) { EBUG_ON(level >= BTREE_MAX_DEPTH); - EBUG_ON(!(trans->iters_linked & (1ULL << iter->idx))); + EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); return likely(six_trylock_type(&b->c.lock, type)) || btree_node_lock_increment(trans, b, level, type) || - __bch2_btree_node_lock(trans, iter, b, pos, level, type, + __bch2_btree_node_lock(trans, path, b, pos, level, type, should_sleep_fn, p, ip); } -bool __bch2_btree_node_relock(struct btree_trans *, struct btree_iter *, unsigned); +bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned); static inline bool bch2_btree_node_relock(struct btree_trans *trans, - struct btree_iter *iter, unsigned level) + struct btree_path *path, unsigned level) { - EBUG_ON(btree_node_locked(iter, level) && - btree_node_locked_type(iter, level) != - 
__btree_lock_want(iter, level)); + EBUG_ON(btree_node_locked(path, level) && + btree_node_locked_type(path, level) != + __btree_lock_want(path, level)); - return likely(btree_node_locked(iter, level)) || - __bch2_btree_node_relock(trans, iter, level); + return likely(btree_node_locked(path, level)) || + __bch2_btree_node_relock(trans, path, level); } /* @@ -205,32 +206,32 @@ static inline bool bch2_btree_node_relock(struct btree_trans *trans, * succeed: */ static inline void -bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_iter *iter, +bch2_btree_node_unlock_write_inlined(struct btree_trans *trans, struct btree_path *path, struct btree *b) { - struct btree_iter *linked; + struct btree_path *linked; - EBUG_ON(iter->l[b->c.level].b != b); - EBUG_ON(iter->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); + EBUG_ON(path->l[b->c.level].b != b); + EBUG_ON(path->l[b->c.level].lock_seq + 1 != b->c.lock.state.seq); - trans_for_each_iter_with_node(trans, b, linked) + trans_for_each_path_with_node(trans, b, linked) linked->l[b->c.level].lock_seq += 2; six_unlock_write(&b->c.lock); } void bch2_btree_node_unlock_write(struct btree_trans *, - struct btree_iter *, struct btree *); + struct btree_path *, struct btree *); void __bch2_btree_node_lock_write(struct btree_trans *, struct btree *); static inline void bch2_btree_node_lock_write(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b) { - EBUG_ON(iter->l[b->c.level].b != b); - EBUG_ON(iter->l[b->c.level].lock_seq != b->c.lock.state.seq); - EBUG_ON(!btree_node_intent_locked(iter, b->c.level)); + EBUG_ON(path->l[b->c.level].b != b); + EBUG_ON(path->l[b->c.level].lock_seq != b->c.lock.state.seq); + EBUG_ON(!btree_node_intent_locked(path, b->c.level)); if (unlikely(!six_trylock_write(&b->c.lock))) __bch2_btree_node_lock_write(trans, b); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 56dc5fbb7c91..b7cded2095ff 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -210,7 +210,7 @@ struct btree_node_iter { #define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11) #define BTREE_ITER_ALL_SNAPSHOTS (1 << 12) -enum btree_iter_uptodate { +enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, BTREE_ITER_NEED_RELOCK = 1, BTREE_ITER_NEED_TRAVERSE = 2, @@ -225,51 +225,66 @@ enum btree_iter_uptodate { #define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7) #define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8) -/* - * @pos - iterator's current position - * @level - current btree depth - * @locks_want - btree level below which we start taking intent locks - * @nodes_locked - bitmask indicating which nodes in @nodes are locked - * @nodes_intent_locked - bitmask indicating which locks are intent locks - */ -struct btree_iter { - struct btree_trans *trans; - unsigned long ip_allocated; - +struct btree_path { u8 idx; - u8 child_idx; u8 sorted_idx; + u8 ref; + u8 intent_ref; /* btree_iter_copy starts here: */ - u16 flags; - - /* When we're filtering by snapshot, the snapshot ID we're looking for: */ - unsigned snapshot; - struct bpos pos; - struct bpos real_pos; enum btree_id btree_id:4; bool cached:1; - enum btree_iter_uptodate uptodate:2; + bool preserve:1; + enum btree_path_uptodate uptodate:2; /* - * True if we've returned a key (and thus are expected to keep it - * locked), false after set_pos - for avoiding spurious transaction - * restarts in bch2_trans_relock(): + * When true, failing to relock this path will cause the transaction to + * restart: */ bool 
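/*
 * The lock_seq += 2 in bch2_btree_node_unlock_write_inlined() above
 * preserves the sequence-number protocol: a six lock's sequence is
 * bumped on write lock and again on write unlock, so a full write cycle
 * advances it by 2 and it is even exactly when write-unlocked. Paths
 * holding the node across our write lock/unlock pair advance their
 * cached seq by 2 to stay in sync; anyone else sees a mismatch and must
 * re-traverse. Sketch of the validation rule, ignoring the low bit
 * because the comparing path may itself hold the write lock (as in
 * btree_node_lock_seq_matches()):
 *
 *      struct cached_ref {
 *              u32 seq;        // sequence observed when ref was taken
 *      };
 *
 *      static bool ref_still_valid(struct cached_ref *r, u32 node_seq)
 *      {
 *              return (r->seq >> 1) == (node_seq >> 1);
 *      }
 *
 * Any completed write lock/unlock cycle in between advances node_seq by
 * 2, so a stale reference can never validate.
 */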
should_be_locked:1; - unsigned level:4, - min_depth:4, + unsigned level:3, locks_want:4, nodes_locked:4, nodes_intent_locked:4; - struct btree_iter_level { + struct btree_path_level { struct btree *b; struct btree_node_iter iter; u32 lock_seq; } l[BTREE_MAX_DEPTH]; +#ifdef CONFIG_BCACHEFS_DEBUG + unsigned long ip_allocated; +#endif +}; +static inline struct btree_path_level *path_l(struct btree_path *path) +{ + return path->l + path->level; +} + +/* + * @pos - iterator's current position + * @level - current btree depth + * @locks_want - btree level below which we start taking intent locks + * @nodes_locked - bitmask indicating which nodes in @nodes are locked + * @nodes_intent_locked - bitmask indicating which locks are intent locks + */ +struct btree_iter { + struct btree_trans *trans; + struct btree_path *path; + + enum btree_id btree_id:4; + unsigned min_depth:4; + + /* btree_iter_copy starts here: */ + u16 flags; + + /* When we're filtering by snapshot, the snapshot ID we're looking for: */ + unsigned snapshot; + + struct bpos pos; + struct bpos pos_after_commit; /* * Current unpacked key - so that bch2_btree_iter_next()/ * bch2_btree_iter_next_slot() can correctly advance pos. @@ -277,11 +292,6 @@ struct btree_iter { struct bkey k; }; -static inline struct btree_iter_level *iter_l(struct btree_iter *iter) -{ - return iter->l + iter->level; -} - struct btree_key_cache { struct mutex lock; struct rhashtable table; @@ -329,7 +339,7 @@ struct btree_insert_entry { bool cached:1; bool trans_triggers_run:1; struct bkey_i *k; - struct btree_iter *iter; + struct btree_path *path; unsigned long ip_allocated; }; @@ -354,7 +364,7 @@ struct btree_trans { #ifdef CONFIG_BCACHEFS_DEBUG struct list_head list; struct btree *locking; - unsigned locking_iter_idx; + unsigned locking_path_idx; struct bpos locking_pos; u8 locking_btree_id; u8 locking_level; @@ -369,23 +379,21 @@ struct btree_trans { bool error:1; bool in_traverse_all:1; bool restarted:1; - bool iters_sorted:1; + bool paths_sorted:1; /* * For when bch2_trans_update notices we'll be splitting a compressed * extent: */ unsigned extra_journal_res; - u64 iters_linked; - u64 iters_live; - u64 iters_touched; + u64 paths_allocated; unsigned mem_top; unsigned mem_bytes; void *mem; u8 sorted[BTREE_ITER_MAX + 8]; - struct btree_iter *iters; + struct btree_path *paths; struct btree_insert_entry *updates; /* update path: */ @@ -589,16 +597,6 @@ static inline bool btree_node_is_extents(struct btree *b) return btree_node_type_is_extents(btree_node_type(b)); } -static inline enum btree_node_type btree_iter_key_type(struct btree_iter *iter) -{ - return __btree_node_type(iter->level, iter->btree_id); -} - -static inline bool btree_iter_is_extents(struct btree_iter *iter) -{ - return btree_node_type_is_extents(btree_iter_key_type(iter)); -} - #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ ((1U << BKEY_TYPE_extents)| \ (1U << BKEY_TYPE_inodes)| \ diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 5707baf10262..058d283a105c 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -8,9 +8,9 @@ struct bch_fs; struct btree; -void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_iter *, +void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_path *, struct btree *); -bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_iter *, +bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *, struct btree *, struct btree_node_iter *, struct bkey_i *); void 
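/*
 * The struct split above is the heart of this patch: btree_path owns
 * position, locks and traversal state and is refcounted and shared,
 * while btree_iter is a small caller-owned cursor that points at a path
 * and carries only user-visible iteration state. Reduced to essentials:
 *
 *      struct btree_path {     // shared, lives in trans->paths[]
 *              u8              ref, intent_ref;
 *              struct bpos     pos;
 *              // per-level locked nodes, lock seqs, uptodate state ...
 *      };
 *
 *      struct btree_iter {     // caller-owned, typically on-stack
 *              struct btree_trans      *trans;
 *              struct btree_path       *path; // holds a reference
 *              struct bpos             pos;   // user-facing position
 *              struct bkey             k;     // last key returned
 *      };
 *
 * Several iterators (and pending updates, via btree_insert_entry.path)
 * can hold references to one path, so one traversal's locks are reused.
 */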
bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); @@ -135,4 +135,13 @@ static inline int bch2_trans_commit(struct btree_trans *trans, (_i) < (_trans)->updates + (_trans)->nr_updates; \ (_i)++) +struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *); + +static inline struct bkey_i *btree_trans_peek_updates(struct btree_iter *iter) +{ + return iter->flags & BTREE_ITER_WITH_UPDATES + ? __bch2_btree_trans_peek_updates(iter) + : NULL; +} + #endif /* _BCACHEFS_BTREE_UPDATE_H */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 652f08dea804..6dcce175fd8b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -23,7 +23,7 @@ #include static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *, - struct btree_iter *, struct btree *, + struct btree_path *, struct btree *, struct keylist *, unsigned); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); @@ -162,10 +162,10 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, struct btree *b) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_path *path; - trans_for_each_iter(trans, iter) - BUG_ON(iter->l[b->c.level].b == b); + trans_for_each_path(trans, path) + BUG_ON(path->l[b->c.level].b == b); six_lock_write(&b->c.lock, NULL, NULL); @@ -914,7 +914,7 @@ static void bch2_btree_update_done(struct btree_update *as) } static struct btree_update * -bch2_btree_update_start(struct btree_trans *trans, struct btree_iter *iter, +bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned level, unsigned nr_nodes, unsigned flags) { struct bch_fs *c = trans->c; @@ -925,7 +925,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_iter *iter, int journal_flags = 0; int ret = 0; - BUG_ON(!iter->should_be_locked); + BUG_ON(!path->should_be_locked); if (flags & BTREE_INSERT_JOURNAL_RESERVED) journal_flags |= JOURNAL_RES_GET_RESERVED; @@ -937,11 +937,11 @@ retry: * XXX: figure out how far we might need to split, * instead of locking/reserving all the way to the root: */ - if (!bch2_btree_iter_upgrade(trans, iter, U8_MAX)) { + if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_, - iter->btree_id, - &iter->real_pos); - return ERR_PTR(-EINTR); + path->btree_id, &path->pos); + ret = btree_trans_restart(trans); + return ERR_PTR(ret); } if (flags & BTREE_INSERT_GC_LOCK_HELD) @@ -961,7 +961,7 @@ retry: as->c = c; as->mode = BTREE_INTERIOR_NO_UPDATE; as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); - as->btree_id = iter->btree_id; + as->btree_id = path->btree_id; INIT_LIST_HEAD(&as->list); INIT_LIST_HEAD(&as->unwritten_list); INIT_LIST_HEAD(&as->write_blocked_list); @@ -1081,7 +1081,7 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) */ static void bch2_btree_set_root(struct btree_update *as, struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b) { struct bch_fs *c = as->c; @@ -1097,7 +1097,7 @@ static void bch2_btree_set_root(struct btree_update *as, * Ensure no one is using the old root while we switch to the * new root: */ - bch2_btree_node_lock_write(trans, iter, old); + bch2_btree_node_lock_write(trans, path, old); bch2_btree_set_root_inmem(c, b); @@ -1110,14 +1110,14 @@ static void bch2_btree_set_root(struct btree_update *as, * an intent lock on the new root, and any updates that would * depend on the new 
root would have to update the new root. */ - bch2_btree_node_unlock_write(trans, iter, old); + bch2_btree_node_unlock_write(trans, path, old); } /* Interior node updates: */ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct btree_node_iter *node_iter, struct bkey_i *insert) @@ -1152,7 +1152,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, bkey_iter_pos_cmp(b, k, &insert->k.p) < 0) bch2_btree_node_iter_advance(node_iter, b); - bch2_btree_bset_insert_key(trans, iter, b, node_iter, insert); + bch2_btree_bset_insert_key(trans, path, b, node_iter, insert); set_btree_node_dirty(c, b); set_btree_node_need_write(b); } @@ -1160,7 +1160,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, static void __bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct btree_node_iter node_iter, struct keylist *keys) @@ -1175,7 +1175,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, ; while (!bch2_keylist_empty(keys)) { - bch2_insert_fixup_btree_ptr(as, trans, iter, b, + bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, bch2_keylist_front(keys)); bch2_keylist_pop_front(keys); } @@ -1186,8 +1186,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, * node) */ static struct btree *__btree_split_node(struct btree_update *as, - struct btree *n1, - struct btree_iter *iter) + struct btree *n1) { struct bkey_format_state s; size_t nr_packed = 0, nr_unpacked = 0; @@ -1304,7 +1303,7 @@ static struct btree *__btree_split_node(struct btree_update *as, */ static void btree_split_insert_keys(struct btree_update *as, struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct keylist *keys) { @@ -1315,7 +1314,7 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &k->k.p); - __bch2_btree_insert_keys_interior(as, trans, iter, b, node_iter, keys); + __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); /* * We can't tolerate whiteouts here - with whiteouts there can be @@ -1345,18 +1344,17 @@ static void btree_split_insert_keys(struct btree_update *as, btree_node_interior_verify(as->c, b); } -static void btree_split(struct btree_update *as, - struct btree_trans *trans, struct btree_iter *iter, - struct btree *b, struct keylist *keys, - unsigned flags) +static void btree_split(struct btree_update *as, struct btree_trans *trans, + struct btree_path *path, struct btree *b, + struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; - struct btree *parent = btree_node_parent(iter, b); + struct btree *parent = btree_node_parent(path, b); struct btree *n1, *n2 = NULL, *n3 = NULL; u64 start_time = local_clock(); BUG_ON(!parent && (b != btree_node_root(c, b))); - BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); + BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level)); bch2_btree_interior_update_will_free_node(as, b); @@ -1364,12 +1362,12 @@ static void btree_split(struct btree_update *as, bch2_btree_update_add_new_node(as, n1); if (keys) - btree_split_insert_keys(as, trans, iter, n1, keys); + btree_split_insert_keys(as, trans, path, n1, keys); if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) { trace_btree_split(c, b); - n2 = __btree_split_node(as, n1, iter); + n2 = 
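/*
 * btree_split() here shows the upward recursion of a b-tree split: the
 * node is replaced by n1 (plus n2 if it was full enough to split in
 * two, plus a new root n3 if the root itself split), and the new keys
 * are pushed into the parent via bch2_btree_insert_node(), which may in
 * turn call btree_split() on the parent. Schematic of the three cases
 * handled further down (function names abbreviated):
 *
 *      if (parent)             // non-root split: push keys up a level
 *              insert_node(parent, parent_keys);       // may split again
 *      else if (n3)            // root split: n3 is the new root
 *              set_root(n3);
 *      else                    // root rewritten but not split
 *              set_root(n1);
 *
 * Depth only ever grows at the root, which is what keeps all leaves at
 * equal depth.
 */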
__btree_split_node(as, n1); bch2_btree_build_aux_trees(n2); bch2_btree_build_aux_trees(n1); @@ -1394,7 +1392,7 @@ static void btree_split(struct btree_update *as, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, iter, n3, &as->parent_keys); + btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); bch2_btree_node_write(c, n3, SIX_LOCK_intent); } @@ -1414,12 +1412,12 @@ static void btree_split(struct btree_update *as, if (parent) { /* Split a non root node */ - bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); } else if (n3) { - bch2_btree_set_root(as, trans, iter, n3); + bch2_btree_set_root(as, trans, path, n3); } else { /* Root filled up but didn't need to be split */ - bch2_btree_set_root(as, trans, iter, n1); + bch2_btree_set_root(as, trans, path, n1); } bch2_btree_update_get_open_buckets(as, n1); @@ -1428,7 +1426,7 @@ static void btree_split(struct btree_update *as, if (n3) bch2_btree_update_get_open_buckets(as, n3); - /* Successful split, update the iterator to point to the new nodes: */ + /* Successful split, update the path to point to the new nodes: */ six_lock_increment(&b->c.lock, SIX_LOCK_intent); bch2_trans_node_drop(trans, b); @@ -1461,21 +1459,21 @@ static void btree_split(struct btree_update *as, static void bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct keylist *keys) { - struct btree_iter *linked; + struct btree_path *linked; - __bch2_btree_insert_keys_interior(as, trans, iter, b, - iter->l[b->c.level].iter, keys); + __bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); btree_update_updated_node(as, b); - trans_for_each_iter_with_node(trans, b, linked) + trans_for_each_path_with_node(trans, b, linked) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); - bch2_trans_verify_iters(trans, b); + bch2_trans_verify_paths(trans); } /** @@ -1490,10 +1488,9 @@ bch2_btree_insert_keys_interior(struct btree_update *as, * If a split occurred, this function will return early. This can only happen * for leaf nodes -- inserts into interior nodes have to be atomic. 
*/ -static void bch2_btree_insert_node(struct btree_update *as, - struct btree_trans *trans, struct btree_iter *iter, - struct btree *b, struct keylist *keys, - unsigned flags) +static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans, + struct btree_path *path, struct btree *b, + struct keylist *keys, unsigned flags) { struct bch_fs *c = as->c; int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s); @@ -1501,21 +1498,21 @@ static void bch2_btree_insert_node(struct btree_update *as, int live_u64s_added, u64s_added; lockdep_assert_held(&c->gc_lock); - BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); + BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level)); BUG_ON(!b->c.level); BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); - bch2_btree_node_lock_for_insert(trans, iter, b); + bch2_btree_node_lock_for_insert(trans, path, b); if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) { - bch2_btree_node_unlock_write(trans, iter, b); + bch2_btree_node_unlock_write(trans, path, b); goto split; } btree_node_interior_verify(c, b); - bch2_btree_insert_keys_interior(as, trans, iter, b, keys); + bch2_btree_insert_keys_interior(as, trans, path, b, keys); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s; @@ -1529,46 +1526,46 @@ static void bch2_btree_insert_node(struct btree_update *as, bch2_maybe_compact_whiteouts(c, b)) bch2_trans_node_reinit_iter(trans, b); - bch2_btree_node_unlock_write(trans, iter, b); + bch2_btree_node_unlock_write(trans, path, b); btree_node_interior_verify(c, b); return; split: - btree_split(as, trans, iter, b, keys, flags); + btree_split(as, trans, path, b, keys, flags); } int bch2_btree_split_leaf(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned flags) { struct bch_fs *c = trans->c; - struct btree *b = iter_l(iter)->b; + struct btree *b = path_l(path)->b; struct btree_update *as; unsigned l; int ret = 0; - as = bch2_btree_update_start(trans, iter, iter->level, + as = bch2_btree_update_start(trans, path, path->level, btree_update_reserve_required(c, b), flags); if (IS_ERR(as)) return PTR_ERR(as); - btree_split(as, trans, iter, b, NULL, flags); + btree_split(as, trans, path, b, NULL, flags); bch2_btree_update_done(as); - for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++) - ret = bch2_foreground_maybe_merge(trans, iter, l, flags); + for (l = path->level + 1; btree_path_node(path, l) && !ret; l++) + ret = bch2_foreground_maybe_merge(trans, path, l, flags); return ret; } int __bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags, enum btree_node_sibling sib) { struct bch_fs *c = trans->c; - struct btree_iter *sib_iter = NULL; + struct btree_path *sib_path = NULL; struct btree_update *as; struct bkey_format_state new_s; struct bkey_format new_f; @@ -1579,14 +1576,14 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, int ret = 0, ret2 = 0; retry: - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_path_traverse(trans, path, false); if (ret) return ret; - BUG_ON(!iter->should_be_locked); - BUG_ON(!btree_node_locked(iter, level)); + BUG_ON(!path->should_be_locked); + BUG_ON(!btree_node_locked(path, level)); - b = iter->l[level].b; + b = path->l[level].b; if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) || (sib == btree_next_sib && 
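/*
 * The merge path locates the sibling by computing the position just
 * before this node's min_key or just after its max_key, then getting a
 * second path at the same level — note sib_path is now obtained
 * directly with bch2_path_get(), with no iterator wrapper at all.
 * Sketch of the sibling position computation (mirrors the code below):
 *
 *      struct bpos sib_pos = sib == btree_prev_sib
 *              ? bpos_predecessor(b->data->min_key)
 *              : bpos_successor(b->data->max_key);
 *
 * Merging is only attempted when both nodes share a parent; otherwise
 * sib_u64s[] is set to U16_MAX so the merge isn't retried.
 */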
!bpos_cmp(b->data->max_key, SPOS_MAX))) { @@ -1598,17 +1595,18 @@ retry: ? bpos_predecessor(b->data->min_key) : bpos_successor(b->data->max_key); - sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id, - sib_pos, U8_MAX, level, - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(sib_iter); + sib_path = bch2_path_get(trans, false, path->btree_id, + sib_pos, U8_MAX, level, true); + ret = bch2_btree_path_traverse(trans, sib_path, false); if (ret) goto err; - m = sib_iter->l[level].b; + sib_path->should_be_locked = true; + + m = sib_path->l[level].b; - if (btree_node_parent(iter, b) != - btree_node_parent(sib_iter, m)) { + if (btree_node_parent(path, b) != + btree_node_parent(sib_path, m)) { b->sib_u64s[sib] = U16_MAX; goto out; } @@ -1659,8 +1657,8 @@ retry: if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) goto out; - parent = btree_node_parent(iter, b); - as = bch2_btree_update_start(trans, iter, level, + parent = btree_node_parent(path, b); + as = bch2_btree_update_start(trans, path, level, btree_update_reserve_required(c, parent) + 1, flags| BTREE_INSERT_NOFAIL| @@ -1696,7 +1694,7 @@ retry: bch2_keylist_add(&as->parent_keys, &delete); bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, trans, iter, parent, &as->parent_keys, flags); + bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags); bch2_btree_update_get_open_buckets(as, n); @@ -1707,7 +1705,7 @@ retry: bch2_trans_node_add(trans, n); - bch2_trans_verify_iters(trans, n); + bch2_trans_verify_paths(trans); bch2_btree_node_free_inmem(trans, b); bch2_btree_node_free_inmem(trans, m); @@ -1717,7 +1715,8 @@ retry: bch2_btree_update_done(as); out: bch2_trans_verify_locks(trans); - bch2_trans_iter_free(trans, sib_iter); + if (sib_path) + bch2_path_put(trans, sib_path, true); /* * Don't downgrade locks here: we're called after successful insert, @@ -1730,8 +1729,9 @@ out: */ return ret ?: ret2; err: - bch2_trans_iter_put(trans, sib_iter); - sib_iter = NULL; + if (sib_path) + bch2_path_put(trans, sib_path, true); + sib_path = NULL; if (ret == -EINTR && bch2_trans_relock(trans)) goto retry; @@ -1761,8 +1761,8 @@ retry: if (!b || b->data->keys.seq != seq) goto out; - parent = btree_node_parent(iter, b); - as = bch2_btree_update_start(trans, iter, b->c.level, + parent = btree_node_parent(iter->path, b); + as = bch2_btree_update_start(trans, iter->path, b->c.level, (parent ? 
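/* if b has a parent, updating it needs its own reservation; the +1 is for the replacement node itself */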
btree_update_reserve_required(c, parent) : 0) + 1, @@ -1789,10 +1789,10 @@ retry: if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); - bch2_btree_insert_node(as, trans, iter, parent, + bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys, flags); } else { - bch2_btree_set_root(as, trans, iter, n); + bch2_btree_set_root(as, trans, iter->path, n); } bch2_btree_update_get_open_buckets(as, n); @@ -1805,7 +1805,7 @@ retry: bch2_btree_update_done(as); out: - bch2_btree_iter_downgrade(iter); + bch2_btree_path_downgrade(iter->path); return ret; } @@ -1824,13 +1824,13 @@ void async_btree_node_rewrite_work(struct work_struct *work) container_of(work, struct async_btree_rewrite, work); struct bch_fs *c = a->c; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_node_iter(&trans, a->btree_id, a->pos, + bch2_trans_node_iter_init(&trans, &iter, a->btree_id, a->pos, BTREE_MAX_DEPTH, a->level, 0); - bch2_btree_node_rewrite(&trans, iter, a->seq, 0); - bch2_trans_iter_put(&trans, iter); + bch2_btree_node_rewrite(&trans, &iter, a->seq, 0); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); percpu_ref_put(&c->writes); kfree(a); @@ -1869,7 +1869,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bool skip_triggers) { struct bch_fs *c = trans->c; - struct btree_iter *iter2 = NULL; + struct btree_iter iter2 = { NULL }; struct btree *parent; u64 journal_entries[BKEY_BTREE_PTR_U64s_MAX]; int ret; @@ -1897,19 +1897,22 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BUG_ON(ret); } - parent = btree_node_parent(iter, b); + parent = btree_node_parent(iter->path, b); if (parent) { - iter2 = bch2_trans_copy_iter(trans, iter); + bch2_trans_copy_iter(&iter2, iter); - BUG_ON(iter2->level != b->c.level); - BUG_ON(bpos_cmp(iter2->pos, new_key->k.p)); + iter2.path = bch2_btree_path_make_mut(trans, iter2.path, + iter2.flags & BTREE_ITER_INTENT); - btree_node_unlock(iter2, iter2->level); - iter2->l[iter2->level].b = BTREE_ITER_NO_NODE_UP; - iter2->level++; + BUG_ON(iter2.path->level != b->c.level); + BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p)); - ret = bch2_btree_iter_traverse(iter2) ?: - bch2_trans_update(trans, iter2, new_key, BTREE_TRIGGER_NORUN); + btree_node_unlock(iter2.path, iter2.path->level); + path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP; + iter2.path->level++; + + ret = bch2_btree_iter_traverse(&iter2) ?: + bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_NORUN); if (ret) goto err; } else { @@ -1931,7 +1934,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (ret) goto err; - bch2_btree_node_lock_write(trans, iter, b); + bch2_btree_node_lock_write(trans, iter->path, b); if (new_hash) { mutex_lock(&c->btree_cache.lock); @@ -1946,9 +1949,9 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bkey_copy(&b->key, new_key); } - bch2_btree_node_unlock_write(trans, iter, b); + bch2_btree_node_unlock_write(trans, iter->path, b); out: - bch2_trans_iter_put(trans, iter2); + bch2_trans_iter_exit(trans, &iter2); return ret; err: if (new_hash) { @@ -2006,18 +2009,18 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, struct btree *b, struct bkey_i *new_key, bool skip_triggers) { - struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_trans_get_node_iter(trans, b->c.btree_id, b->key.k.p, - BTREE_MAX_DEPTH, b->c.level, - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); + 
bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p, + BTREE_MAX_DEPTH, b->c.level, + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter); if (ret) goto out; /* has node been freed? */ - if (iter->l[b->c.level].b != b) { + if (iter.path->l[b->c.level].b != b) { /* node has been freed: */ BUG_ON(!btree_node_dying(b)); goto out; @@ -2025,9 +2028,9 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, BUG_ON(!btree_node_hashed(b)); - ret = bch2_btree_node_update_key(trans, iter, b, new_key, skip_triggers); + ret = bch2_btree_node_update_key(trans, &iter, b, new_key, skip_triggers); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 13b3a1bf0f4f..c06cfcc66db7 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -117,39 +117,39 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *, struct btree *, struct bkey_format); -int bch2_btree_split_leaf(struct btree_trans *, struct btree_iter *, unsigned); +int bch2_btree_split_leaf(struct btree_trans *, struct btree_path *, unsigned); -int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_iter *, +int __bch2_foreground_maybe_merge(struct btree_trans *, struct btree_path *, unsigned, unsigned, enum btree_node_sibling); static inline int bch2_foreground_maybe_merge_sibling(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags, enum btree_node_sibling sib) { struct btree *b; - if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE) + if (path->uptodate >= BTREE_ITER_NEED_TRAVERSE) return 0; - if (!bch2_btree_node_relock(trans, iter, level)) + if (!bch2_btree_node_relock(trans, path, level)) return 0; - b = iter->l[level].b; + b = path->l[level].b; if (b->sib_u64s[sib] > trans->c->btree_foreground_merge_threshold) return 0; - return __bch2_foreground_maybe_merge(trans, iter, level, flags, sib); + return __bch2_foreground_maybe_merge(trans, path, level, flags, sib); } static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned level, unsigned flags) { - return bch2_foreground_maybe_merge_sibling(trans, iter, level, flags, + return bch2_foreground_maybe_merge_sibling(trans, path, level, flags, btree_prev_sib) ?: - bch2_foreground_maybe_merge_sibling(trans, iter, level, flags, + bch2_foreground_maybe_merge_sibling(trans, path, level, flags, btree_next_sib); } diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 5e57ff5a5ceb..4fb5a5666e20 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -29,9 +29,9 @@ static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, bpos_cmp(l->k->k.p, r->k->k.p); } -static inline struct btree_iter_level *insert_l(struct btree_insert_entry *i) +static inline struct btree_path_level *insert_l(struct btree_insert_entry *i) { - return i->iter->l + i->level; + return i->path->l + i->level; } static inline bool same_leaf_as_prev(struct btree_trans *trans, @@ -49,14 +49,14 @@ static inline bool same_leaf_as_next(struct btree_trans *trans, } inline void bch2_btree_node_lock_for_insert(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b) { struct bch_fs *c = trans->c; - bch2_btree_node_lock_write(trans, iter, b); + 
bch2_btree_node_lock_write(trans, path, b); - if (iter->cached) + if (path->cached) return; if (unlikely(btree_node_just_written(b)) && @@ -75,7 +75,7 @@ inline void bch2_btree_node_lock_for_insert(struct btree_trans *trans, /* Handle overwrites and do insert, for non extents: */ bool bch2_btree_bset_insert_key(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct btree *b, struct btree_node_iter *node_iter, struct bkey_i *insert) @@ -116,7 +116,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans, bch2_bset_delete(b, k, clobber_u64s); goto fix_iter; } else { - bch2_btree_iter_fix_key_modified(trans, b, k); + bch2_btree_path_fix_key_modified(trans, b, k); } return true; @@ -134,7 +134,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans, clobber_u64s = k->u64s; goto overwrite; } else { - bch2_btree_iter_fix_key_modified(trans, b, k); + bch2_btree_path_fix_key_modified(trans, b, k); } } @@ -144,7 +144,7 @@ overwrite: new_u64s = k->u64s; fix_iter: if (clobber_u64s != new_u64s) - bch2_btree_node_iter_fix(trans, iter, b, node_iter, k, + bch2_btree_node_iter_fix(trans, path, b, node_iter, k, clobber_u64s, new_u64s); return true; } @@ -201,7 +201,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, EBUG_ON(!insert->level && !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags)); - if (unlikely(!bch2_btree_bset_insert_key(trans, insert->iter, b, + if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b, &insert_l(insert)->iter, insert->k))) return false; @@ -236,9 +236,10 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, static inline void btree_insert_entry_checks(struct btree_trans *trans, struct btree_insert_entry *i) { - BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos)); - BUG_ON(i->level != i->iter->level); - BUG_ON(i->btree_id != i->iter->btree_id); + BUG_ON(bpos_cmp(i->k->k.p, i->path->pos)); + BUG_ON(i->cached != i->path->cached); + BUG_ON(i->level != i->path->level); + BUG_ON(i->btree_id != i->path->btree_id); } static noinline int @@ -293,14 +294,14 @@ btree_key_can_insert(struct btree_trans *trans, static enum btree_insert_ret btree_key_can_insert_cached(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, unsigned u64s) { - struct bkey_cached *ck = (void *) iter->l[0].b; + struct bkey_cached *ck = (void *) path->l[0].b; unsigned new_u64s; struct bkey_i *new_k; - EBUG_ON(iter->level); + EBUG_ON(path->level); if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bch2_btree_key_cache_must_wait(trans->c) && @@ -340,7 +341,7 @@ static inline void do_btree_insert_one(struct btree_trans *trans, did_work = !i->cached ? btree_insert_key_leaf(trans, i) - : bch2_btree_insert_key_cached(trans, i->iter, i->k); + : bch2_btree_insert_key_cached(trans, i->path, i->k); if (!did_work) return; @@ -366,11 +367,12 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans) trans_for_each_update(trans, i) { /* * XXX: synchronization of cached update triggers with gc + * XXX: synchronization of interior node updates with gc */ BUG_ON(i->cached || i->level); if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) - bch2_mark_update(trans, i->iter, i->k, + bch2_mark_update(trans, i->path, i->k, i->flags|BTREE_TRIGGER_GC); } } @@ -417,7 +419,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, u64s += i->k->k.u64s; ret = !i->cached ? 
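/* non-cached updates check for room in the btree node, cached updates check the key cache entry */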
btree_key_can_insert(trans, insert_l(i)->b, u64s) - : btree_key_can_insert_cached(trans, i->iter, u64s); + : btree_key_can_insert_cached(trans, i->path, u64s); if (ret) { *stopped_at = i; return ret; @@ -476,7 +478,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans_for_each_update(trans, i) if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) - bch2_mark_update(trans, i->iter, i->k, i->flags); + bch2_mark_update(trans, i->path, i->k, i->flags); if (marking && trans->fs_usage_deltas) bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas); @@ -503,11 +505,13 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - struct btree_iter *iter; + struct btree_path *path; struct bkey_s_c old; int ret, u64s_delta = 0; trans_for_each_update(trans, i) { + struct bkey u; + /* * peek_slot() doesn't yet work on iterators that point to * interior nodes: @@ -515,7 +519,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, if (i->cached || i->level) continue; - old = bch2_btree_iter_peek_slot(i->iter); + old = bch2_btree_path_peek_slot(i->path, &u); ret = bkey_err(old); if (unlikely(ret)) return ret; @@ -525,7 +529,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, if (!same_leaf_as_next(trans, i)) { if (u64s_delta <= 0) { - ret = bch2_foreground_maybe_merge(trans, i->iter, + ret = bch2_foreground_maybe_merge(trans, i->path, i->level, trans->flags); if (unlikely(ret)) return ret; @@ -536,7 +540,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, } trans_for_each_update(trans, i) - BUG_ON(!btree_node_intent_locked(i->iter, i->level)); + BUG_ON(!btree_node_intent_locked(i->path, i->level)); ret = bch2_journal_preres_get(&c->journal, &trans->journal_preres, trans->journal_preres_u64s, @@ -560,14 +564,12 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, * or anything else that might call bch2_trans_relock(), since that * would just retake the read locks: */ - trans_for_each_iter(trans, iter) - if (iter->nodes_locked != iter->nodes_intent_locked && - !bch2_btree_iter_upgrade(trans, iter, 1)) { + trans_for_each_path(trans, path) + if (path->nodes_locked != path->nodes_intent_locked && + !bch2_btree_path_upgrade(trans, path, path->level + 1)) { trace_trans_restart_upgrade(trans->ip, trace_ip, - iter->btree_id, - &iter->real_pos); - trans->restarted = true; - return -EINTR; + path->btree_id, &path->pos); + return btree_trans_restart(trans); } trans_for_each_update(trans, i) { @@ -581,6 +583,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, buf, (void *) trans->ip, (void *) i->ip_allocated, invalid); bch2_fatal_error(c); + return -EINVAL; } btree_insert_entry_checks(trans, i); } @@ -588,14 +591,14 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, trans_for_each_update(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_lock_for_insert(trans, i->iter, + bch2_btree_node_lock_for_insert(trans, i->path, insert_l(i)->b); ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip); trans_for_each_update(trans, i) if (!same_leaf_as_prev(trans, i)) - bch2_btree_node_unlock_write_inlined(trans, i->iter, + bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b); if (!ret && trans->journal_pin) @@ -635,13 +638,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, switch (ret) { case BTREE_INSERT_BTREE_NODE_FULL: - ret = bch2_btree_split_leaf(trans, i->iter, trans->flags); + ret 
= bch2_btree_split_leaf(trans, i->path, trans->flags); if (!ret) return 0; if (ret == -EINTR) trace_trans_restart_btree_node_split(trans->ip, trace_ip, - i->btree_id, &i->iter->real_pos); + i->btree_id, &i->path->pos); break; case BTREE_INSERT_NEED_MARK_REPLICAS: bch2_trans_unlock(trans); @@ -749,6 +752,10 @@ int __bch2_trans_commit(struct btree_trans *trans) } #ifdef CONFIG_BCACHEFS_DEBUG + /* + * if BTREE_TRIGGER_NORUN is set, it means we're probably being called + * from the key cache flush code: + */ trans_for_each_update(trans, i) if (!i->cached && !(i->flags & BTREE_TRIGGER_NORUN)) @@ -769,13 +776,12 @@ int __bch2_trans_commit(struct btree_trans *trans) i->trans_triggers_run = true; trans_trigger_run = true; - ret = bch2_trans_mark_update(trans, i->iter, + ret = bch2_trans_mark_update(trans, i->path, i->k, i->flags); if (unlikely(ret)) { if (ret == -EINTR) trace_trans_restart_mark(trans->ip, _RET_IP_, - i->btree_id, - &i->iter->pos); + i->btree_id, &i->path->pos); goto out; } } @@ -783,18 +789,16 @@ int __bch2_trans_commit(struct btree_trans *trans) } while (trans_trigger_run); trans_for_each_update(trans, i) { - BUG_ON(!i->iter->should_be_locked); + BUG_ON(!i->path->should_be_locked); - if (unlikely(!bch2_btree_iter_upgrade(trans, i->iter, - i->level + 1))) { + if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) { trace_trans_restart_upgrade(trans->ip, _RET_IP_, - i->btree_id, &i->iter->pos); - trans->restarted = true; - ret = -EINTR; + i->btree_id, &i->path->pos); + ret = btree_trans_restart(trans); goto out; } - BUG_ON(!btree_node_intent_locked(i->iter, i->level)); + BUG_ON(!btree_node_intent_locked(i->path, i->level)); u64s = jset_u64s(i->k->k.u64s); if (i->cached && @@ -828,6 +832,9 @@ out: if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) percpu_ref_put(&trans->c->writes); out_reset: + trans_for_each_update(trans, i) + bch2_path_put(trans, i->path, true); + trans->extra_journal_res = 0; trans->nr_updates = 0; trans->hooks = NULL; @@ -869,11 +876,11 @@ static noinline int extent_front_merge(struct btree_trans *trans, bkey_reassemble(update, k); if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) { - struct btree_iter *update_iter = - bch2_trans_copy_iter(trans, iter); + struct btree_iter update_iter; - ret = bch2_btree_delete_at(trans, update_iter, flags); - bch2_trans_iter_put(trans, update_iter); + bch2_trans_copy_iter(&update_iter, iter); + ret = bch2_btree_delete_at(trans, &update_iter, flags); + bch2_trans_iter_exit(trans, &update_iter); if (ret) return ret; @@ -890,18 +897,18 @@ static int bch2_trans_update_extent(struct btree_trans *trans, enum btree_update_flags flags) { struct bch_fs *c = trans->c; - struct btree_iter *iter, *update_iter; + struct btree_iter iter, update_iter; struct bpos start = bkey_start_pos(&insert->k); struct bkey_i *update; struct bkey_s_c k; enum btree_id btree_id = orig_iter->btree_id; int ret = 0, compressed_sectors; - iter = bch2_trans_get_iter(trans, btree_id, start, - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES| - BTREE_ITER_NOT_EXTENTS); - k = bch2_btree_iter_peek(iter); + bch2_trans_iter_init(trans, &iter, btree_id, start, + BTREE_ITER_INTENT| + BTREE_ITER_WITH_UPDATES| + BTREE_ITER_NOT_EXTENTS); + k = bch2_btree_iter_peek(&iter); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -909,7 +916,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k))) { if (bch2_bkey_maybe_mergable(k.k, &insert->k)) { - ret = extent_front_merge(trans, iter, k, 
&insert, flags); + ret = extent_front_merge(trans, &iter, k, &insert, flags); if (ret) goto out; } @@ -940,23 +947,22 @@ static int bch2_trans_update_extent(struct btree_trans *trans, bch2_cut_back(start, update); - update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(update_iter) ?: - bch2_trans_update(trans, update_iter, update, + bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&update_iter) ?: + bch2_trans_update(trans, &update_iter, update, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| flags); - bch2_trans_iter_put(trans, update_iter); + bch2_trans_iter_exit(trans, &update_iter); if (ret) goto err; } if (bkey_cmp(k.k->p, insert->k.p) <= 0) { - update_iter = bch2_trans_copy_iter(trans, iter); - ret = bch2_btree_delete_at(trans, update_iter, - flags); - bch2_trans_iter_put(trans, update_iter); + bch2_trans_copy_iter(&update_iter, &iter); + ret = bch2_btree_delete_at(trans, &update_iter, flags); + bch2_trans_iter_exit(trans, &update_iter); if (ret) goto err; @@ -970,13 +976,13 @@ static int bch2_trans_update_extent(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_front(insert->k.p, update); - update_iter = bch2_trans_copy_iter(trans, iter); - bch2_trans_update(trans, update_iter, update, flags); - bch2_trans_iter_put(trans, update_iter); + bch2_trans_copy_iter(&update_iter, &iter); + bch2_trans_update(trans, &update_iter, update, flags); + bch2_trans_iter_exit(trans, &update_iter); goto out; } next: - k = bch2_btree_iter_next(iter); + k = bch2_btree_iter_next(&iter); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -987,14 +993,12 @@ next: bch2_bkey_merge(c, bkey_i_to_s(insert), k); out: if (!bkey_deleted(&insert->k)) { - bch2_btree_iter_set_pos(iter, insert->k.p); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(trans, iter, insert, flags); - } else { - set_btree_iter_dontneed(trans, iter); + bch2_btree_iter_set_pos(&iter, insert->k.p); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, insert, flags); } err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1002,31 +1006,34 @@ err: int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k, enum btree_update_flags flags) { - struct btree_insert_entry *i, n = (struct btree_insert_entry) { + struct btree_insert_entry *i, n; + + BUG_ON(!iter->path->should_be_locked); + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + return bch2_trans_update_extent(trans, iter, k, flags); + + BUG_ON(trans->nr_updates >= BTREE_ITER_MAX); + BUG_ON(bpos_cmp(k->k.p, iter->path->pos)); + + n = (struct btree_insert_entry) { .flags = flags, - .bkey_type = __btree_node_type(iter->level, iter->btree_id), + .bkey_type = __btree_node_type(iter->path->level, iter->btree_id), .btree_id = iter->btree_id, - .level = iter->level, - .cached = iter->cached, - .iter = iter, + .level = iter->path->level, + .cached = iter->flags & BTREE_ITER_CACHED, + .path = iter->path, .k = k, .ip_allocated = _RET_IP_, }; - BUG_ON(!iter->should_be_locked); - - if (iter->flags & BTREE_ITER_IS_EXTENTS) - return bch2_trans_update_extent(trans, iter, k, flags); + __btree_path_get(n.path, true); #ifdef CONFIG_BCACHEFS_DEBUG trans_for_each_update(trans, i) BUG_ON(i != trans->updates && btree_insert_entry_cmp(i - 1, i) >= 0); #endif - BUG_ON(trans->nr_updates >= BTREE_ITER_MAX); - 
BUG_ON(bpos_cmp(n.k->k.p, n.iter->real_pos)); - - n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; /* * Pending updates are kept sorted: first, find position of new update, @@ -1048,7 +1055,10 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, if (n.cached && !i->cached) { i->k = n.k; i->flags = n.flags; + + __btree_path_get(n.path, false); } else { + bch2_path_put(trans, i->path, true); *i = n; } } else @@ -1068,15 +1078,15 @@ void bch2_trans_commit_hook(struct btree_trans *trans, int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id, struct bkey_i *k) { - struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_trans_get_iter(trans, id, bkey_start_pos(&k->k), + bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(trans, iter, k, 0); - bch2_trans_iter_put(trans, iter); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, k, 0); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1114,16 +1124,16 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bpos start, struct bpos end, u64 *journal_seq) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; - iter = bch2_trans_get_iter(trans, id, start, BTREE_ITER_INTENT); + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT); retry: while ((bch2_trans_begin(trans), - (k = bch2_btree_iter_peek(iter)).k) && + (k = bch2_btree_iter_peek(&iter)).k) && !(ret = bkey_err(k)) && - bkey_cmp(iter->pos, end) < 0) { + bkey_cmp(iter.pos, end) < 0) { struct bkey_i delete; bkey_init(&delete.k); @@ -1142,9 +1152,9 @@ retry: * (bch2_btree_iter_peek() does guarantee that iter.pos >= * bkey_start_pos(k.k)). 
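 *
 * For example, if k spans [0, 100) and the range we're deleting starts
 * at 50, iter.pos will be 50 -- so the delete key built below has to
 * start at iter.pos, not at bkey_start_pos(k.k).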
*/ - delete.k.p = iter->pos; + delete.k.p = iter.pos; - if (btree_node_type_is_extents(iter->btree_id)) { + if (btree_node_type_is_extents(id)) { unsigned max_sectors = KEY_SIZE_MAX & (~0 << trans->c->block_bits); @@ -1152,12 +1162,12 @@ retry: bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end, &delete); - ret = bch2_extent_trim_atomic(trans, iter, &delete); + ret = bch2_extent_trim_atomic(trans, &iter, &delete); if (ret) break; } - ret = bch2_trans_update(trans, iter, &delete, 0) ?: + ret = bch2_trans_update(trans, &iter, &delete, 0) ?: bch2_trans_commit(trans, NULL, journal_seq, BTREE_INSERT_NOFAIL); if (ret) @@ -1171,7 +1181,7 @@ retry: goto retry; } - bch2_trans_iter_free(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index a1d4a25bc42c..6831c002961d 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1222,38 +1222,23 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c new, unsigned flags) return ret; } -int bch2_mark_update(struct btree_trans *trans, struct btree_iter *iter, +int bch2_mark_update(struct btree_trans *trans, struct btree_path *path, struct bkey_i *new, unsigned flags) { struct bch_fs *c = trans->c; struct bkey _deleted = KEY(0, 0, 0); struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; struct bkey_s_c old; - int iter_flags, ret; + struct bkey unpacked; + int ret; if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc(iter->btree_id)) + if (!btree_node_type_needs_gc(path->btree_id)) return 0; - if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) { - iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES; - iter->flags &= ~BTREE_ITER_WITH_UPDATES; - - old = bch2_btree_iter_peek_slot(iter); - iter->flags |= iter_flags; - - ret = bkey_err(old); - if (ret) - return ret; - } else { - /* - * If BTREE_ITER_CACHED_NOFILL was used, we better not be - * running triggers that do anything on removal (alloc btree): - */ - old = deleted; - } + old = bch2_btree_path_peek_slot(path, &unpacked); if (old.k->type == new->k.type && ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { @@ -1291,22 +1276,13 @@ void fs_usage_apply_warn(struct btree_trans *trans, pr_err("overlapping with"); if (!i->cached) { - struct btree_iter *copy = bch2_trans_copy_iter(trans, i->iter); - struct bkey_s_c k; - int ret; - - for_each_btree_key_continue(copy, 0, k, ret) { - if (btree_node_type_is_extents(i->iter->btree_id) - ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 - : bkey_cmp(i->k->k.p, k.k->p)) - break; + struct bkey u; + struct bkey_s_c k = bch2_btree_path_peek_slot(i->path, &u); - bch2_bkey_val_to_text(&PBUF(buf), c, k); - pr_err("%s", buf); - } - bch2_trans_iter_put(trans, copy); + bch2_bkey_val_to_text(&PBUF(buf), c, k); + pr_err("%s", buf); } else { - struct bkey_cached *ck = (void *) i->iter->l[0].b; + struct bkey_cached *ck = (void *) i->path->l[0].b; if (ck->valid) { bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(ck->k)); @@ -1385,31 +1361,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, /* trans_mark: */ -static struct btree_iter *trans_get_update(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos, - struct bkey_s_c *k) -{ - struct btree_insert_entry *i; - - trans_for_each_update(trans, i) - if (i->iter->btree_id == btree_id && - (btree_node_type_is_extents(btree_id) - ? 
bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 && - bkey_cmp(pos, i->k->k.p) < 0 - : !bkey_cmp(pos, i->iter->pos))) { - *k = bkey_i_to_s_c(i->k); - - /* ugly hack.. */ - BUG_ON(btree_iter_live(trans, i->iter)); - trans->iters_live |= 1ULL << i->iter->idx; - return i->iter; - } - - return NULL; -} - static struct bkey_alloc_buf * -bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, +bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter, const struct bch_extent_ptr *ptr, struct bkey_alloc_unpacked *u) { @@ -1417,36 +1370,34 @@ bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_it struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); struct bucket *g; - struct btree_iter *iter; - struct bkey_s_c k; struct bkey_alloc_buf *a; + struct bkey_i *update; int ret; a = bch2_trans_kmalloc(trans, sizeof(struct bkey_alloc_buf)); if (IS_ERR(a)) return a; - iter = trans_get_update(trans, BTREE_ID_alloc, pos, &k); - if (iter) { - *u = bch2_alloc_unpack(k); - } else { - iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, pos, - BTREE_ITER_CACHED| - BTREE_ITER_CACHED_NOFILL| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(iter); - if (ret) { - bch2_trans_iter_put(trans, iter); - return ERR_PTR(ret); - } + bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos, + BTREE_ITER_CACHED| + BTREE_ITER_CACHED_NOFILL| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(iter); + if (ret) { + bch2_trans_iter_exit(trans, iter); + return ERR_PTR(ret); + } + update = __bch2_btree_trans_peek_updates(iter); + if (update && !bpos_cmp(update->k.p, pos)) { + *u = bch2_alloc_unpack(bkey_i_to_s_c(update)); + } else { percpu_down_read(&c->mark_lock); g = bucket(ca, pos.offset); *u = alloc_mem_to_key(iter, g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); } - *_iter = iter; return a; } @@ -1455,7 +1406,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, s64 sectors, enum bch_data_type data_type) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_alloc_unpacked u; struct bkey_alloc_buf *a; int ret; @@ -1470,9 +1421,9 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, goto out; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, 0); + bch2_trans_update(trans, &iter, &a->k, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1481,16 +1432,16 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, s64 sectors, enum bch_data_type data_type) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_i_stripe *s; struct bch_replicas_padded r; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, POS(0, p.ec.idx), - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes, POS(0, p.ec.idx), + BTREE_ITER_INTENT| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -1521,13 +1472,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, stripe_blockcount_set(&s->v, p.ec.block, stripe_blockcount_get(&s->v, p.ec.block) + sectors); - bch2_trans_update(trans, iter, &s->k_i, 0); + bch2_trans_update(trans, &iter, &s->k_i, 0); bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); r.e.data_type = data_type; update_replicas_list(trans, 
&r.e, sectors); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1599,7 +1550,7 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; struct bkey_alloc_buf *a; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_alloc_unpacked u; bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant; int ret = 0; @@ -1623,7 +1574,7 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, if (!deleting) { if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c, "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)", - iter->pos.inode, iter->pos.offset, u.gen, + iter.pos.inode, iter.pos.offset, u.gen, u.stripe, s.k->p.offset)) { ret = -EIO; goto err; @@ -1637,9 +1588,9 @@ static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, } bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, 0); + bch2_trans_update(trans, &iter, &a->k, 0); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1744,17 +1695,17 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, u64 idx, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_i *n; __le64 *refcount; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; s64 ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, idx), - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, idx), + BTREE_ITER_INTENT| + BTREE_ITER_WITH_UPDATES); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -1784,14 +1735,14 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, set_bkey_val_u64s(&n->k, 0); } - bch2_btree_iter_set_pos_to_extent_start(iter); - ret = bch2_trans_update(trans, iter, n, 0); + bch2_btree_iter_set_pos_to_extent_start(&iter); + ret = bch2_trans_update(trans, &iter, n, 0); if (ret) goto err; ret = k.k->p.offset - idx; err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1843,39 +1794,23 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, } int bch2_trans_mark_update(struct btree_trans *trans, - struct btree_iter *iter, + struct btree_path *path, struct bkey_i *new, unsigned flags) { struct bkey _deleted = KEY(0, 0, 0); struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; struct bkey_s_c old; - int iter_flags, ret; + struct bkey unpacked; + int ret; if (unlikely(flags & BTREE_TRIGGER_NORUN)) return 0; - if (!btree_node_type_needs_gc(iter->btree_id)) + if (!btree_node_type_needs_gc(path->btree_id)) return 0; - - if (likely(!(iter->flags & BTREE_ITER_CACHED_NOFILL))) { - iter_flags = iter->flags & BTREE_ITER_WITH_UPDATES; - iter->flags &= ~BTREE_ITER_WITH_UPDATES; - - old = bch2_btree_iter_peek_slot(iter); - iter->flags |= iter_flags; - - ret = bkey_err(old); - if (ret) - return ret; - } else { - /* - * If BTREE_ITER_CACHED_NOFILL was used, we better not be - * running triggers that do anything on removal (alloc btree): - */ - old = deleted; - } + old = bch2_btree_path_peek_slot(path, &unpacked); if (old.k->type == new->k.type && ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { @@ -1897,7 +1832,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, unsigned 
sectors) { struct bch_fs *c = trans->c; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_alloc_unpacked u; struct bkey_alloc_buf *a; struct bch_extent_ptr ptr = { @@ -1920,7 +1855,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", - iter->pos.inode, iter->pos.offset, u.gen, + iter.pos.inode, iter.pos.offset, u.gen, bch2_data_types[u.data_type], bch2_data_types[type], bch2_data_types[type]); @@ -1932,9 +1867,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, u.dirty_sectors = sectors; bch2_alloc_pack(c, a, u); - bch2_trans_update(trans, iter, &a->k, 0); + bch2_trans_update(trans, &iter, &a->k, 0); out: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 3fb91ef60685..4687fba2eed6 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -228,13 +228,13 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, int bch2_mark_key(struct bch_fs *, struct bkey_s_c, unsigned); -int bch2_mark_update(struct btree_trans *, struct btree_iter *, +int bch2_mark_update(struct btree_trans *, struct btree_path *, struct bkey_i *, unsigned); int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter, - struct bkey_i *insert, unsigned); +int bch2_trans_mark_update(struct btree_trans *, struct btree_path *, + struct bkey_i *, unsigned); void bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 6a28de30ea3b..5ffb7f0a3bf6 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -243,7 +243,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, { struct dump_iter *i = file->private_data; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int err; @@ -260,10 +260,10 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - iter = bch2_trans_get_iter(&trans, i->id, i->from, - BTREE_ITER_PREFETCH| - BTREE_ITER_ALL_SNAPSHOTS); - k = bch2_btree_iter_peek(iter); + bch2_trans_iter_init(&trans, &iter, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS); + k = bch2_btree_iter_peek(&iter); while (k.k && !(err = bkey_err(k))) { bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); @@ -272,8 +272,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->buf[i->bytes] = '\n'; i->bytes++; - k = bch2_btree_iter_next(iter); - i->from = iter->pos; + k = bch2_btree_iter_next(&iter); + i->from = iter.pos; err = flush_buf(i); if (err) @@ -282,7 +282,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); @@ -301,7 +301,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, { struct dump_iter *i = file->private_data; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct btree *b; int err; @@ -336,7 +336,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size) 
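/* no room left in the userspace buffer */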
break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); @@ -355,7 +355,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, { struct dump_iter *i = file->private_data; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct btree *prev_node = NULL; int err; @@ -373,11 +373,11 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); + bch2_trans_iter_init(&trans, &iter, i->id, i->from, BTREE_ITER_PREFETCH); - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(err = bkey_err(k))) { - struct btree_iter_level *l = &iter->l[0]; + struct btree_path_level *l = &iter.path->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); @@ -396,8 +396,8 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (err) break; - bch2_btree_iter_advance(iter); - i->from = iter->pos; + bch2_btree_iter_advance(&iter); + i->from = iter.pos; err = flush_buf(i); if (err) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 02b29681f695..1d510f7728b6 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -183,7 +183,8 @@ int bch2_dirent_rename(struct btree_trans *trans, const struct qstr *dst_name, u64 *dst_inum, u64 *dst_offset, enum bch_rename_mode mode) { - struct btree_iter *src_iter = NULL, *dst_iter = NULL; + struct btree_iter src_iter = { NULL }; + struct btree_iter dst_iter = { NULL }; struct bkey_s_c old_src, old_dst; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; struct bpos dst_pos = @@ -199,17 +200,16 @@ int bch2_dirent_rename(struct btree_trans *trans, * the target already exists - we're relying on the VFS * to do that check for us for correctness: */ - dst_iter = mode == BCH_RENAME - ? bch2_hash_hole(trans, bch2_dirent_hash_desc, + ret = mode == BCH_RENAME + ? 
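/* a plain rename creates the new dirent in an empty hash slot; overwrite/exchange look up the existing dst entry */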
bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc, dst_hash, dst_dir, dst_name) - : bch2_hash_lookup(trans, bch2_dirent_hash_desc, + : bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc, dst_hash, dst_dir, dst_name, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dst_iter); if (ret) goto out; - old_dst = bch2_btree_iter_peek_slot(dst_iter); + old_dst = bch2_btree_iter_peek_slot(&dst_iter); ret = bkey_err(old_dst); if (ret) goto out; @@ -217,17 +217,16 @@ int bch2_dirent_rename(struct btree_trans *trans, if (mode != BCH_RENAME) *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); if (mode != BCH_RENAME_EXCHANGE) - *src_offset = dst_iter->pos.offset; + *src_offset = dst_iter.pos.offset; /* Lookup src: */ - src_iter = bch2_hash_lookup(trans, bch2_dirent_hash_desc, - src_hash, src_dir, src_name, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(src_iter); + ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc, + src_hash, src_dir, src_name, + BTREE_ITER_INTENT); if (ret) goto out; - old_src = bch2_btree_iter_peek_slot(src_iter); + old_src = bch2_btree_iter_peek_slot(&src_iter); ret = bkey_err(old_src); if (ret) goto out; @@ -241,7 +240,7 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; dirent_copy_target(new_dst, bkey_s_c_to_dirent(old_src)); - new_dst->k.p = dst_iter->pos; + new_dst->k.p = dst_iter.pos; /* Create new src key: */ if (mode == BCH_RENAME_EXCHANGE) { @@ -251,7 +250,7 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; dirent_copy_target(new_src, bkey_s_c_to_dirent(old_dst)); - new_src->k.p = src_iter->pos; + new_src->k.p = src_iter.pos; } else { new_src = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); ret = PTR_ERR_OR_ZERO(new_src); @@ -259,10 +258,10 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; bkey_init(&new_src->k); - new_src->k.p = src_iter->pos; + new_src->k.p = src_iter.pos; - if (bkey_cmp(dst_pos, src_iter->pos) <= 0 && - bkey_cmp(src_iter->pos, dst_iter->pos) < 0) { + if (bkey_cmp(dst_pos, src_iter.pos) <= 0 && + bkey_cmp(src_iter.pos, dst_iter.pos) < 0) { /* * We have a hash collision for the new dst key, * and new_src - the key we're deleting - is between @@ -275,8 +274,8 @@ int bch2_dirent_rename(struct btree_trans *trans, * If we're not overwriting, we can just insert * new_dst at the src position: */ - new_dst->k.p = src_iter->pos; - bch2_trans_update(trans, src_iter, + new_dst->k.p = src_iter.pos; + bch2_trans_update(trans, &src_iter, &new_dst->k_i, 0); goto out_set_offset; } else { @@ -290,7 +289,7 @@ int bch2_dirent_rename(struct btree_trans *trans, } else { /* Check if we need a whiteout to delete src: */ ret = bch2_hash_needs_whiteout(trans, bch2_dirent_hash_desc, - src_hash, src_iter); + src_hash, &src_iter); if (ret < 0) goto out; @@ -299,15 +298,15 @@ int bch2_dirent_rename(struct btree_trans *trans, } } - bch2_trans_update(trans, src_iter, &new_src->k_i, 0); - bch2_trans_update(trans, dst_iter, &new_dst->k_i, 0); + bch2_trans_update(trans, &src_iter, &new_src->k_i, 0); + bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); out_set_offset: if (mode == BCH_RENAME_EXCHANGE) *src_offset = new_src->k.p.offset; *dst_offset = new_dst->k.p.offset; out: - bch2_trans_iter_put(trans, src_iter); - bch2_trans_iter_put(trans, dst_iter); + bch2_trans_iter_exit(trans, &src_iter); + bch2_trans_iter_exit(trans, &dst_iter); return ret; } @@ -319,12 +318,13 @@ int bch2_dirent_delete_at(struct btree_trans *trans, hash_info, iter); } -struct btree_iter * -__bch2_dirent_lookup_trans(struct btree_trans *trans, 
u64 dir_inum, - const struct bch_hash_info *hash_info, - const struct qstr *name, unsigned flags) +int __bch2_dirent_lookup_trans(struct btree_trans *trans, + struct btree_iter *iter, + u64 dir_inum, + const struct bch_hash_info *hash_info, + const struct qstr *name, unsigned flags) { - return bch2_hash_lookup(trans, bch2_dirent_hash_desc, + return bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, hash_info, dir_inum, name, flags); } @@ -333,26 +333,25 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, const struct qstr *name) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 inum = 0; int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = __bch2_dirent_lookup_trans(&trans, dir_inum, - hash_info, name, 0); - ret = PTR_ERR_OR_ZERO(iter); + ret = __bch2_dirent_lookup_trans(&trans, &iter, dir_inum, + hash_info, name, 0); if (ret) goto out; - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto out; inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); out: BUG_ON(ret == -EINTR); bch2_trans_exit(&trans); @@ -361,7 +360,7 @@ out: int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; @@ -375,7 +374,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) break; } } - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -383,7 +382,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; int ret; @@ -412,7 +411,7 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) break; ctx->pos = dirent.k->p.offset + 1; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index e1d8ce377d43..c14f6029e1c9 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -50,8 +50,7 @@ int bch2_dirent_rename(struct btree_trans *, const struct qstr *, u64 *, u64 *, enum bch_rename_mode); -struct btree_iter * -__bch2_dirent_lookup_trans(struct btree_trans *, u64, +int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, u64, const struct bch_hash_info *, const struct qstr *, unsigned); u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 7ad74987757f..2c538f9b54f8 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -429,13 +429,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_SLOTS); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, + POS(0, idx), BTREE_ITER_SLOTS); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -445,6 +446,7 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip } 
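/* found it: copy the stripe key out to the caller's buffer */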
bkey_reassemble(&stripe->key.k_i, k); err: + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -704,7 +706,7 @@ static int ec_stripe_bkey_insert(struct bch_fs *c, struct disk_reservation *res) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bpos min_pos = POS(0, 1); struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); @@ -719,7 +721,7 @@ retry: if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) { if (start_pos.offset) { start_pos = min_pos; - bch2_btree_iter_set_pos(iter, start_pos); + bch2_btree_iter_set_pos(&iter, start_pos); continue; } @@ -733,19 +735,19 @@ retry: goto err; found_slot: - start_pos = iter->pos; + start_pos = iter.pos; - ret = ec_stripe_mem_alloc(&trans, iter); + ret = ec_stripe_mem_alloc(&trans, &iter); if (ret) goto err; - stripe->k.p = iter->pos; + stripe->k.p = iter.pos; - ret = bch2_trans_update(&trans, iter, &stripe->k_i, 0) ?: + ret = bch2_trans_update(&trans, &iter, &stripe->k_i, 0) ?: bch2_trans_commit(&trans, res, NULL, BTREE_INSERT_NOFAIL); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; @@ -759,15 +761,15 @@ err: static int ec_stripe_bkey_update(struct btree_trans *trans, struct bkey_i_stripe *new) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; const struct bch_stripe *existing; unsigned i; int ret; - iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, - new->k.p, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek_slot(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_stripes, + new->k.p, BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -790,9 +792,9 @@ static int ec_stripe_bkey_update(struct btree_trans *trans, stripe_blockcount_set(&new->v, i, stripe_blockcount_get(existing, i)); - ret = bch2_trans_update(trans, iter, &new->k_i, 0); + ret = bch2_trans_update(trans, &iter, &new->k_i, 0); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -820,7 +822,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, struct bkey *pos) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_s_extent e; struct bkey_buf sk; @@ -832,23 +834,23 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, /* XXX this doesn't support the reflink btree */ - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - bkey_start_pos(pos), - BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bkey_start_pos(pos), + BTREE_ITER_INTENT); - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k)) && bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) { struct bch_extent_ptr *ptr, *ec_ptr = NULL; if (extent_has_stripe_ptr(k, s->key.k.p.offset)) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } block = bkey_matches_stripe(&s->key.v, k); if (block < 0) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } @@ -863,21 +865,21 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, extent_stripe_ptr_add(e, s, ec_ptr, block); - bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k)); + bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k)); next_pos = sk.k->k.p; - ret = bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, sk.k, 0) ?: + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, sk.k, 0) ?: 
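/* extents are committed one at a time; on -EINTR we simply retry the same extent */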
bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); if (!ret) - bch2_btree_iter_set_pos(iter, next_pos); + bch2_btree_iter_set_pos(&iter, next_pos); if (ret == -EINTR) ret = 0; if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1598,7 +1600,7 @@ write: int bch2_stripes_write(struct bch_fs *c, unsigned flags) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct genradix_iter giter; struct bkey_i_stripe *new_key; struct stripe *m; @@ -1609,8 +1611,8 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS_MIN, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS_MIN, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); genradix_for_each(&c->stripes[0], giter, m) { if (!m->alive) @@ -1618,13 +1620,13 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags) ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL|flags, - __bch2_stripe_write_key(&trans, iter, m, + __bch2_stripe_write_key(&trans, &iter, m, giter.pos, new_key)); if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); @@ -1659,19 +1661,19 @@ int bch2_stripes_read(struct bch_fs *c) int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; size_t i, idx = 0; int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS(0, U64_MAX), 0); - k = bch2_btree_iter_prev(iter); + k = bch2_btree_iter_prev(&iter); if (!IS_ERR_OR_NULL(k.k)) idx = k.k->p.offset + 1; - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 93d55f46233f..9d959b053def 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -58,7 +58,7 @@ static int count_iters_for_insert(struct btree_trans *trans, u64 idx = le64_to_cpu(p.v->idx); unsigned sectors = bpos_min(*end, p.k->p).offset - bkey_start_offset(p.k); - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c r_k; for_each_btree_key(trans, iter, @@ -83,8 +83,8 @@ static int count_iters_for_insert(struct btree_trans *trans, break; } } + bch2_trans_iter_exit(trans, &iter); - bch2_trans_iter_put(trans, iter); break; } } @@ -99,7 +99,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, struct bkey_i *insert, struct bpos *end) { - struct btree_iter *copy; + struct btree_iter copy; struct bkey_s_c k; unsigned nr_iters = 0; int ret; @@ -118,7 +118,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, if (ret < 0) return ret; - copy = bch2_trans_copy_iter(trans, iter); + bch2_trans_copy_iter(©, iter); for_each_btree_key_continue(copy, 0, k, ret) { unsigned offset = 0; @@ -149,7 +149,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, break; } - bch2_trans_iter_put(trans, copy); + bch2_trans_iter_exit(trans, ©); return ret < 0 ? 
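/* a positive return here only means we stopped counting early, not an error */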
ret : 0; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 6524703f3da4..0190605711e5 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -615,7 +615,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, unsigned nr_replicas, bool compressed) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bpos end = pos; struct bkey_s_c k; bool ret = true; @@ -636,7 +636,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, break; } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 2189a11ccad8..a6617455ea12 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -19,16 +19,15 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, struct posix_acl *acl) { struct bch_fs *c = trans->c; - struct btree_iter *dir_iter = NULL; - struct btree_iter *inode_iter = NULL; + struct btree_iter dir_iter = { NULL }; + struct btree_iter inode_iter = { NULL }; struct bch_hash_info hash = bch2_hash_info_init(c, new_inode); u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); u64 dir_offset = 0; int ret; - dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dir_iter); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -37,8 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (!name) new_inode->bi_flags |= BCH_INODE_UNLINKED; - inode_iter = bch2_inode_create(trans, new_inode, U32_MAX, cpu); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_create(trans, &inode_iter, new_inode, U32_MAX, cpu); if (ret) goto err; @@ -63,7 +61,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (S_ISDIR(new_inode->bi_mode)) dir_u->bi_nlink++; - ret = bch2_inode_write(trans, dir_iter, dir_u); + ret = bch2_inode_write(trans, &dir_iter, dir_u); if (ret) goto err; @@ -82,14 +80,14 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, } /* XXX use bch2_btree_iter_set_snapshot() */ - inode_iter->snapshot = U32_MAX; - bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX)); + inode_iter.snapshot = U32_MAX; + bch2_btree_iter_set_pos(&inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX)); - ret = bch2_btree_iter_traverse(inode_iter) ?: - bch2_inode_write(trans, inode_iter, new_inode); + ret = bch2_btree_iter_traverse(&inode_iter) ?: + bch2_inode_write(trans, &inode_iter, new_inode); err: - bch2_trans_iter_put(trans, inode_iter); - bch2_trans_iter_put(trans, dir_iter); + bch2_trans_iter_exit(trans, &inode_iter); + bch2_trans_iter_exit(trans, &dir_iter); return ret; } @@ -98,22 +96,21 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, struct bch_inode_unpacked *inode_u, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter *dir_iter = NULL, *inode_iter = NULL; + struct btree_iter dir_iter = { NULL }; + struct btree_iter inode_iter = { NULL }; struct bch_hash_info dir_hash; u64 now = bch2_current_time(c); u64 dir_offset = 0; int ret; - inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; inode_u->bi_ctime = now; bch2_inode_nlink_inc(inode_u); - dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, 0); - ret = 
PTR_ERR_OR_ZERO(dir_iter); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, 0); if (ret) goto err; @@ -133,11 +130,11 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, inode_u->bi_dir_offset = dir_offset; } - ret = bch2_inode_write(trans, dir_iter, dir_u) ?: - bch2_inode_write(trans, inode_iter, inode_u); + ret = bch2_inode_write(trans, &dir_iter, dir_u) ?: + bch2_inode_write(trans, &inode_iter, inode_u); err: - bch2_trans_iter_put(trans, dir_iter); - bch2_trans_iter_put(trans, inode_iter); + bch2_trans_iter_exit(trans, &dir_iter); + bch2_trans_iter_exit(trans, &inode_iter); return ret; } @@ -147,35 +144,33 @@ int bch2_unlink_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter *dir_iter = NULL, *dirent_iter = NULL, - *inode_iter = NULL; + struct btree_iter dir_iter = { NULL }; + struct btree_iter dirent_iter = { NULL }; + struct btree_iter inode_iter = { NULL }; struct bch_hash_info dir_hash; u64 inum, now = bch2_current_time(c); struct bkey_s_c k; int ret; - dir_iter = bch2_inode_peek(trans, dir_u, dir_inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dir_iter); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); if (ret) goto err; dir_hash = bch2_hash_info_init(c, dir_u); - dirent_iter = __bch2_dirent_lookup_trans(trans, dir_inum, &dir_hash, - name, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dirent_iter); + ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir_inum, &dir_hash, + name, BTREE_ITER_INTENT); if (ret) goto err; - k = bch2_btree_iter_peek_slot(dirent_iter); + k = bch2_btree_iter_peek_slot(&dirent_iter); ret = bkey_err(k); if (ret) goto err; inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); - inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(inode_iter); + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -192,13 +187,13 @@ int bch2_unlink_trans(struct btree_trans *trans, ret = (S_ISDIR(inode_u->bi_mode) ? 
bch2_empty_dir_trans(trans, inum) : 0) ?: - bch2_dirent_delete_at(trans, &dir_hash, dirent_iter) ?: - bch2_inode_write(trans, dir_iter, dir_u) ?: - bch2_inode_write(trans, inode_iter, inode_u); + bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?: + bch2_inode_write(trans, &dir_iter, dir_u) ?: + bch2_inode_write(trans, &inode_iter, inode_u); err: - bch2_trans_iter_put(trans, inode_iter); - bch2_trans_iter_put(trans, dirent_iter); - bch2_trans_iter_put(trans, dir_iter); + bch2_trans_iter_exit(trans, &inode_iter); + bch2_trans_iter_exit(trans, &dirent_iter); + bch2_trans_iter_exit(trans, &dir_iter); return ret; } @@ -236,25 +231,25 @@ int bch2_rename_trans(struct btree_trans *trans, enum bch_rename_mode mode) { struct bch_fs *c = trans->c; - struct btree_iter *src_dir_iter = NULL, *dst_dir_iter = NULL; - struct btree_iter *src_inode_iter = NULL, *dst_inode_iter = NULL; + struct btree_iter src_dir_iter = { NULL }; + struct btree_iter dst_dir_iter = { NULL }; + struct btree_iter src_inode_iter = { NULL }; + struct btree_iter dst_inode_iter = { NULL }; struct bch_hash_info src_hash, dst_hash; u64 src_inode, src_offset, dst_inode, dst_offset; u64 now = bch2_current_time(c); int ret; - src_dir_iter = bch2_inode_peek(trans, src_dir_u, src_dir, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(src_dir_iter); + ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir, + BTREE_ITER_INTENT); if (ret) goto err; src_hash = bch2_hash_info_init(c, src_dir_u); if (dst_dir != src_dir) { - dst_dir_iter = bch2_inode_peek(trans, dst_dir_u, dst_dir, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dst_dir_iter); + ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir, + BTREE_ITER_INTENT); if (ret) goto err; @@ -273,16 +268,14 @@ int bch2_rename_trans(struct btree_trans *trans, if (ret) goto err; - src_inode_iter = bch2_inode_peek(trans, src_inode_u, src_inode, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(src_inode_iter); + ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inode, + BTREE_ITER_INTENT); if (ret) goto err; if (dst_inode) { - dst_inode_iter = bch2_inode_peek(trans, dst_inode_u, dst_inode, - BTREE_ITER_INTENT); - ret = PTR_ERR_OR_ZERO(dst_inode_iter); + ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inode, + BTREE_ITER_INTENT); if (ret) goto err; } @@ -357,18 +350,18 @@ int bch2_rename_trans(struct btree_trans *trans, if (dst_inode) dst_inode_u->bi_ctime = now; - ret = bch2_inode_write(trans, src_dir_iter, src_dir_u) ?: + ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: (src_dir != dst_dir - ? bch2_inode_write(trans, dst_dir_iter, dst_dir_u) + ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) : 0 ) ?: - bch2_inode_write(trans, src_inode_iter, src_inode_u) ?: + bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: (dst_inode - ? bch2_inode_write(trans, dst_inode_iter, dst_inode_u) + ? 
bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) : 0 ); err: - bch2_trans_iter_put(trans, dst_inode_iter); - bch2_trans_iter_put(trans, src_inode_iter); - bch2_trans_iter_put(trans, dst_dir_iter); - bch2_trans_iter_put(trans, src_dir_iter); + bch2_trans_iter_exit(trans, &dst_inode_iter); + bch2_trans_iter_exit(trans, &src_inode_iter); + bch2_trans_iter_exit(trans, &dst_dir_iter); + bch2_trans_iter_exit(trans, &src_dir_iter); return ret; } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 251029c33164..909db2f104cd 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -867,7 +867,7 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts = io_opts(c, &inode->ei_inode); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct page *page; struct readpages_iter readpages_iter; int ret; @@ -876,8 +876,8 @@ void bch2_readahead(struct readahead_control *ractl) BUG_ON(ret); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, - BTREE_ITER_SLOTS); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, + BTREE_ITER_SLOTS); bch2_pagecache_add_get(&inode->ei_pagecache_lock); @@ -898,13 +898,13 @@ void bch2_readahead(struct readahead_control *ractl) rbio->bio.bi_end_io = bch2_readpages_end_io; BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); - bchfs_read(&trans, iter, rbio, inode->v.i_ino, + bchfs_read(&trans, &iter, rbio, inode->v.i_ino, &readpages_iter); } bch2_pagecache_add_put(&inode->ei_pagecache_lock); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); kfree(readpages_iter.pages); } @@ -913,7 +913,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, u64 inum, struct page *page) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; bch2_page_state_create(page, __GFP_NOFAIL); @@ -923,12 +923,12 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, BUG_ON(!bio_add_page(&rbio->bio, page, PAGE_SIZE, 0)); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, - BTREE_ITER_SLOTS); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, + BTREE_ITER_SLOTS); - bchfs_read(&trans, iter, rbio, inum, NULL); + bchfs_read(&trans, &iter, rbio, inum, NULL); - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); } @@ -2146,7 +2146,7 @@ static inline int range_has_data(struct bch_fs *c, struct bpos end) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -2161,7 +2161,7 @@ static inline int range_has_data(struct bch_fs *c, break; } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); return bch2_trans_exit(&trans) ?: ret; } @@ -2471,7 +2471,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct address_space *mapping = inode->v.i_mapping; struct bkey_buf copy; struct btree_trans trans; - struct btree_iter *src, *dst, *del; + struct btree_iter src, dst, del; loff_t shift, new_size; u64 src_start; int ret = 0; @@ -2536,11 +2536,11 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bch2_bkey_buf_init(©); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - src = bch2_trans_get_iter(&trans, BTREE_ID_extents, + bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 
9), BTREE_ITER_INTENT); - dst = bch2_trans_copy_iter(&trans, src); - del = bch2_trans_copy_iter(&trans, src); + bch2_trans_copy_iter(&dst, &src); + bch2_trans_copy_iter(&del, &src); while (ret == 0 || ret == -EINTR) { struct disk_reservation disk_res = @@ -2555,8 +2555,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bch2_trans_begin(&trans); k = insert - ? bch2_btree_iter_peek_prev(src) - : bch2_btree_iter_peek(src); + ? bch2_btree_iter_peek_prev(&src) + : bch2_btree_iter_peek(&src); if ((ret = bkey_err(k))) continue; @@ -2574,9 +2574,9 @@ reassemble: bch2_cut_front(move_pos, copy.k); copy.k->k.p.offset += shift >> 9; - bch2_btree_iter_set_pos(dst, bkey_start_pos(©.k->k)); + bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); - ret = bch2_extent_atomic_end(&trans, dst, copy.k, &atomic_end); + ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); if (ret) continue; @@ -2594,7 +2594,7 @@ reassemble: delete.k.p = copy.k->k.p; delete.k.size = copy.k->k.size; delete.k.p.offset -= shift >> 9; - bch2_btree_iter_set_pos(del, bkey_start_pos(&delete.k)); + bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p; @@ -2615,20 +2615,20 @@ reassemble: BUG_ON(ret); } - ret = bch2_btree_iter_traverse(del) ?: - bch2_trans_update(&trans, del, &delete, trigger_flags) ?: - bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?: + ret = bch2_btree_iter_traverse(&del) ?: + bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: + bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: bch2_trans_commit(&trans, &disk_res, &inode->ei_journal_seq, BTREE_INSERT_NOFAIL); bch2_disk_reservation_put(c, &disk_res); if (!ret) - bch2_btree_iter_set_pos(src, next_pos); + bch2_btree_iter_set_pos(&src, next_pos); } - bch2_trans_iter_put(&trans, del); - bch2_trans_iter_put(&trans, dst); - bch2_trans_iter_put(&trans, src); + bch2_trans_iter_exit(&trans, &del); + bch2_trans_iter_exit(&trans, &dst); + bch2_trans_iter_exit(&trans, &src); bch2_trans_exit(&trans); bch2_bkey_buf_exit(©, c); @@ -2653,18 +2653,18 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bpos end_pos = POS(inode->v.i_ino, end_sector); unsigned replicas = io_opts(c, &inode->ei_inode).data_replicas; int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS(inode->v.i_ino, start_sector), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - while (!ret && bkey_cmp(iter->pos, end_pos) < 0) { + while (!ret && bkey_cmp(iter.pos, end_pos) < 0) { s64 i_sectors_delta = 0; struct disk_reservation disk_res = { 0 }; struct quota_res quota_res = { 0 }; @@ -2674,20 +2674,20 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, bch2_trans_begin(&trans); - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) goto bkey_err; /* already reserved */ if (k.k->type == KEY_TYPE_reservation && bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } if (bkey_extent_is_data(k.k) && !(mode & FALLOC_FL_ZERO_RANGE)) { - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); continue; } @@ -2696,7 +2696,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, 
 reservation.k.p = k.k->p;
 reservation.k.size = k.k->size;
- bch2_cut_front(iter->pos, &reservation.k_i);
+ bch2_cut_front(iter.pos, &reservation.k_i);
 bch2_cut_back(end_pos, &reservation.k_i);
 sectors = reservation.k.size;
@@ -2720,7 +2720,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 reservation.v.nr_replicas = disk_res.nr_replicas;
 }
- ret = bch2_extent_update(&trans, iter, &reservation.k_i,
+ ret = bch2_extent_update(&trans, &iter, &reservation.k_i,
 &disk_res, &inode->ei_journal_seq,
 0, &i_sectors_delta, true);
 i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
@@ -2730,7 +2730,7 @@ bkey_err:
 if (ret == -EINTR)
 ret = 0;
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 return ret;
 }
@@ -3010,7 +3010,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
 struct bch_inode_info *inode = file_bch_inode(file);
 struct bch_fs *c = inode->v.i_sb->s_fs_info;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 u64 isize, next_data = MAX_LFS_FILESIZE;
 int ret;
@@ -3031,7 +3031,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
 } else if (k.k->p.offset >> 9 > isize)
 break;
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 ret = bch2_trans_exit(&trans) ?: ret;
 if (ret)
@@ -3106,7 +3106,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
 struct bch_inode_info *inode = file_bch_inode(file);
 struct bch_fs *c = inode->v.i_sb->s_fs_info;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 u64 isize, next_hole = MAX_LFS_FILESIZE;
 int ret;
@@ -3135,7 +3135,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
 offset = max(offset, bkey_start_offset(k.k) << 9);
 }
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 ret = bch2_trans_exit(&trans) ?: ret;
 if (ret)
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index f6c058540712..570ae826ebb5 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -142,7 +142,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
 void *p, unsigned fields)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter = { NULL };
 struct bch_inode_unpacked inode_u;
 int ret;
@@ -150,11 +150,10 @@ int __must_check bch2_write_inode(struct bch_fs *c,
 retry:
 bch2_trans_begin(&trans);
- iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
- BTREE_ITER_INTENT);
- ret = PTR_ERR_OR_ZERO(iter) ?:
+ ret = bch2_inode_peek(&trans, &iter, &inode_u, inode->v.i_ino,
+ BTREE_ITER_INTENT) ?:
 (set ? set(inode, &inode_u, p) : 0) ?:
- bch2_inode_write(&trans, iter, &inode_u) ?:
+ bch2_inode_write(&trans, &iter, &inode_u) ?:
 bch2_trans_commit(&trans, NULL,
 &inode->ei_journal_seq,
 BTREE_INSERT_NOFAIL);
@@ -166,7 +165,7 @@ retry:
 if (!ret)
 bch2_inode_update_after_write(c, inode, &inode_u, fields);
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 if (ret == -EINTR)
 goto retry;
@@ -687,7 +686,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
 struct bch_fs *c = inode->v.i_sb->s_fs_info;
 struct bch_qid qid;
 struct btree_trans trans;
- struct btree_iter *inode_iter;
+ struct btree_iter inode_iter = { NULL };
 struct bch_inode_unpacked inode_u;
 struct posix_acl *acl = NULL;
 int ret;
@@ -713,9 +712,8 @@ retry:
 kfree(acl);
 acl = NULL;
- inode_iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino,
- BTREE_ITER_INTENT);
- ret = PTR_ERR_OR_ZERO(inode_iter);
+ ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino,
+ BTREE_ITER_INTENT);
 if (ret)
 goto btree_err;
@@ -727,12 +725,12 @@ retry:
 goto btree_err;
 }
- ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+ ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
 bch2_trans_commit(&trans, NULL,
 &inode->ei_journal_seq,
 BTREE_INSERT_NOFAIL);
 btree_err:
- bch2_trans_iter_put(&trans, inode_iter);
+ bch2_trans_iter_exit(&trans, &inode_iter);
 if (ret == -EINTR)
 goto retry;
@@ -883,7 +881,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 struct bch_fs *c = vinode->i_sb->s_fs_info;
 struct bch_inode_info *ei = to_bch_ei(vinode);
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_buf cur, prev;
 struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
@@ -902,23 +900,23 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 bch2_bkey_buf_init(&prev);
 bch2_trans_init(&trans, c, 0, 0);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
- POS(ei->v.i_ino, start >> 9), 0);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ POS(ei->v.i_ino, start >> 9), 0);
 retry:
 bch2_trans_begin(&trans);
- while ((k = bch2_btree_iter_peek(iter)).k &&
+ while ((k = bch2_btree_iter_peek(&iter)).k &&
 !(ret = bkey_err(k)) &&
- bkey_cmp(iter->pos, end) < 0) {
+ bkey_cmp(iter.pos, end) < 0) {
 enum btree_id data_btree = BTREE_ID_extents;
 if (!bkey_extent_is_data(k.k) &&
 k.k->type != KEY_TYPE_reservation) {
- bch2_btree_iter_advance(iter);
+ bch2_btree_iter_advance(&iter);
 continue;
 }
- offset_into_extent = iter->pos.offset -
+ offset_into_extent = iter.pos.offset -
 bkey_start_offset(k.k);
 sectors = k.k->size - offset_into_extent;
@@ -939,7 +937,7 @@ retry:
 offset_into_extent),
 cur.k);
 bch2_key_resize(&cur.k->k, sectors);
- cur.k->k.p = iter->pos;
+ cur.k->k.p = iter.pos;
 cur.k->k.p.offset += cur.k->k.size;
 if (have_extent) {
@@ -952,8 +950,8 @@ retry:
 bkey_copy(prev.k, cur.k);
 have_extent = true;
- bch2_btree_iter_set_pos(iter,
- POS(iter->pos.inode, iter->pos.offset + sectors));
+ bch2_btree_iter_set_pos(&iter,
+ POS(iter.pos.inode, iter.pos.offset + sectors));
 }
 if (ret == -EINTR)
@@ -963,7 +961,7 @@ retry:
 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
 FIEMAP_EXTENT_LAST);
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 ret = bch2_trans_exit(&trans) ?: ret;
 bch2_bkey_buf_exit(&cur, c);
 bch2_bkey_buf_exit(&prev, c);
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 36eba46d566e..eb979e79eaac 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -19,7 +19,7 @@
 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 u64 sectors = 0;
 int ret;
@@ -33,7 +33,7 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum)
 sectors += k.k->size;
 }
- bch2_trans_iter_free(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret ?: sectors;
 }
@@ -42,24 +42,24 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
 struct bch_inode_unpacked *inode,
 u32 *snapshot)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_inodes,
- POS(0, inode_nr), 0);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+ POS(0, inode_nr), 0);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
 goto err;
 if (snapshot)
- *snapshot = iter->pos.snapshot;
+ *snapshot = iter.pos.snapshot;
 ret = k.k->type == KEY_TYPE_inode
 ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
 : -ENOENT;
 err:
- bch2_trans_iter_free(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -74,13 +74,16 @@ static int __write_inode(struct btree_trans *trans,
 struct bch_inode_unpacked *inode,
 u32 snapshot)
 {
- struct btree_iter *inode_iter =
- bch2_trans_get_iter(trans, BTREE_ID_inodes,
- SPOS(0, inode->bi_inum, snapshot),
- BTREE_ITER_INTENT);
- int ret = bch2_btree_iter_traverse(inode_iter) ?:
- bch2_inode_write(trans, inode_iter, inode);
- bch2_trans_iter_put(trans, inode_iter);
+ struct btree_iter iter;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, inode->bi_inum, snapshot),
+ BTREE_ITER_INTENT);
+
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_inode_write(trans, &iter, inode);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -100,7 +103,7 @@ static int write_inode(struct btree_trans *trans,
 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
 {
 struct bch_fs *c = trans->c;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bch_inode_unpacked dir_inode;
 struct bch_hash_info dir_hash_info;
 int ret;
@@ -111,11 +114,11 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
 dir_hash_info = bch2_hash_info_init(c, &dir_inode);
- iter = bch2_trans_get_iter(trans, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
- &dir_hash_info, iter);
- bch2_trans_iter_put(trans, iter);
+ &dir_hash_info, &iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -230,13 +233,13 @@ static int reattach_inode(struct btree_trans *trans,
 static int remove_backpointer(struct btree_trans *trans,
 struct bch_inode_unpacked *inode)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
- POS(inode->bi_dir, inode->bi_dir_offset), 0);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
+ POS(inode->bi_dir, inode->bi_dir_offset), 0);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
 goto out;
@@ -247,7 +250,7 @@ static int remove_backpointer(struct btree_trans *trans,
 ret = remove_dirent(trans, k.k->p);
 out:
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -343,7 +346,7 @@ static int hash_check_key(struct btree_trans *trans,
 struct btree_iter *k_iter, struct bkey_s_c hash_k)
 {
 struct bch_fs *c = trans->c;
- struct btree_iter *iter = NULL;
+ struct btree_iter iter = { NULL };
 char buf[200];
 struct bkey_s_c k;
 u64 hash;
@@ -378,12 +381,12 @@ static int hash_check_key(struct btree_trans *trans,
 }
 if (bkey_deleted(k.k)) {
- bch2_trans_iter_free(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 goto bad_hash;
 }
 }
- bch2_trans_iter_free(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 bad_hash:
 if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
@@ -513,7 +516,7 @@ noinline_for_stack
 static int check_inodes(struct bch_fs *c, bool full)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_s_c_inode inode;
 int ret;
@@ -532,12 +535,12 @@ static int check_inodes(struct bch_fs *c, bool full)
 (inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY|
 BCH_INODE_I_SECTORS_DIRTY|
 BCH_INODE_UNLINKED))) {
- ret = check_inode(&trans, iter, inode);
+ ret = check_inode(&trans, &iter, inode);
 if (ret)
 break;
 }
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 BUG_ON(ret == -EINTR);
@@ -547,7 +550,7 @@ static int check_inodes(struct bch_fs *c, bool full)
 static int fix_overlapping_extent(struct btree_trans *trans,
 struct bkey_s_c k, struct bpos cut_at)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_i *u;
 int ret;
@@ -567,29 +570,29 @@ static int fix_overlapping_extent(struct btree_trans *trans,
 * assume things about extent overwrites - we should be running the
 * triggers manually here
 */
- iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p,
- BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, u->k.p,
+ BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS);
- BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
- ret = bch2_btree_iter_traverse(iter) ?:
- bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN) ?:
+ BUG_ON(iter.flags & BTREE_ITER_IS_EXTENTS);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, u, BTREE_TRIGGER_NORUN) ?:
 bch2_trans_commit(trans, NULL, NULL,
 BTREE_INSERT_NOFAIL|
 BTREE_INSERT_LAZY_RW);
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
 static int inode_backpointer_exists(struct btree_trans *trans,
 struct bch_inode_unpacked *inode)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_dirents,
- POS(inode->bi_dir, inode->bi_dir_offset), 0);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
+ POS(inode->bi_dir, inode->bi_dir_offset), 0);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
 goto out;
@@ -598,7 +601,7 @@ static int inode_backpointer_exists(struct btree_trans *trans,
 ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
 out:
- bch2_trans_iter_free(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -618,7 +621,7 @@ static int check_extents(struct bch_fs *c)
 {
 struct inode_walker w = inode_walker_init();
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_buf prev;
 u64 i_sectors = 0;
@@ -630,12 +633,12 @@ static int check_extents(struct bch_fs *c)
 bch_verbose(c, "checking extents");
- iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
- POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ POS(BCACHEFS_ROOT_INO, 0),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH);
 retry:
- while ((k = bch2_btree_iter_peek(iter)).k &&
+ while ((k = bch2_btree_iter_peek(&iter)).k &&
 !(ret = bkey_err(k))) {
 if (w.have_inode &&
 w.cur_inum != k.k->p.inode &&
@@ -700,12 +703,12 @@ retry:
 i_sectors += k.k->size;
 bch2_bkey_buf_reassemble(&prev, c, k);
- bch2_btree_iter_advance(iter);
+ bch2_btree_iter_advance(&iter);
 }
 fsck_err:
 if (ret == -EINTR)
 goto retry;
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_bkey_buf_exit(&prev, c);
 return bch2_trans_exit(&trans) ?: ret;
 }
@@ -890,7 +893,7 @@ static int check_dirents(struct bch_fs *c)
 struct inode_walker w = inode_walker_init();
 struct bch_hash_info hash_info;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 unsigned nr_subdirs = 0;
 int ret = 0;
@@ -898,18 +901,18 @@ static int check_dirents(struct bch_fs *c)
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
- POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_dirents,
+ POS(BCACHEFS_ROOT_INO, 0),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH);
 do {
 ret = lockrestart_do(&trans,
- check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
+ check_dirent(&trans, &iter, &hash_info, &w, &nr_subdirs));
 if (ret)
 break;
- } while (bch2_btree_iter_advance(iter));
- bch2_trans_iter_put(&trans, iter);
+ } while (bch2_btree_iter_advance(&iter));
+ bch2_trans_iter_exit(&trans, &iter);
 return bch2_trans_exit(&trans) ?: ret;
 }
@@ -923,7 +926,7 @@ static int check_xattrs(struct bch_fs *c)
 struct inode_walker w = inode_walker_init();
 struct bch_hash_info hash_info;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret = 0;
@@ -931,12 +934,12 @@ static int check_xattrs(struct bch_fs *c)
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
- POS(BCACHEFS_ROOT_INO, 0),
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
+ POS(BCACHEFS_ROOT_INO, 0),
+ BTREE_ITER_INTENT|
+ BTREE_ITER_PREFETCH);
 retry:
- while ((k = bch2_btree_iter_peek(iter)).k &&
+ while ((k = bch2_btree_iter_peek(&iter)).k &&
 !(ret = bkey_err(k))) {
 ret = walk_inode(&trans, &w, k.k->p.inode);
 if (ret)
@@ -945,7 +948,7 @@ retry:
 if (fsck_err_on(!w.have_inode, c,
 "xattr for missing inode %llu",
 k.k->p.inode)) {
- ret = bch2_btree_delete_at(&trans, iter, 0);
+ ret = bch2_btree_delete_at(&trans, &iter, 0);
 if (ret)
 break;
 continue;
@@ -955,17 +958,17 @@ retry:
 hash_info = bch2_hash_info_init(c, &w.inode);
 ret = hash_check_key(&trans, bch2_xattr_hash_desc,
- &hash_info, iter, k);
+ &hash_info, &iter, k);
 if (ret)
 break;
- bch2_btree_iter_advance(iter);
+ bch2_btree_iter_advance(&iter);
 }
 fsck_err:
 if (ret == -EINTR)
 goto retry;
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 return bch2_trans_exit(&trans) ?: ret;
 }
@@ -1114,7 +1117,7 @@ fsck_err:
 static int check_directory_structure(struct bch_fs *c)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bch_inode_unpacked u;
 struct pathbuf path = { 0, 0, NULL };
@@ -1139,7 +1142,7 @@ static int check_directory_structure(struct bch_fs *c)
 if (ret)
 break;
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 BUG_ON(ret == -EINTR);
@@ -1215,7 +1218,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
 u64 start, u64 *end)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_s_c_inode inode;
 struct bch_inode_unpacked u;
@@ -1253,7 +1256,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c,
 }
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 if (ret)
@@ -1267,7 +1270,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
 u64 range_start, u64 range_end)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_s_c_dirent d;
 int ret;
@@ -1289,7 +1292,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
 bch2_trans_cond_resched(&trans);
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 ret = bch2_trans_exit(&trans) ?: ret;
 if (ret)
@@ -1304,7 +1307,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
 u64 range_start, u64 range_end)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_s_c_inode inode;
 struct bch_inode_unpacked u;
@@ -1346,14 +1349,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
 ret = __bch2_trans_do(&trans, NULL, NULL,
 BTREE_INSERT_NOFAIL|
 BTREE_INSERT_LAZY_RW,
- bch2_btree_iter_traverse(iter) ?:
- bch2_inode_write(&trans, iter, &u));
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_inode_write(&trans, &iter, &u));
 if (ret)
 bch_err(c, "error in fsck: error %i updating inode", ret);
 }
 }
 fsck_err:
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 if (ret)
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 63f50891594c..2b653ee03f4f 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -292,18 +292,18 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
 return 0;
 }
-struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
- struct bch_inode_unpacked *inode,
- u64 inum, unsigned flags)
+int bch2_inode_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ u64 inum, unsigned flags)
 {
- struct btree_iter *iter;
 struct bkey_s_c k;
 int ret;
 if (trans->c->opts.inodes_use_key_cache)
 flags |= BTREE_ITER_CACHED;
- iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, inum), flags);
+ bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, inum), flags);
 k = bch2_btree_iter_peek_slot(iter);
 ret = bkey_err(k);
 if (ret)
@@ -317,10 +317,10 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
 if (ret)
 goto err;
- return iter;
+ return 0;
 err:
- bch2_trans_iter_put(trans, iter);
- return ERR_PTR(ret);
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
 }
 int bch2_inode_write(struct btree_trans *trans,
@@ -482,12 +482,12 @@ static inline u32 bkey_generation(struct bkey_s_c k)
 }
 }
-struct btree_iter *bch2_inode_create(struct btree_trans *trans,
- struct bch_inode_unpacked *inode_u,
- u32 snapshot, u64 cpu)
+int bch2_inode_create(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode_u,
+ u32 snapshot, u64 cpu)
 {
 struct bch_fs *c = trans->c;
- struct btree_iter *iter = NULL;
 struct bkey_s_c k;
 u64 min, max, start, pos, *hint;
 int ret = 0;
@@ -513,9 +513,9 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans,
 start = min;
 pos = start;
- iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos),
- BTREE_ITER_ALL_SNAPSHOTS|
- BTREE_ITER_INTENT);
+ bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_INTENT);
 again:
 while ((k = bch2_btree_iter_peek(iter)).k &&
 !(ret = bkey_err(k)) &&
@@ -553,8 +553,8 @@ again:
 ret = -ENOSPC;
 if (ret) {
- bch2_trans_iter_put(trans, iter);
- return ERR_PTR(ret);
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
 }
 /* Retry from start */
@@ -566,8 +566,8 @@ found_slot:
 k = bch2_btree_iter_peek_slot(iter);
 ret = bkey_err(k);
 if (ret) {
- bch2_trans_iter_put(trans, iter);
- return ERR_PTR(ret);
+ bch2_trans_iter_exit(trans, iter);
+ return ret;
 }
 /* We may have raced while the iterator wasn't pointing at pos: */
@@ -578,13 +578,13 @@ found_slot:
 *hint = k.k->p.offset;
 inode_u->bi_inum = k.k->p.offset;
 inode_u->bi_generation = bkey_generation(k);
- return iter;
+ return 0;
 }
 int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
 {
 struct btree_trans trans;
- struct btree_iter *iter = NULL;
+ struct btree_iter iter = { NULL };
 struct bkey_i_inode_generation delete;
 struct bpos start = POS(inode_nr, 0);
 struct bpos end = POS(inode_nr + 1, 0);
@@ -617,9 +617,9 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached)
 retry:
 bch2_trans_begin(&trans);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_inodes,
- POS(0, inode_nr), iter_flags);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes,
+ POS(0, inode_nr), iter_flags);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
@@ -636,14 +636,14 @@ retry:
 bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
 bkey_inode_generation_init(&delete.k_i);
- delete.k.p = iter->pos;
+ delete.k.p = iter.pos;
 delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
- ret = bch2_trans_update(&trans, iter, &delete.k_i, 0) ?:
+ ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?:
 bch2_trans_commit(&trans, NULL, NULL,
 BTREE_INSERT_NOFAIL);
 err:
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 if (ret == -EINTR)
 goto retry;
@@ -654,12 +654,11 @@ err:
 static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
 struct bch_inode_unpacked *inode)
 {
- struct btree_iter *iter;
+ struct btree_iter iter = { NULL };
 int ret;
- iter = bch2_inode_peek(trans, inode, inode_nr, 0);
- ret = PTR_ERR_OR_ZERO(iter);
- bch2_trans_iter_put(trans, iter);
+ ret = bch2_inode_peek(trans, &iter, inode, inode_nr, 0);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index d67af4f56f05..25bef104ebcc 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -57,8 +57,8 @@ int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
 void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
-struct btree_iter *bch2_inode_peek(struct btree_trans *,
- struct bch_inode_unpacked *, u64, unsigned);
+int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
+ struct bch_inode_unpacked *, u64, unsigned);
 int bch2_inode_write(struct btree_trans *, struct btree_iter *,
 struct bch_inode_unpacked *);
@@ -71,8 +71,8 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
 uid_t, gid_t, umode_t, dev_t,
 struct bch_inode_unpacked *);
-struct btree_iter *bch2_inode_create(struct btree_trans *,
- struct bch_inode_unpacked *, u32, u64);
+int bch2_inode_create(struct btree_trans *, struct btree_iter *,
+ struct bch_inode_unpacked *, u32, u64);
 int bch2_inode_rm(struct bch_fs *, u64, bool);
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 34295419190d..bee33258c0d8 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -202,7 +202,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
 s64 *disk_sectors_delta)
 {
 struct bch_fs *c = trans->c;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c old;
 unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
 bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
@@ -213,7 +213,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
 *i_sectors_delta = 0;
 *disk_sectors_delta = 0;
- iter = bch2_trans_copy_iter(trans, extent_iter);
+ bch2_trans_copy_iter(&iter, extent_iter);
 for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
 s64 sectors = min(new->k.p.offset, old.k->p.offset) -
@@ -246,7 +246,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
 * less:
 */
 if (!bkey_cmp(old.k->p, new->k.p)) {
- old = bch2_btree_iter_next(iter);
+ old = bch2_btree_iter_next(&iter);
 ret = bkey_err(old);
 if (ret)
 break;
@@ -261,7 +261,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
 }
 }
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -311,12 +311,11 @@ int bch2_extent_update(struct btree_trans *trans,
 : 0;
 if (i_sectors_delta || new_i_size) {
- struct btree_iter *inode_iter;
+ struct btree_iter inode_iter;
 struct bch_inode_unpacked inode_u;
- inode_iter = bch2_inode_peek(trans, &inode_u,
+ ret = bch2_inode_peek(trans, &inode_iter, &inode_u,
 k->k.p.inode, BTREE_ITER_INTENT);
- ret = PTR_ERR_OR_ZERO(inode_iter);
 if (ret)
 return ret;
@@ -345,11 +344,11 @@ int bch2_extent_update(struct btree_trans *trans,
 inode_p.inode.k.p.snapshot = iter->snapshot;
- ret = bch2_trans_update(trans, inode_iter,
+ ret = bch2_trans_update(trans, &inode_iter,
 &inode_p.inode.k_i, 0);
 }
- bch2_trans_iter_put(trans, inode_iter);
+ bch2_trans_iter_exit(trans, &inode_iter);
 if (ret)
 return ret;
@@ -424,18 +423,18 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
 u64 *journal_seq, s64 *i_sectors_delta)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 int ret = 0;
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
 POS(inum, start),
 BTREE_ITER_INTENT);
- ret = bch2_fpunch_at(&trans, iter, POS(inum, end),
+ ret = bch2_fpunch_at(&trans, &iter, POS(inum, end),
 journal_seq, i_sectors_delta);
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 if (ret == -EINTR)
@@ -451,28 +450,28 @@ static int bch2_write_index_default(struct bch_write_op *op)
 struct keylist *keys = &op->insert_keys;
 struct bkey_i *k = bch2_keylist_front(keys);
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 int ret;
 bch2_bkey_buf_init(&sk);
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
- bkey_start_pos(&k->k),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ bkey_start_pos(&k->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 do {
 bch2_trans_begin(&trans);
 k = bch2_keylist_front(keys);
- k->k.p.snapshot = iter->snapshot;
+ k->k.p.snapshot = iter.snapshot;
 bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
 bkey_copy(sk.k, k);
- bch2_cut_front(iter->pos, sk.k);
+ bch2_cut_front(iter.pos, sk.k);
- ret = bch2_extent_update(&trans, iter, sk.k,
+ ret = bch2_extent_update(&trans, &iter, sk.k,
 &op->res, op_journal_seq(op),
 op->new_i_size, &op->i_sectors_delta,
 op->flags & BCH_WRITE_CHECK_ENOSPC);
@@ -481,11 +480,11 @@ static int bch2_write_index_default(struct bch_write_op *op)
 if (ret)
 break;
- if (bkey_cmp(iter->pos, k->k.p) >= 0)
+ if (bkey_cmp(iter.pos, k->k.p) >= 0)
 bch2_keylist_pop_front(keys);
 } while (!bch2_keylist_empty(keys));
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 bch2_bkey_buf_exit(&sk, c);
@@ -1638,7 +1637,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
 unsigned flags)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_buf sk;
 struct bkey_s_c k;
 int ret;
@@ -1649,12 +1648,12 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
 bch2_bkey_buf_init(&sk);
 bch2_trans_init(&trans, c, 0, 0);
- iter = bch2_trans_get_iter(&trans, rbio->data_btree,
- rbio->read_pos, BTREE_ITER_SLOTS);
+ bch2_trans_iter_init(&trans, &iter, rbio->data_btree,
+ rbio->read_pos, BTREE_ITER_SLOTS);
 retry:
 rbio->bio.bi_status = 0;
- k = bch2_btree_iter_peek_slot(iter);
+ k = bch2_btree_iter_peek_slot(&iter);
 if (bkey_err(k))
 goto err;
@@ -1681,7 +1680,7 @@ retry:
 goto err;
 out:
 bch2_rbio_done(rbio);
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 bch2_bkey_buf_exit(&sk, c);
 return;
@@ -1747,7 +1746,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
 struct bch_fs *c = rbio->c;
 u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
 struct bch_extent_crc_unpacked new_crc;
- struct btree_iter *iter = NULL;
+ struct btree_iter iter;
 struct bkey_i *new;
 struct bkey_s_c k;
 int ret = 0;
@@ -1755,9 +1754,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
 if (crc_is_compressed(rbio->pick.crc))
 return 0;
- iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->data_pos,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(&iter);
 if ((ret = bkey_err(k)))
 goto out;
@@ -1792,9 +1791,9 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
 if (!bch2_bkey_narrow_crcs(new, new_crc))
 goto out;
- ret = bch2_trans_update(trans, iter, new, 0);
+ ret = bch2_trans_update(trans, &iter, new, 0);
 out:
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -1965,7 +1964,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
 unsigned *offset_into_extent,
 struct bkey_buf *orig_k)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 u64 reflink_offset;
 int ret;
@@ -1973,10 +1972,10 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
 reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
 *offset_into_extent;
- iter = bch2_trans_get_iter(trans, BTREE_ID_reflink,
- POS(0, reflink_offset),
- BTREE_ITER_SLOTS);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink,
+ POS(0, reflink_offset),
+ BTREE_ITER_SLOTS);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
 goto err;
@@ -1993,10 +1992,10 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
 goto err;
 }
- *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
+ *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
 bch2_bkey_buf_reassemble(orig_k, trans->c, k);
 err:
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -2273,7 +2272,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
 struct bch_io_failures *failed, unsigned flags)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_buf sk;
 struct bkey_s_c k;
 int ret;
@@ -2282,10 +2281,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
 bch2_bkey_buf_init(&sk);
 bch2_trans_init(&trans, c, 0, 0);
-
- iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
- POS(inode, bvec_iter.bi_sector),
- BTREE_ITER_SLOTS);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
+ POS(inode, bvec_iter.bi_sector),
+ BTREE_ITER_SLOTS);
 retry:
 bch2_trans_begin(&trans);
@@ -2302,15 +2300,15 @@ retry:
 break;
 }
- bch2_btree_iter_set_pos(iter,
+ bch2_btree_iter_set_pos(&iter,
 POS(inode, bvec_iter.bi_sector));
- k = bch2_btree_iter_peek_slot(iter);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
 break;
- offset_into_extent = iter->pos.offset -
+ offset_into_extent = iter.pos.offset -
 bkey_start_offset(k.k);
 sectors = k.k->size - offset_into_extent;
@@ -2341,7 +2339,7 @@ retry:
 if (bvec_iter.bi_size == bytes)
 flags |= BCH_READ_LAST_FRAGMENT;
- ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos,
+ ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos,
 data_btree, k,
 offset_into_extent, failed, flags);
 if (ret)
@@ -2357,7 +2355,7 @@ retry:
 if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
 goto retry;
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 bch2_bkey_buf_exit(&sk, c);
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index f2060f903cbc..68fb2ebd91ac 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -250,7 +250,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
 bch2_trans_init(&trans, c, 0, 0);
 for (i = 0; i < BTREE_ID_NR; i++) {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct btree *b;
 for_each_btree_node(&trans, iter, i, POS_MIN,
@@ -259,7 +259,7 @@ void bch2_blacklist_entries_gc(struct work_struct *work)
 bch2_trans_exit(&trans);
 return;
 }
- bch2_trans_iter_free(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 }
 ret = bch2_trans_exit(&trans);
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 1f65eca48c6e..1899326d9754 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -39,7 +39,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
 enum btree_id btree_id)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct bkey_buf sk;
 int ret = 0;
@@ -47,13 +47,13 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
 bch2_bkey_buf_init(&sk);
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
- BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
+ BTREE_ITER_PREFETCH);
- while ((k = bch2_btree_iter_peek(iter)).k &&
+ while ((k = bch2_btree_iter_peek(&iter)).k &&
 !(ret = bkey_err(k))) {
 if (!bch2_bkey_has_device(k, dev_idx)) {
- bch2_btree_iter_advance(iter);
+ bch2_btree_iter_advance(&iter);
 continue;
 }
@@ -71,10 +71,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
 */
 bch2_extent_normalize(c, bkey_i_to_s(sk.k));
- bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
+ bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
- ret = bch2_btree_iter_traverse(iter) ?:
- bch2_trans_update(&trans, iter, sk.k, 0) ?:
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(&trans, &iter, sk.k, 0) ?:
 bch2_trans_commit(&trans, NULL, NULL,
 BTREE_INSERT_NOFAIL);
@@ -88,7 +88,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
 if (ret)
 break;
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 ret = bch2_trans_exit(&trans) ?: ret;
 bch2_bkey_buf_exit(&sk, c);
@@ -107,7 +107,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct closure cl;
 struct btree *b;
 struct bkey_buf k;
@@ -139,9 +139,9 @@ retry:
 break;
 }
- ret = bch2_btree_node_update_key(&trans, iter, b, k.k, false);
+ ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false);
 if (ret == -EINTR) {
- b = bch2_btree_iter_peek_node(iter);
+ b = bch2_btree_iter_peek_node(&iter);
 ret = 0;
 goto retry;
 }
@@ -150,7 +150,7 @@ retry:
 break;
 }
 }
- bch2_trans_iter_free(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 if (ret)
 goto err;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 3c2e566beb2d..eb2b91f7e682 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -56,7 +56,7 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 {
 struct bch_fs *c = op->c;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct migrate_write *m =
 container_of(op, struct migrate_write, op);
 struct keylist *keys = &op->insert_keys;
@@ -69,9 +69,9 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
- iter = bch2_trans_get_iter(&trans, m->btree_id,
- bkey_start_pos(&bch2_keylist_front(keys)->k),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_iter_init(&trans, &iter, m->btree_id,
+ bkey_start_pos(&bch2_keylist_front(keys)->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 while (1) {
 struct bkey_s_c k;
@@ -86,7 +86,7 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 bch2_trans_begin(&trans);
- k = bch2_btree_iter_peek_slot(iter);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (ret)
 goto err;
@@ -102,9 +102,9 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
 new = bkey_i_to_extent(_new.k);
- bch2_cut_front(iter->pos, &new->k_i);
+ bch2_cut_front(iter.pos, &new->k_i);
- bch2_cut_front(iter->pos, insert);
+ bch2_cut_front(iter.pos, insert);
 bch2_cut_back(new->k.p, insert);
 bch2_cut_back(insert->k.p, &new->k_i);
@@ -146,7 +146,7 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 op->opts.background_target,
 op->opts.data_replicas);
- ret = bch2_sum_sector_overwrites(&trans, iter, insert,
+ ret = bch2_sum_sector_overwrites(&trans, &iter, insert,
 &extending,
 &should_check_enospc,
 &i_sectors_delta,
@@ -165,13 +165,13 @@ int bch2_migrate_index_update(struct bch_write_op *op)
 next_pos = insert->k.p;
- ret = bch2_trans_update(&trans, iter, insert, 0) ?:
+ ret = bch2_trans_update(&trans, &iter, insert, 0) ?:
 bch2_trans_commit(&trans, &op->res,
 op_journal_seq(op),
 BTREE_INSERT_NOFAIL|
 m->data_opts.btree_insert_flags);
 if (!ret) {
- bch2_btree_iter_set_pos(iter, next_pos);
+ bch2_btree_iter_set_pos(&iter, next_pos);
 atomic_long_inc(&c->extent_migrate_done);
 }
 err:
@@ -180,7 +180,7 @@ err:
 if (ret)
 break;
 next:
- while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
+ while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
 bch2_keylist_pop_front(keys);
 if (bch2_keylist_empty(keys))
 goto out;
@@ -188,18 +188,18 @@ next:
 continue;
 nomatch:
 if (m->ctxt) {
- BUG_ON(k.k->p.offset <= iter->pos.offset);
+ BUG_ON(k.k->p.offset <= iter.pos.offset);
 atomic64_inc(&m->ctxt->stats->keys_raced);
- atomic64_add(k.k->p.offset - iter->pos.offset,
+ atomic64_add(k.k->p.offset - iter.pos.offset,
 &m->ctxt->stats->sectors_raced);
 }
 atomic_long_inc(&c->extent_migrate_raced);
 trace_move_race(&new->k);
- bch2_btree_iter_advance(iter);
+ bch2_btree_iter_advance(&iter);
 goto next;
 }
 out:
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 bch2_trans_exit(&trans);
 bch2_bkey_buf_exit(&_insert, c);
 bch2_bkey_buf_exit(&_new, c);
@@ -524,13 +524,13 @@ err:
 static int lookup_inode(struct btree_trans *trans, struct bpos pos,
 struct bch_inode_unpacked *inode)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos,
- BTREE_ITER_ALL_SNAPSHOTS);
- k = bch2_btree_iter_peek(iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
+ BTREE_ITER_ALL_SNAPSHOTS);
+ k = bch2_btree_iter_peek(&iter);
 ret = bkey_err(k);
 if (ret)
 goto err;
@@ -548,7 +548,7 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos,
 if (ret)
 goto err;
 err:
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -566,7 +566,7 @@ static int __bch2_move_data(struct bch_fs *c,
 struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 struct bkey_buf sk;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 struct data_opts data_opts;
 enum data_cmd data_cmd;
@@ -580,8 +580,8 @@ static int __bch2_move_data(struct bch_fs *c,
 stats->btree_id = btree_id;
 stats->pos = start;
- iter = bch2_trans_get_iter(&trans, btree_id, start,
- BTREE_ITER_PREFETCH);
+ bch2_trans_iter_init(&trans, &iter, btree_id, start,
+ BTREE_ITER_PREFETCH);
 if (rate)
 bch2_ratelimit_reset(rate);
@@ -612,9 +612,9 @@ static int __bch2_move_data(struct bch_fs *c,
 bch2_trans_begin(&trans);
- k = bch2_btree_iter_peek(iter);
+ k = bch2_btree_iter_peek(&iter);
- stats->pos = iter->pos;
+ stats->pos = iter.pos;
 if (!k.k)
 break;
@@ -687,12 +687,12 @@ next:
 atomic64_add(k.k->size * bch2_bkey_nr_ptrs_allocated(k),
 &stats->sectors_seen);
 next_nondata:
- bch2_btree_iter_advance(iter);
+ bch2_btree_iter_advance(&iter);
 bch2_trans_cond_resched(&trans);
 }
 out:
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 ret = bch2_trans_exit(&trans) ?: ret;
 bch2_bkey_buf_exit(&sk, c);
@@ -786,7 +786,7 @@ static int bch2_move_btree(struct bch_fs *c,
 bool kthread = (current->flags & PF_KTHREAD) != 0;
 struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct btree *b;
 enum btree_id id;
 struct data_opts data_opts;
@@ -813,7 +813,7 @@ static int bch2_move_btree(struct bch_fs *c,
 bpos_cmp(b->key.k.p, end_pos)) > 0)
 break;
- stats->pos = iter->pos;
+ stats->pos = iter.pos;
 switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
 case DATA_SKIP:
@@ -827,13 +827,13 @@ static int bch2_move_btree(struct bch_fs *c,
 BUG();
 }
- ret = bch2_btree_node_rewrite(&trans, iter,
+ ret = bch2_btree_node_rewrite(&trans, &iter,
 b->data->keys.seq, 0) ?: ret;
 next:
 bch2_trans_cond_resched(&trans);
 }
+ bch2_trans_iter_exit(&trans, &iter);
- ret = bch2_trans_iter_free(&trans, iter) ?: ret;
 if (kthread && kthread_should_stop())
 break;
 }
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index 7861781a4a7f..9b0f4d3f176d 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -357,7 +357,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
 static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
 {
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret = 0;
@@ -372,7 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
 if (ret)
 break;
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 return bch2_trans_exit(&trans) ?: ret;
 }
@@ -419,7 +419,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
 unsigned i, qtypes = enabled_qtypes(c);
 struct bch_memquota_type *q;
 struct btree_trans trans;
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bch_inode_unpacked u;
 struct bkey_s_c k;
 int ret;
@@ -450,7 +450,7 @@ int bch2_fs_quota_read(struct bch_fs *c)
 KEY_TYPE_QUOTA_NOCHECK);
 }
 }
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_exit(&trans, &iter);
 return bch2_trans_exit(&trans) ?: ret;
 }
@@ -717,13 +717,13 @@ static int bch2_set_quota_trans(struct btree_trans *trans,
 struct bkey_i_quota *new_quota,
 struct qc_dqblk *qdq)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 struct bkey_s_c k;
 int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_quotas, new_quota->k.p,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_quotas, new_quota->k.p,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(&iter);
 ret = bkey_err(k);
 if (unlikely(ret))
@@ -742,8 +742,8 @@ static int bch2_set_quota_trans(struct btree_trans *trans,
 if (qdq->d_fieldmask & QC_INO_HARD)
 new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
- ret = bch2_trans_update(trans, iter, &new_quota->k_i, 0);
- bch2_trans_iter_put(trans, iter);
+ ret = bch2_trans_update(trans, &iter, &new_quota->k_i, 0);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 71b0f14f41f3..11208e83fabe 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -327,7 +327,7 @@ static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
 bch2_bkey_buf_reassemble(&tmp, c, k);
 bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
- b->c.btree_id, b->c.level - 1);
+ b->c.btree_id, b->c.level - 1);
 bch2_btree_and_journal_iter_advance(&iter);
 i++;
@@ -518,16 +518,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
 enum btree_id id, unsigned level,
 struct bkey_i *k)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 int ret;
- iter = bch2_trans_get_node_iter(trans, id, k->k.p,
- BTREE_MAX_DEPTH, level,
- BTREE_ITER_INTENT|
- BTREE_ITER_NOT_EXTENTS);
- ret = bch2_btree_iter_traverse(iter) ?:
- bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_node_iter_init(trans, &iter, id, k->k.p,
+ BTREE_MAX_DEPTH, level,
+ BTREE_ITER_INTENT|
+ BTREE_ITER_NOT_EXTENTS);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
@@ -545,16 +545,16 @@ static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
 static int __bch2_alloc_replay_key(struct btree_trans *trans, struct bkey_i *k)
 {
- struct btree_iter *iter;
+ struct btree_iter iter;
 int ret;
- iter = bch2_trans_get_iter(trans, BTREE_ID_alloc, k->k.p,
- BTREE_ITER_CACHED|
- BTREE_ITER_CACHED_NOFILL|
- BTREE_ITER_INTENT);
- ret = bch2_btree_iter_traverse(iter) ?:
- bch2_trans_update(trans, iter, k, BTREE_TRIGGER_NORUN);
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, k->k.p,
+ BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, BTREE_TRIGGER_NORUN);
+ bch2_trans_iter_exit(trans, &iter);
 return ret;
 }
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 3d9c5c5b0eba..576cfbccf5b5 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -116,7 +116,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 struct bkey_i *orig)
 {
 struct bch_fs *c = trans->c;
- struct btree_iter *reflink_iter;
+ struct btree_iter reflink_iter = { NULL };
 struct bkey_s_c k;
 struct bkey_i *r_v;
 struct bkey_i_reflink_p *r_p;
@@ -129,8 +129,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 for_each_btree_key(trans, reflink_iter, BTREE_ID_reflink,
 POS(0, c->reflink_hint),
 BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
- if (reflink_iter->pos.inode) {
- bch2_btree_iter_set_pos(reflink_iter, POS_MIN);
+ if (reflink_iter.pos.inode) {
+ bch2_btree_iter_set_pos(&reflink_iter, POS_MIN);
 continue;
 }
@@ -142,7 +142,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 goto err;
 /* rewind iter to start of hole, if necessary: */
- bch2_btree_iter_set_pos_to_extent_start(reflink_iter);
+ bch2_btree_iter_set_pos_to_extent_start(&reflink_iter);
 r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k));
 ret = PTR_ERR_OR_ZERO(r_v);
@@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 bkey_init(&r_v->k);
 r_v->k.type = bkey_type_to_indirect(&orig->k);
- r_v->k.p = reflink_iter->pos;
+ r_v->k.p = reflink_iter.pos;
 bch2_key_resize(&r_v->k, orig->k.size);
 r_v->k.version = orig->k.version;
@@ -161,7 +161,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 *refcount = 0;
 memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
- ret = bch2_trans_update(trans, reflink_iter, r_v, 0);
+ ret = bch2_trans_update(trans, &reflink_iter, r_v, 0);
 if (ret)
 goto err;
@@ -172,9 +172,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0);
 err:
- if (!IS_ERR(reflink_iter))
- c->reflink_hint = reflink_iter->pos.offset;
- bch2_trans_iter_put(trans, reflink_iter);
+ c->reflink_hint = reflink_iter.pos.offset;
+ bch2_trans_iter_exit(trans, &reflink_iter);
 return ret;
 }
@@ -184,7 +183,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 struct bkey_s_c k;
 int ret;
- for_each_btree_key_continue(iter, 0, k, ret) {
+ for_each_btree_key_continue(*iter, 0, k, ret) {
 if (bkey_cmp(iter->pos, end) >= 0)
 break;
@@ -203,7 +202,7 @@ s64 bch2_remap_range(struct bch_fs *c,
 u64 new_i_size, s64 *i_sectors_delta)
 {
 struct btree_trans trans;
- struct btree_iter *dst_iter, *src_iter;
+ struct btree_iter dst_iter, src_iter;
 struct bkey_s_c src_k;
 struct bkey_buf new_dst, new_src;
 struct bpos dst_end = dst_start, src_end = src_start;
@@ -223,13 +222,13 @@ s64 bch2_remap_range(struct bch_fs *c,
 bch2_bkey_buf_init(&new_src);
 bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
- src_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, src_start,
- BTREE_ITER_INTENT);
- dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start,
- BTREE_ITER_INTENT);
+ bch2_trans_iter_init(&trans, &src_iter, BTREE_ID_extents, src_start,
+ BTREE_ITER_INTENT);
+ bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start,
+ BTREE_ITER_INTENT);
 while ((ret == 0 || ret == -EINTR) &&
- bkey_cmp(dst_iter->pos, dst_end) < 0) {
+ bkey_cmp(dst_iter.pos, dst_end) < 0) {
 struct disk_reservation disk_res = { 0 };
 bch2_trans_begin(&trans);
@@ -239,31 +238,31 @@ s64 bch2_remap_range(struct bch_fs *c,
 break;
 }
- dst_done = dst_iter->pos.offset - dst_start.offset;
+ dst_done = dst_iter.pos.offset - dst_start.offset;
 src_want = POS(src_start.inode, src_start.offset + dst_done);
- bch2_btree_iter_set_pos(src_iter, src_want);
+ bch2_btree_iter_set_pos(&src_iter, src_want);
- src_k = get_next_src(src_iter, src_end);
+ src_k = get_next_src(&src_iter, src_end);
 ret = bkey_err(src_k);
 if (ret)
 continue;
- if (bkey_cmp(src_want, src_iter->pos) < 0) {
- ret = bch2_fpunch_at(&trans, dst_iter,
+ if (bkey_cmp(src_want, src_iter.pos) < 0) {
+ ret = bch2_fpunch_at(&trans, &dst_iter,
 bpos_min(dst_end,
- POS(dst_iter->pos.inode, dst_iter->pos.offset +
- src_iter->pos.offset - src_want.offset)),
+ POS(dst_iter.pos.inode, dst_iter.pos.offset +
+ src_iter.pos.offset - src_want.offset)),
 journal_seq, i_sectors_delta);
 continue;
 }
 if (src_k.k->type != KEY_TYPE_reflink_p) {
- bch2_btree_iter_set_pos_to_extent_start(src_iter);
+ bch2_btree_iter_set_pos_to_extent_start(&src_iter);
 bch2_bkey_buf_reassemble(&new_src, c, src_k);
 src_k = bkey_i_to_s_c(new_src.k);
- ret = bch2_make_extent_indirect(&trans, src_iter,
+ ret = bch2_make_extent_indirect(&trans, &src_iter,
 new_src.k);
 if (ret)
 continue;
@@ -286,43 +285,42 @@ s64 bch2_remap_range(struct bch_fs *c,
 BUG();
 }
- new_dst.k->k.p = dst_iter->pos;
+ new_dst.k->k.p = dst_iter.pos;
 bch2_key_resize(&new_dst.k->k,
 min(src_k.k->p.offset - src_want.offset,
- dst_end.offset - dst_iter->pos.offset));
- ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
+ dst_end.offset - dst_iter.pos.offset));
+ ret = bch2_extent_update(&trans, &dst_iter, new_dst.k,
 &disk_res, journal_seq,
 new_i_size, i_sectors_delta,
 true);
 bch2_disk_reservation_put(c, &disk_res);
 }
- bch2_trans_iter_put(&trans, dst_iter);
- bch2_trans_iter_put(&trans, src_iter);
+ bch2_trans_iter_exit(&trans, &dst_iter);
+ bch2_trans_iter_exit(&trans, &src_iter);
- BUG_ON(!ret && bkey_cmp(dst_iter->pos, dst_end));
- BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0);
+ BUG_ON(!ret && bkey_cmp(dst_iter.pos, dst_end));
+ BUG_ON(bkey_cmp(dst_iter.pos, dst_end) > 0);
- dst_done = dst_iter->pos.offset - dst_start.offset;
- new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
+ dst_done = dst_iter.pos.offset - dst_start.offset;
+ new_i_size = min(dst_iter.pos.offset << 9, new_i_size);
 do {
 struct bch_inode_unpacked inode_u;
- struct btree_iter *inode_iter;
+ struct btree_iter inode_iter = { NULL };
 bch2_trans_begin(&trans);
- inode_iter = bch2_inode_peek(&trans, &inode_u,
+ ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u,
 dst_start.inode, BTREE_ITER_INTENT);
- ret2 = PTR_ERR_OR_ZERO(inode_iter);
 if (!ret2 &&
 inode_u.bi_size < new_i_size) {
 inode_u.bi_size = new_i_size;
- ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
+ ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
 bch2_trans_commit(&trans, NULL, journal_seq, 0);
 }
- bch2_trans_iter_put(&trans, inode_iter);
+ bch2_trans_iter_exit(&trans,
&inode_iter); } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 236023494191..c6a132b3c5bb 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -139,18 +139,18 @@ struct bch_hash_desc { bool (*cmp_bkey)(struct bkey_s_c, struct bkey_s_c); }; -static __always_inline struct btree_iter * +static __always_inline int bch2_hash_lookup(struct btree_trans *trans, + struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, u64 inode, const void *key, unsigned flags) { - struct btree_iter *iter; struct bkey_s_c k; int ret; - for_each_btree_key(trans, iter, desc.btree_id, + for_each_btree_key(trans, *iter, desc.btree_id, POS(inode, desc.hash_key(info, key)), BTREE_ITER_SLOTS|flags, k, ret) { if (iter->pos.inode != inode) @@ -158,7 +158,7 @@ bch2_hash_lookup(struct btree_trans *trans, if (k.k->type == desc.key_type) { if (!desc.cmp_key(k, key)) - return iter; + return 0; } else if (k.k->type == KEY_TYPE_hash_whiteout) { ; } else { @@ -166,35 +166,33 @@ bch2_hash_lookup(struct btree_trans *trans, break; } } - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, iter); - return ERR_PTR(ret ?: -ENOENT); + return ret ?: -ENOENT; } -static __always_inline struct btree_iter * +static __always_inline int bch2_hash_hole(struct btree_trans *trans, + struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, u64 inode, const void *key) { - struct btree_iter *iter; struct bkey_s_c k; int ret; - for_each_btree_key(trans, iter, desc.btree_id, + for_each_btree_key(trans, *iter, desc.btree_id, POS(inode, desc.hash_key(info, key)), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { if (iter->pos.inode != inode) break; if (k.k->type != desc.key_type) - return iter; + return 0; } + bch2_trans_iter_exit(trans, iter); - iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; - bch2_trans_iter_put(trans, iter); - - return ERR_PTR(ret ?: -ENOSPC); + return ret ?: -ENOSPC; } static __always_inline @@ -203,13 +201,13 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, const struct bch_hash_info *info, struct btree_iter *start) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret; - iter = bch2_trans_copy_iter(trans, start); + bch2_trans_copy_iter(&iter, start); - bch2_btree_iter_advance(iter); + bch2_btree_iter_advance(&iter); for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { if (k.k->type != desc.key_type && @@ -218,13 +216,12 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, if (k.k->type == desc.key_type && desc.hash_bkey(info, k) <= start->pos.offset) { - iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; ret = 1; break; } } - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -234,7 +231,7 @@ int bch2_hash_set(struct btree_trans *trans, const struct bch_hash_info *info, u64 inode, struct bkey_i *insert, int flags) { - struct btree_iter *iter, *slot = NULL; + struct btree_iter iter, slot = { NULL }; struct bkey_s_c k; bool found = false; int ret; @@ -242,7 +239,7 @@ int bch2_hash_set(struct btree_trans *trans, for_each_btree_key(trans, iter, desc.btree_id, POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter->pos.inode != inode) + if (iter.pos.inode != inode) break; if (k.k->type == desc.key_type) { @@ -253,9 +250,9 @@ int bch2_hash_set(struct btree_trans *trans, continue; } - if (!slot && + if (!slot.path && !(flags & 
BCH_HASH_SET_MUST_REPLACE)) - slot = bch2_trans_copy_iter(trans, iter); + bch2_trans_copy_iter(&slot, &iter); if (k.k->type != KEY_TYPE_hash_whiteout) goto not_found; @@ -264,8 +261,8 @@ int bch2_hash_set(struct btree_trans *trans, if (!ret) ret = -ENOSPC; out: - bch2_trans_iter_put(trans, slot); - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &slot); + bch2_trans_iter_exit(trans, &iter); return ret; found: @@ -277,11 +274,11 @@ not_found: } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { ret = -EEXIST; } else { - if (!found && slot) + if (!found && slot.path) swap(iter, slot); - insert->k.p = iter->pos; - ret = bch2_trans_update(trans, iter, insert, 0); + insert->k.p = iter.pos; + ret = bch2_trans_update(trans, &iter, insert, 0); } goto out; @@ -318,16 +315,16 @@ int bch2_hash_delete(struct btree_trans *trans, const struct bch_hash_info *info, u64 inode, const void *key) { - struct btree_iter *iter; + struct btree_iter iter; int ret; - iter = bch2_hash_lookup(trans, desc, info, inode, key, + ret = bch2_hash_lookup(trans, &iter, desc, info, inode, key, BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter); + if (ret) + return ret; - ret = bch2_hash_delete_at(trans, desc, info, iter); - bch2_trans_iter_put(trans, iter); + ret = bch2_hash_delete_at(trans, desc, info, &iter); + bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 1d793e554084..b18ca3947ac8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -494,11 +494,11 @@ static void __bch2_fs_free(struct bch_fs *c) percpu_free_rwsem(&c->mark_lock); free_percpu(c->online_reserved); - if (c->btree_iters_bufs) + if (c->btree_paths_bufs) for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(c->btree_iters_bufs, cpu)->iter); + kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path); - free_percpu(c->btree_iters_bufs); + free_percpu(c->btree_paths_bufs); free_percpu(c->pcpu); mempool_exit(&c->large_bkey_pool); mempool_exit(&c->btree_bounce_pool); @@ -783,7 +783,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) BIOSET_NEED_BVECS) || !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || !(c->online_reserved = alloc_percpu(u64)) || - !(c->btree_iters_bufs = alloc_percpu(struct btree_iter_buf)) || + !(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index b5ce336f00ca..92e58f5c6bbf 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -290,7 +290,7 @@ static int fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0, nr_compressed_extents = 0, @@ -325,6 +325,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c break; } } + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 43b514974d91..1b583b134853 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -29,7 +29,7 @@ static void delete_test_keys(struct bch_fs *c) static int test_delete(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct 
bkey_i_cookie k; int ret; @@ -37,13 +37,12 @@ static int test_delete(struct bch_fs *c, u64 nr) k.k.p.snapshot = U32_MAX; bch2_trans_init(&trans, c, 0, 0); - - iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p, - BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, + BTREE_ITER_INTENT); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, &k.k_i, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { bch_err(c, "update error in test_delete: %i", ret); goto err; @@ -51,8 +50,8 @@ static int test_delete(struct bch_fs *c, u64 nr) pr_info("deleting once"); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_btree_delete_at(&trans, iter, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { bch_err(c, "delete error (first) in test_delete: %i", ret); goto err; @@ -60,14 +59,14 @@ static int test_delete(struct bch_fs *c, u64 nr) pr_info("deleting twice"); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_btree_delete_at(&trans, iter, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { bch_err(c, "delete error (second) in test_delete: %i", ret); goto err; } err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -75,7 +74,7 @@ err: static int test_delete_written(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -84,12 +83,12 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, k.k.p, - BTREE_ITER_INTENT); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, + BTREE_ITER_INTENT); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, &k.k_i, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { bch_err(c, "update error in test_delete_written: %i", ret); goto err; @@ -99,14 +98,14 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_journal_flush_all_pins(&c->journal); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_btree_delete_at(&trans, iter, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { bch_err(c, "delete error in test_delete_written: %i", ret); goto err; } err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -114,7 +113,7 @@ err: static int test_iterate(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter = NULL; + struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; @@ -156,12 +155,12 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) + while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) BUG_ON(k.k->p.offset != --i); BUG_ON(i); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -169,7 +168,7 @@ err: static int test_iterate_extents(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter = NULL; + struct btree_iter iter 
= { NULL }; struct bkey_s_c k; u64 i; int ret = 0; @@ -210,14 +209,14 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(iter)).k)) { + while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) { BUG_ON(k.k->p.offset != i); i = bkey_start_offset(k.k); } BUG_ON(i); err: - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } @@ -225,7 +224,7 @@ err: static int test_iterate_slots(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; @@ -263,7 +262,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) BUG_ON(k.k->p.offset != i); i += 2; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); BUG_ON(i != nr * 2); @@ -280,7 +279,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) if (i == nr * 2) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); err: bch2_trans_exit(&trans); return ret; @@ -289,7 +288,7 @@ err: static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; @@ -326,7 +325,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) BUG_ON(k.k->size != 8); i += 16; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); BUG_ON(i != nr); @@ -345,7 +344,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) if (i == nr) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); err: bch2_trans_exit(&trans); return 0; @@ -358,21 +357,19 @@ err: static int test_peek_end(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; bch2_trans_init(&trans, c, 0, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0); - - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(&iter); BUG_ON(k.k); - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(&iter); BUG_ON(k.k); - bch2_trans_iter_put(&trans, iter); - + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return 0; } @@ -380,21 +377,19 @@ static int test_peek_end(struct bch_fs *c, u64 nr) static int test_peek_end_extents(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; bch2_trans_init(&trans, c, 0, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, POS_MIN, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, 0); - - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(&iter); BUG_ON(k.k); - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(&iter); BUG_ON(k.k); - bch2_trans_iter_put(&trans, iter); - + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return 0; } @@ -540,18 +535,18 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) static int rand_lookup(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; u64 i; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0); for (i = 0; i < nr; i++) { - 
bch2_btree_iter_set_pos(iter, POS(0, test_rand())); + bch2_btree_iter_set_pos(&iter, POS(0, test_rand())); - k = bch2_btree_iter_peek(iter); + k = bch2_btree_iter_peek(&iter); ret = bkey_err(k); if (ret) { bch_err(c, "error in rand_lookup: %i", ret); @@ -559,63 +554,73 @@ static int rand_lookup(struct bch_fs *c, u64 nr) } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } +static int rand_mixed_trans(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i_cookie *cookie, + u64 i, u64 pos) +{ + struct bkey_s_c k; + int ret; + + bch2_btree_iter_set_pos(iter, POS(0, pos)); + + k = bch2_btree_iter_peek(iter); + ret = bkey_err(k); + if (ret && ret != -EINTR) + bch_err(trans->c, "lookup error in rand_mixed: %i", ret); + if (ret) + return ret; + + if (!(i & 3) && k.k) { + bkey_cookie_init(&cookie->k_i); + cookie->k.p = iter->pos; + bch2_trans_update(trans, iter, &cookie->k_i, 0); + } + + return 0; +} + static int rand_mixed(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; + struct btree_iter iter; + struct bkey_i_cookie cookie; int ret = 0; - u64 i; + u64 i, rand; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS_MIN, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, POS_MIN, 0); for (i = 0; i < nr; i++) { - bch2_btree_iter_set_pos(iter, POS(0, test_rand())); - - k = bch2_btree_iter_peek(iter); - ret = bkey_err(k); + rand = test_rand(); + ret = __bch2_trans_do(&trans, NULL, NULL, 0, + rand_mixed_trans(&trans, &iter, &cookie, i, rand)); if (ret) { - bch_err(c, "lookup error in rand_mixed: %i", ret); + bch_err(c, "update error in rand_mixed: %i", ret); break; } - - if (!(i & 3) && k.k) { - struct bkey_i_cookie k; - - bkey_cookie_init(&k.k_i); - k.k.p = iter->pos; - - ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, &k.k_i, 0)); - if (ret) { - bch_err(c, "update error in rand_mixed: %i", ret); - break; - } - } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; } static int __do_delete(struct btree_trans *trans, struct bpos pos) { - struct btree_iter *iter; + struct btree_iter iter; struct bkey_i delete; struct bkey_s_c k; int ret = 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_xattrs, pos, - BTREE_ITER_INTENT); - k = bch2_btree_iter_peek(iter); + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek(&iter); ret = bkey_err(k); if (ret) goto err; @@ -626,9 +631,9 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos) bkey_init(&delete.k); delete.k.p = k.k->p; - ret = bch2_trans_update(trans, iter, &delete, 0); + ret = bch2_trans_update(trans, &iter, &delete, 0); err: - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_exit(trans, &iter); return ret; } @@ -658,7 +663,7 @@ static int rand_delete(struct bch_fs *c, u64 nr) static int seq_insert(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct bkey_i_cookie insert; int ret = 0; @@ -670,11 +675,11 @@ static int seq_insert(struct bch_fs *c, u64 nr) for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - insert.k.p = iter->pos; + insert.k.p = iter.pos; ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - 
bch2_trans_update(&trans, iter, &insert.k_i, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, &insert.k_i, 0)); if (ret) { bch_err(c, "error in seq_insert: %i", ret); break; @@ -683,7 +688,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) if (++i == nr) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; @@ -692,7 +697,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) static int seq_lookup(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -700,7 +705,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr) for_each_btree_key(&trans, iter, BTREE_ID_xattrs, POS_MIN, 0, k, ret) ; - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; @@ -709,7 +714,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr) static int seq_overwrite(struct bch_fs *c, u64 nr) { struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -722,14 +727,14 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bkey_reassemble(&u.k_i, k); ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_btree_iter_traverse(iter) ?: - bch2_trans_update(&trans, iter, &u.k_i, 0)); + bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(&trans, &iter, &u.k_i, 0)); if (ret) { bch_err(c, "error in seq_overwrite: %i", ret); break; } } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index af1f415fb5e7..44a556518d4a 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -540,7 +540,7 @@ TRACE_EVENT(copygc_wait, __entry->wait_amount, __entry->until) ); -TRACE_EVENT(trans_get_iter, +TRACE_EVENT(trans_get_path, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, enum btree_id btree_id, @@ -814,7 +814,7 @@ TRACE_EVENT(iter_traverse, __entry->ret) ); -TRACE_EVENT(iter_set_search_pos, +TRACE_EVENT(path_set_pos, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, enum btree_id btree_id, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index bf4164f98743..babbfaadeb3f 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -122,23 +122,22 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info const char *name, void *buffer, size_t size, int type) { struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode); - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c_xattr xattr; struct bkey_s_c k; int ret; - iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash, - inode->v.i_ino, - &X_SEARCH(type, name, strlen(name)), - 0); - ret = PTR_ERR_OR_ZERO(iter); + ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, + inode->v.i_ino, + &X_SEARCH(type, name, strlen(name)), + 0); if (ret) - goto err; + goto err1; - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) - goto err; + goto err2; xattr = bkey_s_c_to_xattr(k); ret = le16_to_cpu(xattr.v->x_val_len); @@ -148,8 +147,9 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info else memcpy(buffer, xattr_val(xattr.v), ret); } - bch2_trans_iter_put(trans, iter); -err: +err2: + bch2_trans_iter_exit(trans, &iter); +err1: return ret == -ENOENT ? 
-ENODATA : ret; } @@ -279,7 +279,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct bch_fs *c = dentry->d_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct btree_trans trans; - struct btree_iter *iter; + struct btree_iter iter; struct bkey_s_c k; struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; u64 inum = dentry->d_inode->i_ino; @@ -301,7 +301,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret) break; } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; -- cgit v1.2.3 From 284ae18c1d7aa44232baedf860a004ceb32fea62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Mar 2021 01:33:39 -0400 Subject: bcachefs: Add subvolume to ei_inode_info Filesystem operations generally operate within a subvolume: at the start of every btree transaction we'll be looking up (and locking) the subvolume to get the current snapshot ID, which we then use for our other btree lookups in BTREE_ITER_FILTER_SNAPSHOTS mode. But inodes don't record what subvolume they're in - they can't, because if they did we'd have to update every single inode within a subvolume when taking a snapshot in order to keep that field up to date. So it needs to be tracked in memory, based on how we got to that inode. Hence this patch adds a subvolume field to ei_inode_info, and switches to iget5() so we can index by it in the inode hash table. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 6 ++-- fs/bcachefs/fs.c | 85 ++++++++++++++++++++++++++++++++++++-------------- fs/bcachefs/fs.h | 12 ++++++- 3 files changed, 76 insertions(+), 27 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 6d6368555875..ff6b1739342d 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -192,7 +192,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, char *kname = NULL; struct qstr qstr; int ret = 0; - u64 inum; + subvol_inum inum = { .subvol = 1 }; kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); if (!kname) @@ -206,9 +206,9 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, qstr.name = kname; ret = -ENOENT; - inum = bch2_dirent_lookup(c, src->v.i_ino, &hash, + inum.inum = bch2_dirent_lookup(c, src->v.i_ino, &hash, &qstr); - if (!inum) + if (!inum.inum) goto err1; vinode = bch2_vfs_inode_get(c, inum); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 570ae826ebb5..7a994f3f9d20 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -37,7 +37,7 @@ static struct kmem_cache *bch2_inode_cache; -static void bch2_vfs_inode_init(struct bch_fs *, +static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum, struct bch_inode_info *, struct bch_inode_unpacked *); @@ -209,40 +209,68 @@ int bch2_fs_quota_transfer(struct bch_fs *c, return ret; } -struct inode *bch2_vfs_inode_get(struct bch_fs *c, u64 inum) +static int bch2_iget5_test(struct inode *vinode, void *p) +{ + struct bch_inode_info *inode = to_bch_ei(vinode); + subvol_inum *inum = p; + + return inode->ei_subvol == inum->subvol && + inode->ei_inode.bi_inum == inum->inum; +} + +static int bch2_iget5_set(struct inode *vinode, void *p) +{ + struct bch_inode_info *inode = to_bch_ei(vinode); + subvol_inum *inum = p; + + inode->v.i_ino = inum->inum; + inode->ei_subvol = inum->subvol; + inode->ei_inode.bi_inum = inum->inum; + return 0; +} + +static unsigned bch2_inode_hash(subvol_inum inum) +{ + return 
jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL); +} + +struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) { struct bch_inode_unpacked inode_u; struct bch_inode_info *inode; int ret; - inode = to_bch_ei(iget_locked(c->vfs_sb, inum)); + /* + * debug assert, to be removed when we start creating + * subvolumes/snapshots: + */ + BUG_ON(inum.subvol != BCACHEFS_ROOT_SUBVOL); + + inode = to_bch_ei(iget5_locked(c->vfs_sb, + bch2_inode_hash(inum), + bch2_iget5_test, + bch2_iget5_set, + &inum)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->v.i_state & I_NEW)) return &inode->v; - ret = bch2_inode_find_by_inum(c, inum, &inode_u); + ret = bch2_inode_find_by_inum(c, inum.inum, &inode_u); if (ret) { iget_failed(&inode->v); return ERR_PTR(ret); } - bch2_vfs_inode_init(c, inode, &inode_u); + bch2_vfs_inode_init(c, inum, inode, &inode_u); - inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum); + inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum.inum); unlock_new_inode(&inode->v); return &inode->v; } -static int inum_test(struct inode *inode, void *p) -{ - unsigned long *ino = p; - - return *ino == inode->i_ino; -} - static struct bch_inode_info * __bch2_create(struct mnt_idmap *idmap, struct bch_inode_info *dir, struct dentry *dentry, @@ -254,6 +282,7 @@ __bch2_create(struct mnt_idmap *idmap, struct bch_inode_info *inode, *old; struct bch_inode_unpacked inode_u; struct posix_acl *default_acl = NULL, *acl = NULL; + subvol_inum inum; u64 journal_seq = 0; int ret; @@ -310,7 +339,10 @@ err_before_quota: mutex_unlock(&dir->ei_update_lock); } - bch2_vfs_inode_init(c, inode, &inode_u); + inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; + inum.inum = inode_u.bi_inum; + + bch2_vfs_inode_init(c, inum, inode, &inode_u); journal_seq_copy(c, inode, journal_seq); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); @@ -323,8 +355,12 @@ err_before_quota: */ inode->v.i_state |= I_CREATING; - old = to_bch_ei(inode_insert5(&inode->v, inode->v.i_ino, - inum_test, NULL, &inode->v.i_ino)); + + old = to_bch_ei(inode_insert5(&inode->v, + bch2_inode_hash(inum), + bch2_iget5_test, + bch2_iget5_set, + &inum)); BUG_ON(!old); if (unlikely(old != inode)) { @@ -370,12 +406,12 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, struct bch_inode_info *dir = to_bch_ei(vdir); struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); struct inode *vinode = NULL; - u64 inum; + subvol_inum inum = { .subvol = 1 }; - inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash, + inum.inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash, &dentry->d_name); - if (inum) + if (inum.inum) vinode = bch2_vfs_inode_get(c, inum); return d_splice_alias(vinode, dentry); @@ -1098,6 +1134,7 @@ static const struct address_space_operations bch_address_space_operations = { .error_remove_page = generic_error_remove_page, }; +#if 0 static struct inode *bch2_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { @@ -1131,14 +1168,15 @@ static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid, return generic_fh_to_parent(sb, fid, fh_len, fh_type, bch2_nfs_get_inode); } +#endif static const struct export_operations bch_export_ops = { - .fh_to_dentry = bch2_fh_to_dentry, - .fh_to_parent = bch2_fh_to_parent, + //.fh_to_dentry = bch2_fh_to_dentry, + //.fh_to_parent = bch2_fh_to_parent, //.get_parent = bch2_get_parent, }; -static void bch2_vfs_inode_init(struct bch_fs *c, +static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum 
inum, struct bch_inode_info *inode, struct bch_inode_unpacked *bi) { @@ -1154,6 +1192,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->ei_journal_seq = 0; inode->ei_quota_reserved = 0; inode->ei_qid = bch_qid(bi); + inode->ei_subvol = inum.subvol; inode->v.i_mapping->a_ops = &bch_address_space_operations; @@ -1595,7 +1634,7 @@ got_sb: sb->s_flags |= SB_POSIXACL; #endif - vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_INO); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); if (IS_ERR(vinode)) { bch_err(c, "error mounting: error getting root inode %i", (int) PTR_ERR(vinode)); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index c08a828d66cd..6dae425bf616 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -44,10 +44,20 @@ struct bch_inode_info { struct mutex ei_quota_lock; struct bch_qid ei_qid; + u32 ei_subvol; + /* copy of inode in btree: */ struct bch_inode_unpacked ei_inode; }; +static inline subvol_inum inode_inum(struct bch_inode_info *inode) +{ + return (subvol_inum) { + .subvol = inode->ei_subvol, + .inum = inode->ei_inode.bi_inum, + }; +} + /* * Set if we've gotten a btree error for this inode, and thus the vfs inode and * btree inode may be inconsistent: @@ -153,7 +163,7 @@ static inline int bch2_set_projid(struct bch_fs *c, KEY_TYPE_QUOTA_PREALLOC); } -struct inode *bch2_vfs_inode_get(struct bch_fs *, u64); +struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum); /* returns 0 if we want to do the update, or error is passed up */ typedef int (*inode_set_fn)(struct bch_inode_info *, -- cgit v1.2.3 From 6fed42bb7750e217b0d1169ccfccc7639a3e1d3f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Mar 2021 00:28:17 -0400 Subject: bcachefs: Plumb through subvolume id To implement snapshots, we need every filesystem btree operation (every btree operation within a subvolume) to start by looking up the subvolume and getting the current snapshot ID, with bch2_subvolume_get_snapshot() - then, that snapshot ID is used for doing btree lookups in BTREE_ITER_FILTER_SNAPSHOTS mode. This patch adds those bch2_subvolume_get_snapshot() calls, and also switches to passing around a subvol_inum instead of just an inode number.
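The shape this gives every filesystem-initiated btree operation is worth spelling out. The following is an illustrative sketch, not code from this series: walk_subvol_dirents() is a made-up name and the per-key work is elided, but the pattern and every helper it uses (inode_inum(), bch2_subvolume_get_snapshot(), SPOS(), for_each_btree_key()) are taken from the bch2_readdir() and bch2_empty_dir_trans() conversions in this patch.

/*
 * Sketch only, not part of this patch: the general shape of a
 * filesystem btree operation after this change; modeled on
 * bch2_readdir(). walk_subvol_dirents() is a hypothetical name.
 */
static int walk_subvol_dirents(struct bch_fs *c, struct bch_inode_info *inode)
{
	subvol_inum inum = inode_inum(inode);	/* (subvolume, inode nr) pair */
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	u32 snapshot;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);

	/* step 1: look up the subvolume to get the current snapshot ID */
	ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	/* step 2: btree lookups carry that snapshot ID in the key position */
	for_each_btree_key(&trans, iter, BTREE_ID_dirents,
			   SPOS(inum.inum, 0, snapshot), 0, k, ret) {
		if (k.k->p.inode > inum.inum)
			break;
		/* ... per-key work ... */
	}
	bch2_trans_iter_exit(&trans, &iter);
err:
	if (ret == -EINTR)
		goto retry;
	return bch2_trans_exit(&trans) ?: ret;
}

Note that the snapshot lookup sits inside the retry loop: when the transaction restarts with -EINTR, the subvolume is looked up again, presumably so the operation always runs against the subvolume's current snapshot ID.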
Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 25 +++++----- fs/bcachefs/acl.h | 11 ++--- fs/bcachefs/dirent.c | 107 +++++++++++++++++++++++++++------------- fs/bcachefs/dirent.h | 29 +++++------ fs/bcachefs/extents.c | 32 ------------ fs/bcachefs/extents.h | 1 - fs/bcachefs/fs-common.c | 127 ++++++++++++++++++++++++++++++------------------ fs/bcachefs/fs-common.h | 21 ++++---- fs/bcachefs/fs-io.c | 117 +++++++++++++++++++++++++++++++++++++++----- fs/bcachefs/fs-ioctl.c | 8 ++- fs/bcachefs/fs.c | 77 +++++++++++++++++------------ fs/bcachefs/fs.h | 4 ++ fs/bcachefs/fsck.c | 5 +- fs/bcachefs/inode.c | 109 +++++++++++++++++++++++++++++++++-------- fs/bcachefs/inode.h | 7 +-- fs/bcachefs/io.c | 5 +- fs/bcachefs/move.c | 3 +- fs/bcachefs/recovery.c | 5 +- fs/bcachefs/reflink.c | 18 ++++++- fs/bcachefs/reflink.h | 4 +- fs/bcachefs/str_hash.h | 41 +++++++++++----- fs/bcachefs/xattr.c | 23 +++++++-- fs/bcachefs/xattr.h | 3 +- 23 files changed, 526 insertions(+), 256 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 93b78e4e6e0d..2afa15b26700 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -230,7 +230,7 @@ retry: bch2_trans_begin(&trans); ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc, - &hash, inode->v.i_ino, + &hash, inode_inum(inode), &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (ret) { @@ -260,11 +260,11 @@ out: return acl; } -int bch2_set_acl_trans(struct btree_trans *trans, +int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, struct bch_inode_unpacked *inode_u, - const struct bch_hash_info *hash_info, struct posix_acl *acl, int type) { + struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode_u); int ret; if (type == ACL_TYPE_DEFAULT && @@ -277,14 +277,14 @@ int bch2_set_acl_trans(struct btree_trans *trans, if (IS_ERR(xattr)) return PTR_ERR(xattr); - ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info, - inode_u->bi_inum, &xattr->k_i, 0); + ret = bch2_hash_set(trans, bch2_xattr_hash_desc, &hash_info, + inum, &xattr->k_i, 0); } else { struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, hash_info, - inode_u->bi_inum, &search); + ret = bch2_hash_delete(trans, bch2_xattr_hash_desc, &hash_info, + inum, &search); } return ret == -ENOENT ? 
0 : ret; @@ -299,7 +299,6 @@ int bch2_set_acl(struct mnt_idmap *idmap, struct btree_trans trans; struct btree_iter inode_iter = { NULL }; struct bch_inode_unpacked inode_u; - struct bch_hash_info hash_info; struct posix_acl *acl; umode_t mode; int ret; @@ -310,7 +309,7 @@ retry: bch2_trans_begin(&trans); acl = _acl; - ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino, + ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT); if (ret) goto btree_err; @@ -323,9 +322,7 @@ retry: goto btree_err; } - hash_info = bch2_hash_info_init(c, &inode_u); - - ret = bch2_set_acl_trans(&trans, &inode_u, &hash_info, acl, type); + ret = bch2_set_acl_trans(&trans, inode_inum(inode), &inode_u, acl, type); if (ret) goto btree_err; @@ -354,7 +351,7 @@ err: return ret; } -int bch2_acl_chmod(struct btree_trans *trans, +int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, struct bch_inode_unpacked *inode, umode_t mode, struct posix_acl **new_acl) @@ -368,7 +365,7 @@ int bch2_acl_chmod(struct btree_trans *trans, int ret; ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, - &hash_info, inode->bi_inum, + &hash_info, inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (ret) diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index f11eb9d4592c..bb21d8d696a2 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,25 +28,24 @@ typedef struct { struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); -int bch2_set_acl_trans(struct btree_trans *, +int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, - const struct bch_hash_info *, struct posix_acl *, int); int bch2_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); -int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *, +int bch2_acl_chmod(struct btree_trans *, subvol_inum, + struct bch_inode_unpacked *, umode_t, struct posix_acl **); #else -static inline int bch2_set_acl_trans(struct btree_trans *trans, +static inline int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, struct bch_inode_unpacked *inode_u, - const struct bch_hash_info *hash_info, struct posix_acl *acl, int type) { return 0; } -static inline int bch2_acl_chmod(struct btree_trans *trans, +static inline int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, struct bch_inode_unpacked *inode, umode_t mode, struct posix_acl **new_acl) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index f3aef0686928..f290580594ce 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -8,6 +8,7 @@ #include "fs.h" #include "keylist.h" #include "str_hash.h" +#include "subvolume.h" #include @@ -150,8 +151,8 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, return dirent; } -int bch2_dirent_create(struct btree_trans *trans, - u64 dir_inum, const struct bch_hash_info *hash_info, +int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, + const struct bch_hash_info *hash_info, u8 type, const struct qstr *name, u64 dst_inum, u64 *dir_offset, int flags) { @@ -164,7 +165,7 @@ int bch2_dirent_create(struct btree_trans *trans, return ret; ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, - dir_inum, &dirent->k_i, flags); + dir, &dirent->k_i, flags); *dir_offset = dirent->k.p.offset; return ret; @@ -223,31 +224,40 @@ err: return ret; } -int bch2_dirent_read_target(struct btree_trans *trans, - struct bkey_s_c_dirent d, u64 *target) +static int 
bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, + struct bkey_s_c_dirent d, subvol_inum *target) { - u32 subvol, snapshot; + u32 snapshot; + int ret = 0; - return __bch2_dirent_read_target(trans, d, &subvol, - &snapshot, target, false); + ret = __bch2_dirent_read_target(trans, d, &target->subvol, &snapshot, + &target->inum, false); + if (!target->subvol) + target->subvol = dir.subvol; + + return ret; } int bch2_dirent_rename(struct btree_trans *trans, - u64 src_dir, struct bch_hash_info *src_hash, - u64 dst_dir, struct bch_hash_info *dst_hash, - const struct qstr *src_name, u64 *src_inum, u64 *src_offset, - const struct qstr *dst_name, u64 *dst_inum, u64 *dst_offset, - enum bch_rename_mode mode) + subvol_inum src_dir, struct bch_hash_info *src_hash, + subvol_inum dst_dir, struct bch_hash_info *dst_hash, + const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset, + const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, + enum bch_rename_mode mode) { struct btree_iter src_iter = { NULL }; struct btree_iter dst_iter = { NULL }; struct bkey_s_c old_src, old_dst; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; struct bpos dst_pos = - POS(dst_dir, bch2_dirent_hash(dst_hash, dst_name)); + POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name)); int ret = 0; - *src_inum = *dst_inum = 0; + if (src_dir.subvol != dst_dir.subvol) + return -EXDEV; + + memset(src_inum, 0, sizeof(*src_inum)); + memset(dst_inum, 0, sizeof(*dst_inum)); /* * Lookup dst: @@ -270,8 +280,12 @@ int bch2_dirent_rename(struct btree_trans *trans, if (ret) goto out; - if (mode != BCH_RENAME) - *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); + if (mode != BCH_RENAME) { + ret = bch2_dirent_read_target(trans, dst_dir, + bkey_s_c_to_dirent(old_dst), dst_inum); + if (ret) + goto out; + } if (mode != BCH_RENAME_EXCHANGE) *src_offset = dst_iter.pos.offset; @@ -287,7 +301,10 @@ int bch2_dirent_rename(struct btree_trans *trans, if (ret) goto out; - *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); + ret = bch2_dirent_read_target(trans, src_dir, + bkey_s_c_to_dirent(old_src), src_inum); + if (ret) + goto out; /* Create new dst key: */ new_dst = dirent_create_key(trans, 0, dst_name, 0); @@ -376,17 +393,22 @@ int bch2_dirent_delete_at(struct btree_trans *trans, int __bch2_dirent_lookup_trans(struct btree_trans *trans, struct btree_iter *iter, - u64 dir_inum, + subvol_inum dir, const struct bch_hash_info *hash_info, - const struct qstr *name, u64 *inum, + const struct qstr *name, subvol_inum *inum, unsigned flags) { struct bkey_s_c k; struct bkey_s_c_dirent d; + u32 snapshot; int ret; + ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); + if (ret) + return ret; + ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, - hash_info, dir_inum, name, flags); + hash_info, dir, name, flags); if (ret) return ret; @@ -399,44 +421,49 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans, d = bkey_s_c_to_dirent(k); - ret = bch2_dirent_read_target(trans, d, inum); + ret = bch2_dirent_read_target(trans, dir, d, inum); if (ret) bch2_trans_iter_exit(trans, iter); return ret; } -u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, +u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct bch_hash_info *hash_info, - const struct qstr *name) + const struct qstr *name, subvol_inum *inum) { struct btree_trans trans; struct btree_iter iter; - u64 inum = 0; - int ret = 0; + int ret; bch2_trans_init(&trans, c, 0, 0); retry: 
bch2_trans_begin(&trans); - ret = __bch2_dirent_lookup_trans(&trans, &iter, dir_inum, hash_info, - name, &inum, 0); + + ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info, + name, inum, 0); bch2_trans_iter_exit(&trans, &iter); if (ret == -EINTR) goto retry; bch2_trans_exit(&trans); - return inum; + return ret; } -int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) +int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) { struct btree_iter iter; struct bkey_s_c k; + u32 snapshot; int ret; + ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); + if (ret) + return ret; + for_each_btree_key(trans, iter, BTREE_ID_dirents, - POS(dir_inum, 0), 0, k, ret) { - if (k.k->p.inode > dir_inum) + SPOS(dir.inum, 0, snapshot), 0, k, ret) { + if (k.k->p.inode > dir.inum) break; if (k.k->type == KEY_TYPE_dirent) { @@ -449,19 +476,26 @@ int bch2_empty_dir_trans(struct btree_trans *trans, u64 dir_inum) return ret; } -int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) +int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; + u32 snapshot; int ret; bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + goto err; for_each_btree_key(&trans, iter, BTREE_ID_dirents, - POS(inum, ctx->pos), 0, k, ret) { - if (k.k->p.inode > inum) + SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) { + if (k.k->p.inode > inum.inum) break; if (k.k->type != KEY_TYPE_dirent) @@ -482,6 +516,9 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) ctx->pos = dirent.k->p.offset + 1; } bch2_trans_iter_exit(&trans, &iter); +err: + if (ret == -EINTR) + goto retry; ret = bch2_trans_exit(&trans) ?: ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 3cd05a2454e1..88b784a99cb5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -29,7 +29,7 @@ static inline unsigned dirent_val_u64s(unsigned len) sizeof(u64)); } -int bch2_dirent_create(struct btree_trans *, u64, +int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, const struct qstr *, u64, u64 *, int); @@ -40,9 +40,6 @@ int bch2_dirent_delete_at(struct btree_trans *, int __bch2_dirent_read_target(struct btree_trans *, struct bkey_s_c_dirent, u32 *, u32 *, u64 *, bool); -int bch2_dirent_read_target(struct btree_trans *, - struct bkey_s_c_dirent, u64 *); - static inline unsigned vfs_d_type(unsigned type) { return type == DT_SUBVOL ? 
DT_DIR : type; @@ -55,20 +52,20 @@ enum bch_rename_mode { }; int bch2_dirent_rename(struct btree_trans *, - u64, struct bch_hash_info *, - u64, struct bch_hash_info *, - const struct qstr *, u64 *, u64 *, - const struct qstr *, u64 *, u64 *, + subvol_inum, struct bch_hash_info *, + subvol_inum, struct bch_hash_info *, + const struct qstr *, subvol_inum *, u64 *, + const struct qstr *, subvol_inum *, u64 *, enum bch_rename_mode); -int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, u64, - const struct bch_hash_info *, - const struct qstr *, u64 *, - unsigned); -u64 bch2_dirent_lookup(struct bch_fs *, u64, const struct bch_hash_info *, - const struct qstr *); +int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *, + subvol_inum, const struct bch_hash_info *, + const struct qstr *, subvol_inum *, unsigned); +u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum, + const struct bch_hash_info *, + const struct qstr *, subvol_inum *); -int bch2_empty_dir_trans(struct btree_trans *, u64); -int bch2_readdir(struct bch_fs *, u64, struct dir_context *); +int bch2_empty_dir_trans(struct btree_trans *, subvol_inum); +int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *); #endif /* _BCACHEFS_DIRENT_H */ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 0190605711e5..966d6ef41793 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -611,38 +611,6 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k) return false; } -bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, - unsigned nr_replicas, bool compressed) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bpos end = pos; - struct bkey_s_c k; - bool ret = true; - int err; - - end.offset += size; - - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_extents, pos, - BTREE_ITER_SLOTS, k, err) { - if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) - break; - - if (nr_replicas > bch2_bkey_replicas(c, k) || - (!compressed && bch2_bkey_sectors_compressed(k))) { - ret = false; - break; - } - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - - return ret; -} - unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 43cef0a3bdf3..afd3067bb64e 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -567,7 +567,6 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); bool bch2_bkey_is_incompressible(struct bkey_s_c); unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); -bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned, bool); unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c); unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 96b09b005d0b..02bf32cc7659 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -6,28 +6,38 @@ #include "dirent.h" #include "fs-common.h" #include "inode.h" +#include "subvolume.h" #include "xattr.h" #include -int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, +int bch2_create_trans(struct btree_trans *trans, + subvol_inum dir, struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *new_inode, const struct qstr *name, uid_t uid, gid_t gid, umode_t mode, dev_t rdev, struct posix_acl *default_acl, - struct posix_acl *acl) + 
struct posix_acl *acl, + unsigned flags) { struct bch_fs *c = trans->c; struct btree_iter dir_iter = { NULL }; struct btree_iter inode_iter = { NULL }; - struct bch_hash_info hash = bch2_hash_info_init(c, new_inode); + subvol_inum new_inum = dir; u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); u64 dir_offset = 0; + u64 dir_target; + u32 snapshot; + unsigned dir_type; int ret; - ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); + ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); + if (ret) + goto err; + + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); if (ret) goto err; @@ -36,19 +46,23 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (!name) new_inode->bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_inode_create(trans, &inode_iter, new_inode, U32_MAX, cpu); + ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); if (ret) goto err; + new_inum.inum = new_inode->bi_inum; + dir_target = new_inode->bi_inum; + dir_type = mode_to_type(new_inode->bi_mode); + if (default_acl) { - ret = bch2_set_acl_trans(trans, new_inode, &hash, + ret = bch2_set_acl_trans(trans, new_inum, new_inode, default_acl, ACL_TYPE_DEFAULT); if (ret) goto err; } if (acl) { - ret = bch2_set_acl_trans(trans, new_inode, &hash, + ret = bch2_set_acl_trans(trans, new_inum, new_inode, acl, ACL_TYPE_ACCESS); if (ret) goto err; @@ -56,18 +70,19 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (name) { struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); - dir_u->bi_mtime = dir_u->bi_ctime = now; if (S_ISDIR(new_inode->bi_mode)) dir_u->bi_nlink++; + dir_u->bi_mtime = dir_u->bi_ctime = now; ret = bch2_inode_write(trans, &dir_iter, dir_u); if (ret) goto err; - ret = bch2_dirent_create(trans, dir_inum, &dir_hash, - mode_to_type(new_inode->bi_mode), - name, new_inode->bi_inum, + ret = bch2_dirent_create(trans, dir, &dir_hash, + dir_type, + name, + dir_target, &dir_offset, BCH_HASH_SET_MUST_CREATE); if (ret) @@ -79,9 +94,8 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, new_inode->bi_dir_offset = dir_offset; } - /* XXX use bch2_btree_iter_set_snapshot() */ - inode_iter.snapshot = U32_MAX; - bch2_btree_iter_set_pos(&inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX)); + inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS; + bch2_btree_iter_set_snapshot(&inode_iter, snapshot); ret = bch2_btree_iter_traverse(&inode_iter) ?: bch2_inode_write(trans, &inode_iter, new_inode); @@ -91,9 +105,10 @@ err: return ret; } -int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, - u64 inum, struct bch_inode_unpacked *dir_u, - struct bch_inode_unpacked *inode_u, const struct qstr *name) +int bch2_link_trans(struct btree_trans *trans, + subvol_inum dir, struct bch_inode_unpacked *dir_u, + subvol_inum inum, struct bch_inode_unpacked *inode_u, + const struct qstr *name) { struct bch_fs *c = trans->c; struct btree_iter dir_iter = { NULL }; @@ -103,6 +118,9 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, u64 dir_offset = 0; int ret; + if (dir.subvol != inum.subvol) + return -EXDEV; + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -110,7 +128,7 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, inode_u->bi_ctime = now; bch2_inode_nlink_inc(inode_u); - ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); if (ret) goto err; @@ 
-118,15 +136,15 @@ int bch2_link_trans(struct btree_trans *trans, u64 dir_inum, dir_hash = bch2_hash_info_init(c, dir_u); - ret = bch2_dirent_create(trans, dir_inum, &dir_hash, + ret = bch2_dirent_create(trans, dir, &dir_hash, mode_to_type(inode_u->bi_mode), - name, inum, &dir_offset, + name, inum.inum, &dir_offset, BCH_HASH_SET_MUST_CREATE); if (ret) goto err; if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - inode_u->bi_dir = dir_inum; + inode_u->bi_dir = dir.inum; inode_u->bi_dir_offset = dir_offset; } @@ -139,7 +157,8 @@ err: } int bch2_unlink_trans(struct btree_trans *trans, - u64 dir_inum, struct bch_inode_unpacked *dir_u, + subvol_inum dir, + struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *inode_u, const struct qstr *name) { @@ -148,39 +167,49 @@ int bch2_unlink_trans(struct btree_trans *trans, struct btree_iter dirent_iter = { NULL }; struct btree_iter inode_iter = { NULL }; struct bch_hash_info dir_hash; - u64 inum, now = bch2_current_time(c); - struct bkey_s_c k; + subvol_inum inum; + u64 now = bch2_current_time(c); int ret; - ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir_inum, BTREE_ITER_INTENT); + ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT); if (ret) goto err; dir_hash = bch2_hash_info_init(c, dir_u); - ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir_inum, &dir_hash, + ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash, name, &inum, BTREE_ITER_INTENT); if (ret) goto err; - ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); + ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, + BTREE_ITER_INTENT); if (ret) goto err; - if (inode_u->bi_dir == k.k->p.inode && - inode_u->bi_dir_offset == k.k->p.offset) { + if (inode_u->bi_dir == dirent_iter.pos.inode && + inode_u->bi_dir_offset == dirent_iter.pos.offset) { inode_u->bi_dir = 0; inode_u->bi_dir_offset = 0; } + if (S_ISDIR(inode_u->bi_mode)) { + ret = bch2_empty_dir_trans(trans, inum); + if (ret) + goto err; + } + + if (dir.subvol != inum.subvol) { + ret = bch2_subvolume_delete(trans, inum.subvol, false); + if (ret) + goto err; + } + dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode); bch2_inode_nlink_dec(inode_u); - ret = (S_ISDIR(inode_u->bi_mode) - ? 
bch2_empty_dir_trans(trans, inum) - : 0) ?: - bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?: + ret = bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?: bch2_inode_write(trans, &dir_iter, dir_u) ?: bch2_inode_write(trans, &inode_iter, inode_u); err: @@ -215,8 +244,8 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, } int bch2_rename_trans(struct btree_trans *trans, - u64 src_dir, struct bch_inode_unpacked *src_dir_u, - u64 dst_dir, struct bch_inode_unpacked *dst_dir_u, + subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u, + subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u, struct bch_inode_unpacked *src_inode_u, struct bch_inode_unpacked *dst_inode_u, const struct qstr *src_name, @@ -229,7 +258,8 @@ int bch2_rename_trans(struct btree_trans *trans, struct btree_iter src_inode_iter = { NULL }; struct btree_iter dst_inode_iter = { NULL }; struct bch_hash_info src_hash, dst_hash; - u64 src_inode, src_offset, dst_inode, dst_offset; + subvol_inum src_inum, dst_inum; + u64 src_offset, dst_offset; u64 now = bch2_current_time(c); int ret; @@ -240,7 +270,8 @@ int bch2_rename_trans(struct btree_trans *trans, src_hash = bch2_hash_info_init(c, src_dir_u); - if (dst_dir != src_dir) { + if (dst_dir.inum != src_dir.inum || + dst_dir.subvol != src_dir.subvol) { ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir, BTREE_ITER_INTENT); if (ret) @@ -255,19 +286,19 @@ int bch2_rename_trans(struct btree_trans *trans, ret = bch2_dirent_rename(trans, src_dir, &src_hash, dst_dir, &dst_hash, - src_name, &src_inode, &src_offset, - dst_name, &dst_inode, &dst_offset, + src_name, &src_inum, &src_offset, + dst_name, &dst_inum, &dst_offset, mode); if (ret) goto err; - ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inode, + ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum, BTREE_ITER_INTENT); if (ret) goto err; - if (dst_inode) { - ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inode, + if (dst_inum.inum) { + ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum, BTREE_ITER_INTENT); if (ret) goto err; @@ -298,7 +329,7 @@ int bch2_rename_trans(struct btree_trans *trans, } if (S_ISDIR(dst_inode_u->bi_mode) && - bch2_empty_dir_trans(trans, dst_inode)) { + bch2_empty_dir_trans(trans, dst_inum)) { ret = -ENOTEMPTY; goto err; } @@ -322,7 +353,7 @@ int bch2_rename_trans(struct btree_trans *trans, dst_dir_u->bi_nlink++; } - if (dst_inode && S_ISDIR(dst_inode_u->bi_mode)) { + if (dst_inum.inum && S_ISDIR(dst_inode_u->bi_mode)) { dst_dir_u->bi_nlink--; src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; } @@ -333,22 +364,22 @@ int bch2_rename_trans(struct btree_trans *trans, src_dir_u->bi_mtime = now; src_dir_u->bi_ctime = now; - if (src_dir != dst_dir) { + if (src_dir.inum != dst_dir.inum) { dst_dir_u->bi_mtime = now; dst_dir_u->bi_ctime = now; } src_inode_u->bi_ctime = now; - if (dst_inode) + if (dst_inum.inum) dst_inode_u->bi_ctime = now; ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: - (src_dir != dst_dir + (src_dir.inum != dst_dir.inum ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) : 0 ) ?: bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: - (dst_inode + (dst_inum.inum ? 
bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) : 0 ); err: diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h index 2273b7961c9b..1bb2ac4dc13a 100644 --- a/fs/bcachefs/fs-common.h +++ b/fs/bcachefs/fs-common.h @@ -4,27 +4,30 @@ struct posix_acl; -int bch2_create_trans(struct btree_trans *, u64, +#define BCH_CREATE_TMPFILE (1U << 0) + +int bch2_create_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_inode_unpacked *, const struct qstr *, uid_t, gid_t, umode_t, dev_t, struct posix_acl *, - struct posix_acl *); + struct posix_acl *, + unsigned); -int bch2_link_trans(struct btree_trans *, u64, - u64, struct bch_inode_unpacked *, - struct bch_inode_unpacked *, +int bch2_link_trans(struct btree_trans *, + subvol_inum, struct bch_inode_unpacked *, + subvol_inum, struct bch_inode_unpacked *, const struct qstr *); -int bch2_unlink_trans(struct btree_trans *, - u64, struct bch_inode_unpacked *, +int bch2_unlink_trans(struct btree_trans *, subvol_inum, + struct bch_inode_unpacked *, struct bch_inode_unpacked *, const struct qstr *); int bch2_rename_trans(struct btree_trans *, - u64, struct bch_inode_unpacked *, - u64, struct bch_inode_unpacked *, + subvol_inum, struct bch_inode_unpacked *, + subvol_inum, struct bch_inode_unpacked *, struct bch_inode_unpacked *, struct bch_inode_unpacked *, const struct qstr *, diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 909db2f104cd..7a0772195182 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1790,6 +1790,49 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) /* O_DIRECT writes */ +static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, + u64 offset, u64 size, + unsigned nr_replicas, bool compressed) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + u64 end = offset + size; + u32 snapshot; + bool ret = true; + int err; + + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (err) + goto err; + + for_each_btree_key(&trans, iter, BTREE_ID_extents, + SPOS(inum.inum, offset, snapshot), + BTREE_ITER_SLOTS, k, err) { + if (bkey_cmp(bkey_start_pos(k.k), POS(inum.inum, end)) >= 0) + break; + + if (nr_replicas > bch2_bkey_replicas(c, k) || + (!compressed && bch2_bkey_sectors_compressed(k))) { + ret = false; + break; + } + } + + offset = iter.pos.offset; + bch2_trans_iter_exit(&trans, &iter); +err: + if (err == -EINTR) + goto retry; + bch2_trans_exit(&trans); + + return err ? 
false : ret; +} + /* * We're going to return -EIOCBQUEUED, but we haven't finished consuming the * iov_iter yet, so we need to stash a copy of the iovec: it might be on the @@ -1911,8 +1954,8 @@ static long bch2_dio_write_loop(struct dio_write *dio) ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), dio->op.opts.data_replicas, 0); if (unlikely(ret) && - !bch2_check_range_allocated(c, dio->op.pos, - bio_sectors(bio), + !bch2_check_range_allocated(c, inode_inum(inode), + dio->op.pos.offset, bio_sectors(bio), dio->op.opts.data_replicas, dio->op.opts.compression != 0)) goto err; @@ -2141,9 +2184,9 @@ out: /* truncate: */ -static inline int range_has_data(struct bch_fs *c, - struct bpos start, - struct bpos end) +static inline int range_has_data(struct bch_fs *c, u32 subvol, + struct bpos start, + struct bpos end) { struct btree_trans trans; struct btree_iter iter; @@ -2151,6 +2194,12 @@ static inline int range_has_data(struct bch_fs *c, int ret = 0; bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); + if (ret) + goto err; for_each_btree_key(&trans, iter, BTREE_ID_extents, start, 0, k, ret) { if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) @@ -2161,7 +2210,11 @@ static inline int range_has_data(struct bch_fs *c, break; } } + start = iter.pos; bch2_trans_iter_exit(&trans, &iter); +err: + if (ret == -EINTR) + goto retry; return bch2_trans_exit(&trans) ?: ret; } @@ -2193,7 +2246,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, * XXX: we're doing two index lookups when we end up reading the * page */ - ret = range_has_data(c, + ret = range_has_data(c, inode->ei_subvol, POS(inode->v.i_ino, index << PAGE_SECTOR_SHIFT), POS(inode->v.i_ino, (index + 1) << PAGE_SECTOR_SHIFT)); if (ret <= 0) @@ -2327,7 +2380,7 @@ int bch2_truncate(struct mnt_idmap *idmap, inode_dio_wait(&inode->v); bch2_pagecache_block_get(&inode->ei_pagecache_lock); - ret = bch2_inode_find_by_inum(c, inode->v.i_ino, &inode_u); + ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); if (ret) goto err; @@ -2551,6 +2604,18 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); struct bpos atomic_end; unsigned trigger_flags = 0; + u32 snapshot; + + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, + inode->ei_subvol, &snapshot); + if (ret) + continue; + + bch2_btree_iter_set_snapshot(&src, snapshot); + bch2_btree_iter_set_snapshot(&dst, snapshot); + bch2_btree_iter_set_snapshot(&del, snapshot); bch2_trans_begin(&trans); @@ -2671,9 +2736,17 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, struct bkey_i_reservation reservation; struct bkey_s_c k; unsigned sectors; + u32 snapshot; bch2_trans_begin(&trans); + ret = bch2_subvolume_get_snapshot(&trans, + inode->ei_subvol, &snapshot); + if (ret) + goto bkey_err; + + bch2_btree_iter_set_snapshot(&iter, snapshot); + k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) goto bkey_err; @@ -2918,8 +2991,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, mark_range_unallocated(src, pos_src, pos_src + aligned_len); ret = bch2_remap_range(c, - POS(dst->v.i_ino, pos_dst >> 9), - POS(src->v.i_ino, pos_src >> 9), + inode_inum(dst), pos_dst >> 9, + inode_inum(src), pos_src >> 9, aligned_len >> 9, &dst->ei_journal_seq, pos_dst + len, &i_sectors_delta); @@ -3012,7 +3085,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) 
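The hunks above and the seek_data/seek_hole hunks below all repeat one idiom: begin the transaction, re-resolve the subvolume to its current snapshot ID, iterate using a snapshot-qualified SPOS, and restart from the top on -EINTR, since a transaction restart may mean the subvolume's snapshot changed. A minimal sketch of the pattern, assembled from names used in this patch (illustrative only; error handling and per-key work trimmed):

	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	subvol_inum inum = inode_inum(inode);
	u64 offset = 0;
	u32 snapshot;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);

	/* the subvol -> snapshot mapping must be re-read after a restart */
	ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key(&trans, iter, BTREE_ID_extents,
			   SPOS(inum.inum, offset, snapshot), 0, k, ret) {
		/* per-key work goes here */
	}

	offset = iter.pos.offset;	/* resume point for the next restart */
	bch2_trans_iter_exit(&trans, &iter);
err:
	if (ret == -EINTR)
		goto retry;
	bch2_trans_exit(&trans);

Note how each instance saves iter.pos before tearing down the iterator, so a restart resumes where it left off instead of rescanning from the start.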
struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + subvol_inum inum = inode_inum(inode); u64 isize, next_data = MAX_LFS_FILESIZE; + u32 snapshot; int ret; isize = i_size_read(&inode->v); @@ -3020,9 +3095,15 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) return -ENXIO; bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + goto err; for_each_btree_key(&trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), 0, k, ret) { + SPOS(inode->v.i_ino, offset >> 9, snapshot), 0, k, ret) { if (k.k->p.inode != inode->v.i_ino) { break; } else if (bkey_extent_is_data(k.k)) { @@ -3032,6 +3113,9 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) break; } bch2_trans_iter_exit(&trans, &iter); +err: + if (ret == -EINTR) + goto retry; ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -3108,7 +3192,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + subvol_inum inum = inode_inum(inode); u64 isize, next_hole = MAX_LFS_FILESIZE; + u32 snapshot; int ret; isize = i_size_read(&inode->v); @@ -3116,9 +3202,15 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) return -ENXIO; bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + goto err; for_each_btree_key(&trans, iter, BTREE_ID_extents, - POS(inode->v.i_ino, offset >> 9), + SPOS(inode->v.i_ino, offset >> 9, snapshot), BTREE_ITER_SLOTS, k, ret) { if (k.k->p.inode != inode->v.i_ino) { next_hole = bch2_seek_pagecache_hole(&inode->v, @@ -3136,6 +3228,9 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) } } bch2_trans_iter_exit(&trans, &iter); +err: + if (ret == -EINTR) + goto retry; ret = bch2_trans_exit(&trans) ?: ret; if (ret) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index ff6b1739342d..91f52ab9b4e2 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -192,7 +192,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, char *kname = NULL; struct qstr qstr; int ret = 0; - subvol_inum inum = { .subvol = 1 }; + subvol_inum inum; kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); if (!kname) @@ -205,10 +205,8 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, qstr.len = ret; qstr.name = kname; - ret = -ENOENT; - inum.inum = bch2_dirent_lookup(c, src->v.i_ino, &hash, - &qstr); - if (!inum.inum) + ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum); + if (ret) goto err1; vinode = bch2_vfs_inode_get(c, inum); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 7a994f3f9d20..0d47d9d5737b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -150,7 +150,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, retry: bch2_trans_begin(&trans); - ret = bch2_inode_peek(&trans, &iter, &inode_u, inode->v.i_ino, + ret = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT) ?: (set ? 
set(inode, &inode_u, p) : 0) ?: bch2_inode_write(&trans, &iter, &inode_u) ?: @@ -256,7 +256,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) if (!(inode->v.i_state & I_NEW)) return &inode->v; - ret = bch2_inode_find_by_inum(c, inum.inum, &inode_u); + ret = bch2_inode_find_by_inum(c, inum, &inode_u); if (ret) { iget_failed(&inode->v); return ERR_PTR(ret); @@ -271,10 +271,10 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) return &inode->v; } -static struct bch_inode_info * +struct bch_inode_info * __bch2_create(struct mnt_idmap *idmap, struct bch_inode_info *dir, struct dentry *dentry, - umode_t mode, dev_t rdev, bool tmpfile) + umode_t mode, dev_t rdev, unsigned flags) { struct bch_fs *c = dir->v.i_sb->s_fs_info; struct btree_trans trans; @@ -303,20 +303,23 @@ __bch2_create(struct mnt_idmap *idmap, bch2_inode_init_early(c, &inode_u); - if (!tmpfile) + if (!(flags & BCH_CREATE_TMPFILE)) mutex_lock(&dir->ei_update_lock); bch2_trans_init(&trans, c, 8, - 2048 + (!tmpfile ? dentry->d_name.len : 0)); + 2048 + (!(flags & BCH_CREATE_TMPFILE) + ? dentry->d_name.len : 0)); retry: bch2_trans_begin(&trans); - ret = bch2_create_trans(&trans, dir->v.i_ino, &dir_u, &inode_u, - !tmpfile ? &dentry->d_name : NULL, + ret = bch2_create_trans(&trans, + inode_inum(dir), &dir_u, &inode_u, + !(flags & BCH_CREATE_TMPFILE) + ? &dentry->d_name : NULL, from_kuid(i_user_ns(&dir->v), current_fsuid()), from_kgid(i_user_ns(&dir->v), current_fsgid()), mode, rdev, - default_acl, acl) ?: + default_acl, acl, flags) ?: bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC); if (unlikely(ret)) @@ -332,7 +335,7 @@ err_before_quota: goto err_trans; } - if (!tmpfile) { + if (!(flags & BCH_CREATE_TMPFILE)) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); journal_seq_copy(c, dir, journal_seq); @@ -387,7 +390,7 @@ err: posix_acl_release(acl); return inode; err_trans: - if (!tmpfile) + if (!(flags & BCH_CREATE_TMPFILE)) mutex_unlock(&dir->ei_update_lock); bch2_trans_exit(&trans); @@ -407,11 +410,12 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); struct inode *vinode = NULL; subvol_inum inum = { .subvol = 1 }; + int ret; - inum.inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash, - &dentry->d_name); + ret = bch2_dirent_lookup(c, inode_inum(dir), &hash, + &dentry->d_name, &inum); - if (inum.inum) + if (!ret) vinode = bch2_vfs_inode_get(c, inum); return d_splice_alias(vinode, dentry); @@ -422,7 +426,7 @@ static int bch2_mknod(struct mnt_idmap *idmap, umode_t mode, dev_t rdev) { struct bch_inode_info *inode = - __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, false); + __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, 0); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -452,8 +456,8 @@ static int __bch2_link(struct bch_fs *c, ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0, bch2_link_trans(&trans, - dir->v.i_ino, - inode->v.i_ino, &dir_u, &inode_u, + inode_inum(dir), &dir_u, + inode_inum(inode), &inode_u, &dentry->d_name)); if (likely(!ret)) { @@ -504,7 +508,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, - dir->v.i_ino, &dir_u, + inode_inum(dir), &dir_u, &inode_u, &dentry->d_name)); if (likely(!ret)) { @@ -531,7 +535,8 @@ static int bch2_symlink(struct mnt_idmap *idmap, struct bch_inode_info *dir = 
to_bch_ei(vdir), *inode; int ret; - inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, true); + inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, + BCH_CREATE_TMPFILE); if (unlikely(IS_ERR(inode))) return PTR_ERR(inode); @@ -624,8 +629,8 @@ static int bch2_rename2(struct mnt_idmap *idmap, ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0, bch2_rename_trans(&trans, - src_dir->v.i_ino, &src_dir_u, - dst_dir->v.i_ino, &dst_dir_u, + inode_inum(src_dir), &src_dir_u, + inode_inum(dst_dir), &dst_dir_u, &src_inode_u, &dst_inode_u, &src_dentry->d_name, @@ -748,7 +753,7 @@ retry: kfree(acl); acl = NULL; - ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode->v.i_ino, + ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT); if (ret) goto btree_err; @@ -756,7 +761,8 @@ retry: bch2_setattr_copy(idmap, inode, &inode_u, attr); if (attr->ia_valid & ATTR_MODE) { - ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl); + ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u, + inode_u.bi_mode, &acl); if (ret) goto btree_err; } @@ -848,7 +854,8 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, { struct bch_inode_info *inode = __bch2_create(idmap, to_bch_ei(vdir), - file->f_path.dentry, mode, 0, true); + file->f_path.dentry, mode, 0, + BCH_CREATE_TMPFILE); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -923,6 +930,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); unsigned offset_into_extent, sectors; bool have_extent = false; + u32 snapshot; int ret = 0; ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); @@ -932,15 +940,21 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (start + len < start) return -EINVAL; + start >>= 9; + bch2_bkey_buf_init(&cur); bch2_bkey_buf_init(&prev); bch2_trans_init(&trans, c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start >> 9), 0); retry: bch2_trans_begin(&trans); + ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot); + if (ret) + goto err; + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + SPOS(ei->v.i_ino, start, snapshot), 0); + while ((k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k)) && bkey_cmp(iter.pos, end) < 0) { @@ -989,7 +1003,9 @@ retry: bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, iter.pos.offset + sectors)); } - + start = iter.pos.offset; + bch2_trans_iter_exit(&trans, &iter); +err: if (ret == -EINTR) goto retry; @@ -997,7 +1013,6 @@ retry: ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); - bch2_trans_iter_exit(&trans, &iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); @@ -1034,7 +1049,7 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - return bch2_readdir(c, inode->v.i_ino, ctx); + return bch2_readdir(c, inode_inum(inode), ctx); } static const struct file_operations bch_file_operations = { @@ -1290,7 +1305,7 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); - bch2_inode_rm(c, inode->v.i_ino, true); + bch2_inode_rm(c, inode_inum(inode), true); } } diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 6dae425bf616..aa755987b36c 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -144,6 +144,10 @@ struct bch_inode_unpacked; 
#ifndef NO_BCACHEFS_FS +struct bch_inode_info * +__bch2_create(struct mnt_idmap *, struct bch_inode_info *, + struct dentry *, umode_t, dev_t, unsigned); + int bch2_fs_quota_transfer(struct bch_fs *, struct bch_inode_info *, struct bch_qid, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index e4ca05aae76c..40b107715cdd 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -858,7 +858,10 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); - ret = bch2_dirent_read_target(trans, d, &d_inum); + ret = __bch2_dirent_read_target(&trans, d, + &target_subvol, + &target_snapshot, + &target_inum); if (ret && ret != -ENOENT) return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 3b19dc6b9ddc..7fccf842a46b 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -6,6 +6,7 @@ #include "btree_update.h" #include "error.h" #include "extents.h" +#include "extent_update.h" #include "inode.h" #include "str_hash.h" #include "subvolume.h" @@ -296,15 +297,21 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, int bch2_inode_peek(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, - u64 inum, unsigned flags) + subvol_inum inum, unsigned flags) { struct bkey_s_c k; + u32 snapshot; int ret; if (trans->c->opts.inodes_use_key_cache) flags |= BTREE_ITER_CACHED; - bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, inum), flags); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + return ret; + + bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, + SPOS(0, inum.inum, snapshot), flags); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) @@ -486,6 +493,9 @@ static inline u32 bkey_generation(struct bkey_s_c k) } } +/* + * This just finds an empty slot: + */ int bch2_inode_create(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode_u, @@ -585,16 +595,74 @@ found_slot: return 0; } -int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) +static int bch2_inode_delete_keys(struct btree_trans *trans, + subvol_inum inum, enum btree_id id) +{ + u64 offset = 0; + int ret = 0; + + while (!ret || ret == -EINTR) { + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i delete; + u32 snapshot; + + bch2_trans_begin(trans); + + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + continue; + + bch2_trans_iter_init(trans, &iter, id, + SPOS(inum.inum, offset, snapshot), + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek(&iter); + + if (!k.k || iter.pos.inode != inum.inum) { + bch2_trans_iter_exit(trans, &iter); + break; + } + + ret = bkey_err(k); + if (ret) + goto err; + + bkey_init(&delete.k); + delete.k.p = iter.pos; + + if (btree_node_type_is_extents(iter.btree_id)) { + unsigned max_sectors = + min_t(u64, U64_MAX - iter.pos.offset, + KEY_SIZE_MAX & (~0 << trans->c->block_bits)); + + /* create the biggest key we can */ + bch2_key_resize(&delete.k, max_sectors); + + ret = bch2_extent_trim_atomic(trans, &iter, &delete); + if (ret) + goto err; + } + + ret = bch2_trans_update(trans, &iter, &delete, 0) ?: + bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL); +err: + offset = iter.pos.offset; + bch2_trans_iter_exit(trans, &iter); + } + + return ret; +} + +int bch2_inode_rm(struct bch_fs *c, subvol_inum inum, bool cached) { struct btree_trans trans; struct btree_iter iter = { NULL }; struct bkey_i_inode_generation delete; - struct bpos start = POS(inode_nr, 
0); - struct bpos end = POS(inode_nr + 1, 0); struct bch_inode_unpacked inode_u; struct bkey_s_c k; unsigned iter_flags = BTREE_ITER_INTENT; + u32 snapshot; int ret; if (cached && c->opts.inodes_use_key_cache) @@ -610,19 +678,20 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr, bool cached) * XXX: the dirent code could ideally delete whiteouts when they're no * longer needed */ - ret = bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, - start, end, NULL) ?: - bch2_btree_delete_range_trans(&trans, BTREE_ID_xattrs, - start, end, NULL) ?: - bch2_btree_delete_range_trans(&trans, BTREE_ID_dirents, - start, end, NULL); + ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?: + bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?: + bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents); if (ret) goto err; retry: bch2_trans_begin(&trans); + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + goto err; + bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, - POS(0, inode_nr), iter_flags); + SPOS(0, inum.inum, snapshot), iter_flags); k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); @@ -632,7 +701,7 @@ retry: if (k.k->type != KEY_TYPE_inode) { bch2_fs_inconsistent(trans.c, "inode %llu not found when deleting", - inode_nr); + inum.inum); ret = -EIO; goto err; } @@ -662,20 +731,22 @@ err: return ret; } -static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr, +static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, + subvol_inum inum, struct bch_inode_unpacked *inode) { - struct btree_iter iter = { NULL }; + struct btree_iter iter; int ret; - ret = bch2_inode_peek(trans, &iter, inode, inode_nr, 0); - bch2_trans_iter_exit(trans, &iter); + ret = bch2_inode_peek(trans, &iter, inode, inum, 0); + if (!ret) + bch2_trans_iter_exit(trans, &iter); return ret; } -int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, +int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, struct bch_inode_unpacked *inode) { return bch2_trans_do(c, NULL, NULL, 0, - bch2_inode_find_by_inum_trans(&trans, inode_nr, inode)); + bch2_inode_find_by_inum_trans(&trans, inum, inode)); } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 25bef104ebcc..9e84cddcc6cb 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -58,7 +58,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); int bch2_inode_peek(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, u64, unsigned); + struct bch_inode_unpacked *, subvol_inum, unsigned); int bch2_inode_write(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *); @@ -74,9 +74,10 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, int bch2_inode_create(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, u32, u64); -int bch2_inode_rm(struct bch_fs *, u64, bool); +int bch2_inode_rm(struct bch_fs *, subvol_inum, bool); -int bch2_inode_find_by_inum(struct bch_fs *, u64, struct bch_inode_unpacked *); +int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum, + struct bch_inode_unpacked *); static inline struct bch_io_opts bch2_inode_opts_get(struct bch_inode_unpacked *inode) { diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index f95ceb820faa..0f5e0099b848 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -325,7 +325,10 @@ int bch2_extent_update(struct btree_trans *trans, struct bch_inode_unpacked
inode_u; ret = bch2_inode_peek(trans, &inode_iter, &inode_u, - k->k.p.inode, BTREE_ITER_INTENT); + (subvol_inum) { + .subvol = BCACHEFS_ROOT_SUBVOL, + .inum = k->k.p.inode, + }, BTREE_ITER_INTENT); if (ret) return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index eb2b91f7e682..9dc6684139de 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -581,7 +581,8 @@ static int __bch2_move_data(struct bch_fs *c, stats->pos = start; bch2_trans_iter_init(&trans, &iter, btree_id, start, - BTREE_ITER_PREFETCH); + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS); if (rate) bch2_ratelimit_reset(rate); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2aab57cf09e1..47c8fecc6839 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1480,11 +1480,12 @@ int bch2_fs_initialize(struct bch_fs *c) err = "error creating lost+found"; ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_create_trans(&trans, BCACHEFS_ROOT_INO, + bch2_create_trans(&trans, + BCACHEFS_ROOT_SUBVOL_INUM, &root_inode, &lostfound_inode, &lostfound, 0, 0, S_IFDIR|0700, 0, - NULL, NULL)); + NULL, NULL, 0)); if (ret) { bch_err(c, "error creating lost+found"); goto err; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 576cfbccf5b5..be4b47bc7438 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -7,6 +7,7 @@ #include "inode.h" #include "io.h" #include "reflink.h" +#include "subvolume.h" #include @@ -197,7 +198,8 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) } s64 bch2_remap_range(struct bch_fs *c, - struct bpos dst_start, struct bpos src_start, + subvol_inum dst_inum, u64 dst_offset, + subvol_inum src_inum, u64 src_offset, u64 remap_sectors, u64 *journal_seq, u64 new_i_size, s64 *i_sectors_delta) { @@ -205,6 +207,8 @@ s64 bch2_remap_range(struct bch_fs *c, struct btree_iter dst_iter, src_iter; struct bkey_s_c src_k; struct bkey_buf new_dst, new_src; + struct bpos dst_start = POS(dst_inum.inum, dst_offset); + struct bpos src_start = POS(src_inum.inum, src_offset); struct bpos dst_end = dst_start, src_end = src_start; struct bpos src_want; u64 dst_done; @@ -238,6 +242,16 @@ s64 bch2_remap_range(struct bch_fs *c, break; } + ret = bch2_subvolume_get_snapshot(&trans, src_inum.subvol, + &src_iter.snapshot); + if (ret) + continue; + + ret = bch2_subvolume_get_snapshot(&trans, dst_inum.subvol, + &dst_iter.snapshot); + if (ret) + continue; + dst_done = dst_iter.pos.offset - dst_start.offset; src_want = POS(src_start.inode, src_start.offset + dst_done); bch2_btree_iter_set_pos(&src_iter, src_want); @@ -311,7 +325,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_begin(&trans); ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u, - dst_start.inode, BTREE_ITER_INTENT); + dst_inum, BTREE_ITER_INTENT); if (!ret2 && inode_u.bi_size < new_i_size) { diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 68c5cb5a2780..4c1b82860b0b 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -57,7 +57,7 @@ static inline __le64 *bkey_refcount(struct bkey_i *k) } } -s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos, - u64, u64 *, u64, s64 *); +s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, + subvol_inum, u64, u64, u64 *, u64, s64 *); #endif /* _BCACHEFS_REFLINK_H */ diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index c6a132b3c5bb..6418089531ad 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -8,6 +8,7 @@ #include "error.h" #include "inode.h" #include "siphash.h" +#include 
"subvolume.h" #include "super.h" #include @@ -144,16 +145,21 @@ bch2_hash_lookup(struct btree_trans *trans, struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, - u64 inode, const void *key, + subvol_inum inum, const void *key, unsigned flags) { struct bkey_s_c k; + u32 snapshot; int ret; + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + return ret; + for_each_btree_key(trans, *iter, desc.btree_id, - POS(inode, desc.hash_key(info, key)), + SPOS(inum.inum, desc.hash_key(info, key), snapshot), BTREE_ITER_SLOTS|flags, k, ret) { - if (iter->pos.inode != inode) + if (iter->pos.inode != inum.inum) break; if (k.k->type == desc.key_type) { @@ -176,15 +182,20 @@ bch2_hash_hole(struct btree_trans *trans, struct btree_iter *iter, const struct bch_hash_desc desc, const struct bch_hash_info *info, - u64 inode, const void *key) + subvol_inum inum, const void *key) { struct bkey_s_c k; + u32 snapshot; int ret; + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + return ret; + for_each_btree_key(trans, *iter, desc.btree_id, - POS(inode, desc.hash_key(info, key)), + SPOS(inum.inum, desc.hash_key(info, key), snapshot), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter->pos.inode != inode) + if (iter->pos.inode != inum.inum) break; if (k.k->type != desc.key_type) @@ -229,17 +240,25 @@ static __always_inline int bch2_hash_set(struct btree_trans *trans, const struct bch_hash_desc desc, const struct bch_hash_info *info, - u64 inode, struct bkey_i *insert, int flags) + subvol_inum inum, + struct bkey_i *insert, int flags) { struct btree_iter iter, slot = { NULL }; struct bkey_s_c k; bool found = false; + u32 snapshot; int ret; + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + return ret; + for_each_btree_key(trans, iter, desc.btree_id, - POS(inode, desc.hash_bkey(info, bkey_i_to_s_c(insert))), + SPOS(inum.inum, + desc.hash_bkey(info, bkey_i_to_s_c(insert)), + snapshot), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter.pos.inode != inode) + if (iter.pos.inode != inum.inum) break; if (k.k->type == desc.key_type) { @@ -313,12 +332,12 @@ static __always_inline int bch2_hash_delete(struct btree_trans *trans, const struct bch_hash_desc desc, const struct bch_hash_info *info, - u64 inode, const void *key) + subvol_inum inum, const void *key) { struct btree_iter iter; int ret; - ret = bch2_hash_lookup(trans, &iter, desc, info, inode, key, + ret = bch2_hash_lookup(trans, &iter, desc, info, inum, key, BTREE_ITER_INTENT); if (ret) return ret; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index babbfaadeb3f..ff81a25698ff 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -128,7 +128,7 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info int ret; ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, - inode->v.i_ino, + inode_inum(inode), &X_SEARCH(type, name, strlen(name)), 0); if (ret) @@ -160,7 +160,7 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, bch2_xattr_get_trans(&trans, inode, name, buffer, size, type)); } -int bch2_xattr_set(struct btree_trans *trans, u64 inum, +int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, const struct bch_hash_info *hash_info, const char *name, const void *value, size_t size, int type, int flags) @@ -282,13 +282,21 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct btree_iter iter; struct bkey_s_c k; struct 
xattr_buf buf = { .buf = buffer, .len = buffer_size }; - u64 inum = dentry->d_inode->i_ino; + u64 offset = 0, inum = inode->ei_inode.bi_inum; + u32 snapshot; int ret; bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + iter = (struct btree_iter) { NULL }; + + ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); + if (ret) + goto err; for_each_btree_key(&trans, iter, BTREE_ID_xattrs, - POS(inum, 0), 0, k, ret) { + SPOS(inum, offset, snapshot), 0, k, ret) { BUG_ON(k.k->p.inode < inum); if (k.k->p.inode > inum) @@ -301,7 +309,12 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret) break; } + + offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); +err: + if (ret == -EINTR) + goto retry; ret = bch2_trans_exit(&trans) ?: ret; @@ -340,7 +353,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, - bch2_xattr_set(&trans, inode->v.i_ino, &hash, + bch2_xattr_set(&trans, inode_inum(inode), &hash, name, value, size, handler->flags, flags)); } diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 4151065ab853..f4f896545e1c 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -39,7 +39,8 @@ struct bch_inode_info; int bch2_xattr_get(struct bch_fs *, struct bch_inode_info *, const char *, void *, size_t, int); -int bch2_xattr_set(struct btree_trans *, u64, const struct bch_hash_info *, +int bch2_xattr_set(struct btree_trans *, subvol_inum, + const struct bch_hash_info *, const char *, const void *, size_t, int, int); ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -- cgit v1.2.3 From 42d237320e9817a94f3a0a2de28156523596b086 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Mar 2021 23:28:43 -0400 Subject: bcachefs: Snapshot creation, deletion This is the final patch in the patch series implementing snapshots. This patch implements two new ioctls that work like creation and deletion of directories, but fancier. 
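As a rough sketch of how userspace might drive the new interface -- hypothetical, since the ioctl numbers and the exact bch_ioctl_subvolume layout live in bcachefs_ioctl.h, which this series doesn't show; the field and flag names below are taken from the handler code added later in this patch:

	#include <string.h>
	#include <sys/ioctl.h>
	#include "bcachefs_ioctl.h"	/* assumed: struct bch_ioctl_subvolume, BCH_* constants */

	/* Create a snapshot at the path dst, interpreted relative to dirfd. */
	static int bch_snapshot_create(int dirfd, const char *dst)
	{
		struct bch_ioctl_subvolume arg;

		memset(&arg, 0, sizeof(arg));
		arg.flags   = BCH_SUBVOL_SNAPSHOT_CREATE;
		arg.dirfd   = dirfd;
		arg.mode    = 0755;
		arg.src_ptr = 0;	/* 0: snapshot the subvolume containing dst */
		arg.dst_ptr = (unsigned long) dst;

		/* any fd on the target filesystem works; dirfd is reused here */
		return ioctl(dirfd, BCH_IOCTL_SUBVOLUME_CREATE, &arg);
	}

The two ioctls: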
- BCH_IOCTL_SUBVOLUME_CREATE, for creating new subvolumes and snapshots - BCH_IOCTL_SUBVOLUME_DESTROY, for deleting subvolumes and snapshots Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 8 --- fs/bcachefs/dirent.h | 4 -- fs/bcachefs/fs-common.c | 182 +++++++++++++++++++++++++++++++++++++++--------- fs/bcachefs/fs-common.h | 7 +- fs/bcachefs/fs-ioctl.c | 168 ++++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/fs.c | 29 ++++---- fs/bcachefs/fs.h | 3 +- fs/bcachefs/fsck.c | 7 +- fs/bcachefs/recovery.c | 2 +- fs/bcachefs/str_hash.h | 7 +- 10 files changed, 348 insertions(+), 69 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index f290580594ce..8653a106809d 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -383,14 +383,6 @@ out: return ret; } -int bch2_dirent_delete_at(struct btree_trans *trans, - const struct bch_hash_info *hash_info, - struct btree_iter *iter) -{ - return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - hash_info, iter); -} - int __bch2_dirent_lookup_trans(struct btree_trans *trans, struct btree_iter *iter, subvol_inum dir, diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 88b784a99cb5..e7f65fbd8e65 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -33,10 +33,6 @@ int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, const struct qstr *, u64, u64 *, int); -int bch2_dirent_delete_at(struct btree_trans *, - const struct bch_hash_info *, - struct btree_iter *); - int __bch2_dirent_read_target(struct btree_trans *, struct bkey_s_c_dirent, u32 *, u32 *, u64 *, bool); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 02bf32cc7659..3e8e3c5bf870 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -11,6 +11,11 @@ #include +static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode) +{ + return S_ISDIR(inode->bi_mode) && !inode->bi_subvol; +} + int bch2_create_trans(struct btree_trans *trans, subvol_inum dir, struct bch_inode_unpacked *dir_u, @@ -19,6 +24,7 @@ int bch2_create_trans(struct btree_trans *trans, uid_t uid, gid_t gid, umode_t mode, dev_t rdev, struct posix_acl *default_acl, struct posix_acl *acl, + subvol_inum snapshot_src, unsigned flags) { struct bch_fs *c = trans->c; @@ -27,10 +33,9 @@ int bch2_create_trans(struct btree_trans *trans, subvol_inum new_inum = dir; u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); - u64 dir_offset = 0; u64 dir_target; u32 snapshot; - unsigned dir_type; + unsigned dir_type = mode_to_type(mode); int ret; ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot); @@ -41,37 +46,122 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); + if (!(flags & BCH_CREATE_SNAPSHOT)) { + /* Normal create path - allocate a new inode: */ + bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); - if (!name) - new_inode->bi_flags |= BCH_INODE_UNLINKED; + if (flags & BCH_CREATE_TMPFILE) + new_inode->bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); - if (ret) - goto err; + ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu); + if (ret) + goto err; + + snapshot_src = (subvol_inum) { 0 }; + } else { + /* + * Creating a snapshot - we're not allocating a new inode, but + * we do have to look up the root inode of the subvolume we're + * snapshotting and update it (in the new
snapshot): + */ + + if (!snapshot_src.inum) { + /* Inode wasn't specified, just snapshot: */ + struct btree_iter subvol_iter; + struct bkey_s_c k; + + bch2_trans_iter_init(trans, &subvol_iter, BTREE_ID_subvolumes, + POS(0, snapshot_src.subvol), 0); + k = bch2_btree_iter_peek_slot(&subvol_iter); + + ret = bkey_err(k); + if (!ret && k.k->type != KEY_TYPE_subvolume) { + bch_err(c, "subvolume %u not found", + snapshot_src.subvol); + ret = -ENOENT; + } + + if (!ret) + snapshot_src.inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode); + bch2_trans_iter_exit(trans, &subvol_iter); + + if (ret) + goto err; + } + + ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src, + BTREE_ITER_INTENT); + if (ret) + goto err; + + if (new_inode->bi_subvol != snapshot_src.subvol) { + /* Not a subvolume root: */ + ret = -EINVAL; + goto err; + } + + /* + * If we're not root, we have to own the subvolume being + * snapshotted: + */ + if (uid && new_inode->bi_uid != uid) { + ret = -EPERM; + goto err; + } + + flags |= BCH_CREATE_SUBVOL; + } new_inum.inum = new_inode->bi_inum; dir_target = new_inode->bi_inum; - dir_type = mode_to_type(new_inode->bi_mode); - if (default_acl) { - ret = bch2_set_acl_trans(trans, new_inum, new_inode, - default_acl, ACL_TYPE_DEFAULT); + if (flags & BCH_CREATE_SUBVOL) { + u32 new_subvol, dir_snapshot; + + ret = bch2_subvolume_create(trans, new_inode->bi_inum, + snapshot_src.subvol, + &new_subvol, &snapshot, + (flags & BCH_CREATE_SNAPSHOT_RO) != 0); if (ret) goto err; - } - if (acl) { - ret = bch2_set_acl_trans(trans, new_inum, new_inode, - acl, ACL_TYPE_ACCESS); + new_inode->bi_parent_subvol = dir.subvol; + new_inode->bi_subvol = new_subvol; + new_inum.subvol = new_subvol; + dir_target = new_subvol; + dir_type = DT_SUBVOL; + + ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot); + if (ret) + goto err; + + bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot); + ret = bch2_btree_iter_traverse(&dir_iter); if (ret) goto err; } - if (name) { + if (!(flags & BCH_CREATE_SNAPSHOT)) { + if (default_acl) { + ret = bch2_set_acl_trans(trans, new_inum, new_inode, + default_acl, ACL_TYPE_DEFAULT); + if (ret) + goto err; + } + + if (acl) { + ret = bch2_set_acl_trans(trans, new_inum, new_inode, + acl, ACL_TYPE_ACCESS); + if (ret) + goto err; + } + } + + if (!(flags & BCH_CREATE_TMPFILE)) { struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u); + u64 dir_offset; - if (S_ISDIR(new_inode->bi_mode)) + if (is_subdir_for_nlink(new_inode)) dir_u->bi_nlink++; dir_u->bi_mtime = dir_u->bi_ctime = now; @@ -87,11 +177,11 @@ int bch2_create_trans(struct btree_trans *trans, BCH_HASH_SET_MUST_CREATE); if (ret) goto err; - } - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - new_inode->bi_dir = dir_u->bi_inum; - new_inode->bi_dir_offset = dir_offset; + if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { + new_inode->bi_dir = dir_u->bi_inum; + new_inode->bi_dir_offset = dir_offset; + } } inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS; @@ -160,7 +250,8 @@ int bch2_unlink_trans(struct btree_trans *trans, subvol_inum dir, struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *inode_u, - const struct qstr *name) + const struct qstr *name, + int deleting_snapshot) { struct bch_fs *c = trans->c; struct btree_iter dir_iter = { NULL }; @@ -169,6 +260,7 @@ int bch2_unlink_trans(struct btree_trans *trans, struct bch_hash_info dir_hash; subvol_inum inum; u64 now = bch2_current_time(c); + struct bkey_s_c k; int ret; ret = bch2_inode_peek(trans, &dir_iter, 
dir_u, dir, BTREE_ITER_INTENT); @@ -187,29 +279,51 @@ int bch2_unlink_trans(struct btree_trans *trans, if (ret) goto err; - if (inode_u->bi_dir == dirent_iter.pos.inode && - inode_u->bi_dir_offset == dirent_iter.pos.offset) { - inode_u->bi_dir = 0; - inode_u->bi_dir_offset = 0; + if (deleting_snapshot == 1 && !inode_u->bi_subvol) { + ret = -ENOENT; + goto err; } - if (S_ISDIR(inode_u->bi_mode)) { + if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) { ret = bch2_empty_dir_trans(trans, inum); if (ret) goto err; } - if (dir.subvol != inum.subvol) { - ret = bch2_subvolume_delete(trans, inum.subvol, false); + if (inode_u->bi_subvol) { + ret = bch2_subvolume_delete(trans, inode_u->bi_subvol, + deleting_snapshot); + if (ret) + goto err; + + k = bch2_btree_iter_peek_slot(&dirent_iter); + ret = bkey_err(k); + if (ret) + goto err; + + /* + * If we're deleting a subvolume, we need to really delete the + * dirent, not just emit a whiteout in the current snapshot: + */ + bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot); + ret = bch2_btree_iter_traverse(&dirent_iter); if (ret) goto err; } + if (inode_u->bi_dir == dirent_iter.pos.inode && + inode_u->bi_dir_offset == dirent_iter.pos.offset) { + inode_u->bi_dir = 0; + inode_u->bi_dir_offset = 0; + } + dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; - dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode); + dir_u->bi_nlink -= is_subdir_for_nlink(inode_u); bch2_inode_nlink_dec(inode_u); - ret = bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?: + ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, + &dir_hash, &dirent_iter, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_inode_write(trans, &dir_iter, dir_u) ?: bch2_inode_write(trans, &inode_iter, inode_u); err: @@ -348,12 +462,12 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } - if (S_ISDIR(src_inode_u->bi_mode)) { + if (is_subdir_for_nlink(src_inode_u)) { src_dir_u->bi_nlink--; dst_dir_u->bi_nlink++; } - if (dst_inum.inum && S_ISDIR(dst_inode_u->bi_mode)) { + if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) { dst_dir_u->bi_nlink--; src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE; } diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h index 1bb2ac4dc13a..9bb0a9676147 100644 --- a/fs/bcachefs/fs-common.h +++ b/fs/bcachefs/fs-common.h @@ -5,6 +5,9 @@ struct posix_acl; #define BCH_CREATE_TMPFILE (1U << 0) +#define BCH_CREATE_SUBVOL (1U << 1) +#define BCH_CREATE_SNAPSHOT (1U << 2) +#define BCH_CREATE_SNAPSHOT_RO (1U << 3) int bch2_create_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, @@ -13,7 +16,7 @@ int bch2_create_trans(struct btree_trans *, subvol_inum, uid_t, gid_t, umode_t, dev_t, struct posix_acl *, struct posix_acl *, - unsigned); + subvol_inum, unsigned); int bch2_link_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, @@ -23,7 +26,7 @@ int bch2_link_trans(struct btree_trans *, int bch2_unlink_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_inode_unpacked *, - const struct qstr *); + const struct qstr *, int); int bch2_rename_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 91f52ab9b4e2..ae402d350d4c 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -10,7 +10,11 @@ #include "quota.h" #include +#include #include +#include +#include +#include #define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) #define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ @@ -292,6 +296,154 @@ 
err: return ret; } +static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, + struct bch_ioctl_subvolume arg) +{ + struct inode *dir; + struct bch_inode_info *inode; + struct user_namespace *s_user_ns; + struct dentry *dst_dentry; + struct path src_path, dst_path; + int how = LOOKUP_FOLLOW; + int error; + subvol_inum snapshot_src = { 0 }; + unsigned lookup_flags = 0; + unsigned create_flags = BCH_CREATE_SUBVOL; + + if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE| + BCH_SUBVOL_SNAPSHOT_RO)) + return -EINVAL; + + if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && + (arg.src_ptr || + (arg.flags & BCH_SUBVOL_SNAPSHOT_RO))) + return -EINVAL; + + if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) + create_flags |= BCH_CREATE_SNAPSHOT; + + if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) + create_flags |= BCH_CREATE_SNAPSHOT_RO; + + /* why do we need this lock? */ + down_read(&c->vfs_sb->s_umount); + + if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) + sync_inodes_sb(c->vfs_sb); +retry: + if (arg.src_ptr) { + error = user_path_at(arg.dirfd, + (const char __user *)(unsigned long)arg.src_ptr, + how, &src_path); + if (error) + goto err1; + + if (src_path.dentry->d_sb->s_fs_info != c) { + path_put(&src_path); + error = -EXDEV; + goto err1; + } + + snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode)); + } + + dst_dentry = user_path_create(arg.dirfd, + (const char __user *)(unsigned long)arg.dst_ptr, + &dst_path, lookup_flags); + error = PTR_ERR_OR_ZERO(dst_dentry); + if (error) + goto err2; + + if (dst_dentry->d_sb->s_fs_info != c) { + error = -EXDEV; + goto err3; + } + + if (dst_dentry->d_inode) { + error = -EEXIST; + goto err3; + } + + dir = dst_path.dentry->d_inode; + if (IS_DEADDIR(dir)) { + error = -ENOENT; + goto err3; + } + + s_user_ns = dir->i_sb->s_user_ns; + if (!kuid_has_mapping(s_user_ns, current_fsuid()) || + !kgid_has_mapping(s_user_ns, current_fsgid())) { + error = -EOVERFLOW; + goto err3; + } + + error = inode_permission(file_mnt_idmap(filp), + dir, MAY_WRITE | MAY_EXEC); + if (error) + goto err3; + + if (!IS_POSIXACL(dir)) + arg.mode &= ~current_umask(); + + error = security_path_mkdir(&dst_path, dst_dentry, arg.mode); + if (error) + goto err3; + + if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && + !arg.src_ptr) + snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol; + + inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), + dst_dentry, arg.mode|S_IFDIR, + 0, snapshot_src, create_flags); + error = PTR_ERR_OR_ZERO(inode); + if (error) + goto err3; + + d_instantiate(dst_dentry, &inode->v); + fsnotify_mkdir(dir, dst_dentry); +err3: + done_path_create(&dst_path, dst_dentry); +err2: + if (arg.src_ptr) + path_put(&src_path); + + if (retry_estale(error, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } +err1: + up_read(&c->vfs_sb->s_umount); + + return error; +} + +static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, + struct bch_ioctl_subvolume arg) +{ + struct path path; + int ret = 0; + + if (arg.flags) + return -EINVAL; + + ret = user_path_at(arg.dirfd, + (const char __user *)(unsigned long)arg.dst_ptr, + LOOKUP_FOLLOW, &path); + if (ret) + return ret; + + if (path.dentry->d_sb->s_fs_info != c) { + path_put(&path); + return -EXDEV; + } + + ret = __bch2_unlink(path.dentry->d_parent->d_inode, path.dentry, 1); + path_put(&path); + + return ret; +} + long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct bch_inode_info *inode = file_bch_inode(file); @@ -322,6 +474,22 @@ long bch2_fs_file_ioctl(struct file 
*file, unsigned cmd, unsigned long arg) case FS_IOC_GOINGDOWN: return bch2_ioc_goingdown(c, (u32 __user *) arg); + case BCH_IOCTL_SUBVOLUME_CREATE: { + struct bch_ioctl_subvolume i; + + if (copy_from_user(&i, (void __user *) arg, sizeof(i))) + return -EFAULT; + return bch2_ioctl_subvolume_create(c, file, i); + } + + case BCH_IOCTL_SUBVOLUME_DESTROY: { + struct bch_ioctl_subvolume i; + + if (copy_from_user(&i, (void __user *) arg, sizeof(i))) + return -EFAULT; + return bch2_ioctl_subvolume_destroy(c, file, i); + } + default: return bch2_fs_ioctl(c, cmd, (void __user *) arg); } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0d47d9d5737b..7475830bb33f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -240,12 +240,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) struct bch_inode_info *inode; int ret; - /* - * debug assert, to be removed when we start creating - * subvolumes/snapshots: - */ - BUG_ON(inum.subvol != BCACHEFS_ROOT_SUBVOL); - inode = to_bch_ei(iget5_locked(c->vfs_sb, bch2_inode_hash(inum), bch2_iget5_test, @@ -274,7 +268,8 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) struct bch_inode_info * __bch2_create(struct mnt_idmap *idmap, struct bch_inode_info *dir, struct dentry *dentry, - umode_t mode, dev_t rdev, unsigned flags) + umode_t mode, dev_t rdev, subvol_inum snapshot_src, + unsigned flags) { struct bch_fs *c = dir->v.i_sb->s_fs_info; struct btree_trans trans; @@ -319,7 +314,7 @@ retry: from_kuid(i_user_ns(&dir->v), current_fsuid()), from_kgid(i_user_ns(&dir->v), current_fsgid()), mode, rdev, - default_acl, acl, flags) ?: + default_acl, acl, snapshot_src, flags) ?: bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC); if (unlikely(ret)) @@ -426,7 +421,8 @@ static int bch2_mknod(struct mnt_idmap *idmap, umode_t mode, dev_t rdev) { struct bch_inode_info *inode = - __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, 0); + __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev, + (subvol_inum) { 0 }, 0); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -493,7 +489,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, return 0; } -static int bch2_unlink(struct inode *vdir, struct dentry *dentry) +int __bch2_unlink(struct inode *vdir, struct dentry *dentry, + int deleting_snapshot) { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); @@ -509,7 +506,8 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, inode_inum(dir), &dir_u, - &inode_u, &dentry->d_name)); + &inode_u, &dentry->d_name, + deleting_snapshot)); if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); @@ -527,6 +525,11 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) return ret; } +static int bch2_unlink(struct inode *vdir, struct dentry *dentry) +{ + return __bch2_unlink(vdir, dentry, -1); +} + static int bch2_symlink(struct mnt_idmap *idmap, struct inode *vdir, struct dentry *dentry, const char *symname) @@ -536,7 +539,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, int ret; inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, - BCH_CREATE_TMPFILE); + (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); if (unlikely(IS_ERR(inode))) return PTR_ERR(inode); @@ -855,7 +858,7 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, struct bch_inode_info *inode = __bch2_create(idmap, to_bch_ei(vdir), file->f_path.dentry, mode, 0, - BCH_CREATE_TMPFILE); + (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); 
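/*
 * Illustrative note, not part of the patch: a zeroed subvol_inum is the
 * "no snapshot source" sentinel -- when BCH_CREATE_SNAPSHOT isn't set,
 * bch2_create_trans() overwrites snapshot_src and allocates a fresh
 * inode, which is why every ordinary create path passes
 * (subvol_inum) { 0 }.
 */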
if (IS_ERR(inode)) return PTR_ERR(inode); diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index aa755987b36c..40898c4d197b 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -146,7 +146,7 @@ struct bch_inode_unpacked; struct bch_inode_info * __bch2_create(struct mnt_idmap *, struct bch_inode_info *, - struct dentry *, umode_t, dev_t, unsigned); + struct dentry *, umode_t, dev_t, subvol_inum, unsigned); int bch2_fs_quota_transfer(struct bch_fs *, struct bch_inode_info *, @@ -183,6 +183,7 @@ int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, int bch2_setattr_nonsize(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); +int __bch2_unlink(struct inode *, struct dentry *, int); void bch2_vfs_exit(void); int bch2_vfs_init(void); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f9a6a0b3ce7a..16a1eae9b374 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -307,7 +307,7 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, - &dir_hash_info, &iter); + &dir_hash_info, &iter, 0); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -386,7 +386,8 @@ create_lostfound: BTREE_INSERT_LAZY_RW, bch2_create_trans(trans, root_inum, &root, lostfound, &lostfound_str, - 0, 0, S_IFDIR|0700, 0, NULL, NULL, 0)); + 0, 0, S_IFDIR|0700, 0, NULL, NULL, + (subvol_inum) { }, 0)); if (ret) bch_err(c, "error creating lost+found: %i", ret); } @@ -759,7 +760,7 @@ static int fsck_hash_delete_at(struct btree_trans *trans, { int ret; retry: - ret = bch2_hash_delete_at(trans, desc, info, iter) ?: + ret = bch2_hash_delete_at(trans, desc, info, iter, 0) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 47c8fecc6839..64e0b542e779 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1485,7 +1485,7 @@ int bch2_fs_initialize(struct bch_fs *c) &root_inode, &lostfound_inode, &lostfound, 0, 0, S_IFDIR|0700, 0, - NULL, NULL, 0)); + NULL, NULL, (subvol_inum) { 0 }, 0)); if (ret) { bch_err(c, "error creating lost+found"); goto err; diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 6418089531ad..6486e709b700 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -307,7 +307,8 @@ static __always_inline int bch2_hash_delete_at(struct btree_trans *trans, const struct bch_hash_desc desc, const struct bch_hash_info *info, - struct btree_iter *iter) + struct btree_iter *iter, + unsigned update_flags) { struct bkey_i *delete; int ret; @@ -325,7 +326,7 @@ int bch2_hash_delete_at(struct btree_trans *trans, delete->k.p = iter->pos; delete->k.type = ret ? 
KEY_TYPE_hash_whiteout : KEY_TYPE_deleted; - return bch2_trans_update(trans, iter, delete, 0); + return bch2_trans_update(trans, iter, delete, update_flags); } static __always_inline @@ -342,7 +343,7 @@ int bch2_hash_delete(struct btree_trans *trans, if (ret) return ret; - ret = bch2_hash_delete_at(trans, desc, info, &iter); + ret = bch2_hash_delete_at(trans, desc, info, &iter, 0); bch2_trans_iter_exit(trans, &iter); return ret; } -- cgit v1.2.3 From 9a796fdb06b56a1811f1afdd40b793e2848a990e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 Oct 2021 15:08:00 -0400 Subject: bcachefs: bch2_trans_exit() no longer returns errors Now that peek_node()/next_node() are converted to return errors directly, we don't need bch2_trans_exit() to return errors - it's cleaner this way and wasn't used much anymore. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 8 ++------ fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/btree_types.h | 1 - fs/bcachefs/btree_update.h | 6 +++--- fs/bcachefs/dirent.c | 2 +- fs/bcachefs/ec.c | 5 +++-- fs/bcachefs/fs-io.c | 7 ++++--- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 9 ++++++--- fs/bcachefs/migrate.c | 4 ++-- fs/bcachefs/move.c | 2 +- fs/bcachefs/quota.c | 6 ++++-- fs/bcachefs/reflink.c | 2 +- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/xattr.c | 2 +- 15 files changed, 31 insertions(+), 29 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 339b3657683a..25ed4f2ce19c 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1346,10 +1346,8 @@ retry_all: } while (ret); } - if (unlikely(ret == -EIO)) { - trans->error = true; + if (unlikely(ret == -EIO)) goto out; - } BUG_ON(ret && ret != -EINTR); @@ -2781,7 +2779,7 @@ leaked: #endif } -int bch2_trans_exit(struct btree_trans *trans) +void bch2_trans_exit(struct btree_trans *trans) __releases(&c->btree_trans_barrier) { struct btree_insert_entry *i; @@ -2831,8 +2829,6 @@ int bch2_trans_exit(struct btree_trans *trans) trans->mem = (void *) 0x1; trans->paths = (void *) 0x1; - - return trans->error ? 
-EIO : 0; } static void __maybe_unused diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 1cb4261bd66e..4cd05fd06e64 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -351,7 +351,7 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter) void *bch2_trans_kmalloc(struct btree_trans *, size_t); void bch2_trans_begin(struct btree_trans *); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); -int bch2_trans_exit(struct btree_trans *); +void bch2_trans_exit(struct btree_trans *); void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 081b82d3848e..14acbdf34f7b 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -377,7 +377,6 @@ struct btree_trans { u8 nr_sorted; u8 nr_updates; bool used_mempool:1; - bool error:1; bool in_traverse_all:1; bool restarted:1; bool paths_sorted:1; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 6f19b67c398f..2ffee9029f34 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -120,14 +120,14 @@ static inline int bch2_trans_commit(struct btree_trans *trans, #define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ ({ \ struct btree_trans trans; \ - int _ret, _ret2; \ + int _ret; \ \ bch2_trans_init(&trans, (_c), 0, 0); \ _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ _do); \ - _ret2 = bch2_trans_exit(&trans); \ + bch2_trans_exit(&trans); \ \ - _ret ?: _ret2; \ + _ret; \ }) #define trans_for_each_update(_trans, _i) \ diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index cd5468b15ba2..26df20ad090c 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -491,7 +491,7 @@ err: if (ret == -EINTR) goto retry; - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); return ret; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 2c538f9b54f8..7dfa052e9765 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1670,11 +1670,12 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) bch2_trans_iter_init(&trans, &iter, BTREE_ID_stripes, POS(0, U64_MAX), 0); k = bch2_btree_iter_prev(&iter); - if (!IS_ERR_OR_NULL(k.k)) + ret = bkey_err(k); + if (!ret && k.k) idx = k.k->p.offset + 1; bch2_trans_iter_exit(&trans, &iter); - ret = bch2_trans_exit(&trans); + bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 736dd71419a5..079c20cbf10e 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2223,7 +2223,8 @@ err: if (ret == -EINTR) goto retry; - return bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); + return ret; } static int __bch2_truncate_page(struct bch_inode_info *inode, @@ -3125,7 +3126,7 @@ err: if (ret == -EINTR) goto retry; - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); if (ret) return ret; @@ -3240,7 +3241,7 @@ err: if (ret == -EINTR) goto retry; - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 7475830bb33f..334cd335ff11 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1016,7 +1016,7 @@ err: ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); return ret < 0 ? 
ret : 0; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 826a3577ee93..a36bc840a62c 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -981,7 +981,8 @@ static int check_inodes(struct bch_fs *c, bool full) BUG_ON(ret == -EINTR); - return bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); + return ret; } noinline_for_stack @@ -1659,7 +1660,8 @@ fsck_err: goto retry; bch2_trans_iter_exit(&trans, &iter); - return bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); + return ret; } /* Get root directory, create if it doesn't exist: */ @@ -1876,7 +1878,8 @@ static int check_directory_structure(struct bch_fs *c) kfree(path.entries); - return bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); + return ret; } struct nlink_table { diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 111a41159eb2..00ba6e1c92ee 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -100,7 +100,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags } bch2_trans_iter_exit(&trans, &iter); - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); BUG_ON(ret == -EINTR); @@ -180,7 +180,7 @@ next: ret = 0; err: - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); bch2_bkey_buf_exit(&k, c); BUG_ON(ret == -EINTR); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 2f608631cc43..af02f2cf6ee0 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -773,7 +773,7 @@ next_nondata: out: bch2_trans_iter_exit(&trans, &iter); - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); return ret; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 9b0f4d3f176d..17fd5bf107bb 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -374,7 +374,8 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type) } bch2_trans_iter_exit(&trans, &iter); - return bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); + return ret; } void bch2_fs_quota_exit(struct bch_fs *c) @@ -452,7 +453,8 @@ int bch2_fs_quota_read(struct bch_fs *c) } bch2_trans_iter_exit(&trans, &iter); - return bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); + return ret; } /* Enable/disable/delete quotas for an entire filesystem: */ diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index c63c95fc49b1..9bcf4216a286 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -349,7 +349,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(&trans, &inode_iter); } while (ret2 == -EINTR); - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 92e58f5c6bbf..51eb19b84a28 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -327,7 +327,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c } bch2_trans_iter_exit(&trans, &iter); - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); if (ret) return ret; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index ff81a25698ff..dcd2f6a91a72 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -316,7 +316,7 @@ err: if (ret == -EINTR) goto retry; - ret = bch2_trans_exit(&trans) ?: ret; + bch2_trans_exit(&trans); if (ret) return ret; -- cgit v1.2.3 From 2027875bd8318171159495c948461eae2f84936d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Oct 2021 12:03:19 -0400 
Subject: bcachefs: Add BCH_SUBVOLUME_UNLINKED Snapshot deletion needs to become a multi step process, where we unlink, then tear down the page cache, then delete the subvolume - the deleting flag is equivalent to an inode with i_nlink = 0. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 + fs/bcachefs/bcachefs_format.h | 1 + fs/bcachefs/fs-common.c | 30 ++----- fs/bcachefs/fs-common.h | 2 +- fs/bcachefs/fs-ioctl.c | 2 +- fs/bcachefs/fs.c | 11 ++- fs/bcachefs/fs.h | 2 +- fs/bcachefs/fsck.c | 18 ++++- fs/bcachefs/inode.c | 6 +- fs/bcachefs/subvolume.c | 182 ++++++++++++++++++++++++++++++++++++++---- fs/bcachefs/subvolume.h | 5 +- fs/bcachefs/subvolume_types.h | 11 +++ 12 files changed, 223 insertions(+), 51 deletions(-) create mode 100644 fs/bcachefs/subvolume_types.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 1608faae0d0b..567270015008 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -353,6 +353,7 @@ enum bch_time_stats { #include "quota_types.h" #include "rebalance_types.h" #include "replicas_types.h" +#include "subvolume_types.h" #include "super_types.h" /* Number of nodes btree coalesce will try to coalesce at once */ @@ -657,6 +658,9 @@ struct bch_fs { struct bch_snapshot_table __rcu *snapshot_table; struct mutex snapshot_table_lock; struct work_struct snapshot_delete_work; + struct work_struct snapshot_wait_for_pagecache_and_delete_work; + struct snapshot_id_list snapshots_unlinked; + struct mutex snapshots_unlinked_lock; /* BTREE CACHE */ struct bio_set btree_bio; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 481bf643bd6f..8e1423b138a6 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -974,6 +974,7 @@ LE32_BITMASK(BCH_SUBVOLUME_RO, struct bch_subvolume, flags, 0, 1) * can delete it (or whether it should just be rm -rf'd) */ LE32_BITMASK(BCH_SUBVOLUME_SNAP, struct bch_subvolume, flags, 1, 2) +LE32_BITMASK(BCH_SUBVOLUME_UNLINKED, struct bch_subvolume, flags, 2, 3) /* Snapshots */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index c49de741e1e3..5f3429e99115 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -239,7 +239,7 @@ int bch2_unlink_trans(struct btree_trans *trans, struct bch_inode_unpacked *dir_u, struct bch_inode_unpacked *inode_u, const struct qstr *name, - int deleting_snapshot) + bool deleting_snapshot) { struct bch_fs *c = trans->c; struct btree_iter dir_iter = { NULL }; @@ -267,35 +267,19 @@ int bch2_unlink_trans(struct btree_trans *trans, if (ret) goto err; - if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) { + if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) { ret = bch2_empty_dir_trans(trans, inum); if (ret) goto err; } - if (deleting_snapshot < 0 && - inode_u->bi_subvol) { - struct bch_subvolume s; - - ret = bch2_subvolume_get(trans, inode_u->bi_subvol, true, - BTREE_ITER_CACHED| - BTREE_ITER_WITH_UPDATES, - &s); - if (ret) - goto err; - - if (BCH_SUBVOLUME_SNAP(&s)) - deleting_snapshot = 1; + if (deleting_snapshot && !inode_u->bi_subvol) { + ret = -ENOENT; + goto err; } - if (deleting_snapshot == 1) { - if (!inode_u->bi_subvol) { - ret = -ENOENT; - goto err; - } - - ret = bch2_subvolume_delete(trans, inode_u->bi_subvol, - deleting_snapshot); + if (deleting_snapshot || inode_u->bi_subvol) { + ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol); if (ret) goto err; diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h index 9bb0a9676147..dde237859514 100644 
--- a/fs/bcachefs/fs-common.h +++ b/fs/bcachefs/fs-common.h @@ -26,7 +26,7 @@ int bch2_link_trans(struct btree_trans *, int bch2_unlink_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_inode_unpacked *, - const struct qstr *, int); + const struct qstr *, bool); int bch2_rename_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index a12b591ec9ca..de94895ace9f 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -441,7 +441,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, dir = path.dentry->d_parent->d_inode; - ret = __bch2_unlink(dir, path.dentry, 1); + ret = __bch2_unlink(dir, path.dentry, true); if (!ret) { fsnotify_rmdir(dir, path.dentry); d_delete(path.dentry); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 334cd335ff11..c325e5c4325c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -490,7 +490,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, } int __bch2_unlink(struct inode *vdir, struct dentry *dentry, - int deleting_snapshot) + bool deleting_snapshot) { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); @@ -527,7 +527,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { - return __bch2_unlink(vdir, dentry, -1); + return __bch2_unlink(vdir, dentry, false); } static int bch2_symlink(struct mnt_idmap *idmap, @@ -1292,6 +1292,12 @@ static int bch2_vfs_write_inode(struct inode *vinode, return ret; } +static int bch2_drop_inode(struct inode *vinode) +{ + + return generic_drop_inode(vinode); +} + static void bch2_evict_inode(struct inode *vinode) { struct bch_fs *c = vinode->i_sb->s_fs_info; @@ -1496,6 +1502,7 @@ static const struct super_operations bch_super_operations = { .alloc_inode = bch2_alloc_inode, .destroy_inode = bch2_destroy_inode, .write_inode = bch2_vfs_write_inode, + .drop_inode = bch2_drop_inode, .evict_inode = bch2_evict_inode, .sync_fs = bch2_sync_fs, .statfs = bch2_statfs, diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 40898c4d197b..2616b15eb51c 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -183,7 +183,7 @@ int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *, int bch2_setattr_nonsize(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); -int __bch2_unlink(struct inode *, struct dentry *, int); +int __bch2_unlink(struct inode *, struct dentry *, bool); void bch2_vfs_exit(void); int bch2_vfs_init(void); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index a61d380a47b6..6b3eecdef81a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -256,7 +256,7 @@ retry: /* Subvolume root? 
*/ if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(trans, inode_u.bi_subvol, -1); + ret = bch2_subvolume_delete(trans, inode_u.bi_subvol); if (ret) goto err; } @@ -992,12 +992,28 @@ static int check_subvols(struct bch_fs *c) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bkey_s_c_subvolume subvol; int ret; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); for_each_btree_key(&trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { + if (k.k->type != KEY_TYPE_subvolume) + continue; + + subvol = bkey_s_c_to_subvolume(k); + + if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_LAZY_RW, + bch2_subvolume_delete(&trans, iter.pos.offset)); + if (ret) { + bch_err(c, "error deleting subvolume %llu: %i", + iter.pos.offset, ret); + break; + } + } } bch2_trans_iter_exit(&trans, &iter); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 7fccf842a46b..3ae321a99cee 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -709,11 +709,7 @@ retry: bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); /* Subvolume root? */ - if (inode_u.bi_subvol) { - ret = bch2_subvolume_delete(&trans, inode_u.bi_subvol, -1); - if (ret) - goto err; - } + BUG_ON(inode_u.bi_subvol); bkey_inode_generation_init(&delete.k_i); delete.k.p = iter.pos; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 9bd8d61c96fe..58cda98989b1 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -4,6 +4,7 @@ #include "btree_key_cache.h" #include "btree_update.h" #include "error.h" +#include "fs.h" #include "subvolume.h" /* Snapshot tree: */ @@ -541,13 +542,6 @@ err: return ret; } -/* List of snapshot IDs that are being deleted: */ -struct snapshot_id_list { - u32 nr; - u32 size; - u32 *d; -}; - static bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) { unsigned i; @@ -819,9 +813,11 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, return ret; } -/* XXX: mark snapshot id for deletion, walk btree and delete: */ -int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, - int deleting_snapshot) +/* + * Delete subvolume, mark snapshot ID as deleted, queue up snapshot + * deletion/cleanup: + */ +int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) { struct btree_iter iter; struct bkey_s_c k; @@ -849,12 +845,6 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid, subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - if (deleting_snapshot >= 0 && - deleting_snapshot != BCH_SUBVOLUME_SNAP(subvol.v)) { - ret = -ENOENT; - goto err; - } - delete = bch2_trans_kmalloc(trans, sizeof(*delete)); ret = PTR_ERR_OR_ZERO(delete); if (ret) @@ -880,6 +870,163 @@ err: return ret; } +static void bch2_evict_subvolume_inodes(struct bch_fs *c, + struct snapshot_id_list *s) +{ + struct super_block *sb = c->vfs_sb; + struct inode *inode; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + spin_unlock(&sb->s_inode_list_lock); +again: + cond_resched(); + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + if (!(inode->i_state & I_DONTCACHE)) { + d_mark_dontcache(inode); + d_prune_aliases(inode); + 
} + + spin_lock(&inode->i_lock); + if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && + !(inode->i_state & I_FREEING)) { + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + schedule(); + finish_wait(wq, &wait.wq_entry); + goto again; + } + + spin_unlock(&inode->i_lock); + } + spin_unlock(&sb->s_inode_list_lock); +} + +void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, + snapshot_wait_for_pagecache_and_delete_work); + struct snapshot_id_list s; + u32 *id; + int ret = 0; + + while (!ret) { + mutex_lock(&c->snapshots_unlinked_lock); + s = c->snapshots_unlinked; + memset(&c->snapshots_unlinked, 0, sizeof(c->snapshots_unlinked)); + mutex_unlock(&c->snapshots_unlinked_lock); + + if (!s.nr) + break; + + bch2_evict_subvolume_inodes(c, &s); + + for (id = s.d; id < s.d + s.nr; id++) { + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, + bch2_subvolume_delete(&trans, *id)); + if (ret) { + bch_err(c, "error %i deleting subvolume %u", ret, *id); + break; + } + } + + kfree(s.d); + } + + percpu_ref_put(&c->writes); +} + +struct subvolume_unlink_hook { + struct btree_trans_commit_hook h; + u32 subvol; +}; + +int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans, + struct btree_trans_commit_hook *_h) +{ + struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h); + struct bch_fs *c = trans->c; + int ret = 0; + + mutex_lock(&c->snapshots_unlinked_lock); + if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) + ret = snapshot_id_add(&c->snapshots_unlinked, h->subvol); + mutex_unlock(&c->snapshots_unlinked_lock); + + if (ret) + return ret; + + if (unlikely(!percpu_ref_tryget(&c->writes))) + return -EROFS; + + if (!queue_work(system_long_wq, &c->snapshot_wait_for_pagecache_and_delete_work)) + percpu_ref_put(&c->writes); + return 0; +} + +int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_subvolume *n; + struct subvolume_unlink_hook *h; + int ret = 0; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolumes, + POS(0, subvolid), + BTREE_ITER_CACHED| + BTREE_ITER_INTENT); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_subvolume) { + bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvolid); + ret = -EIO; + goto err; + } + + n = bch2_trans_kmalloc(trans, sizeof(*n)); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + goto err; + + bkey_reassemble(&n->k_i, k); + SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); + + ret = bch2_trans_update(trans, &iter, &n->k_i, 0); + if (ret) + goto err; + + h = bch2_trans_kmalloc(trans, sizeof(*h)); + ret = PTR_ERR_OR_ZERO(h); + if (ret) + goto err; + + h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook; + h->subvol = subvolid; + bch2_trans_commit_hook(trans, &h->h); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + int bch2_subvolume_create(struct btree_trans *trans, u64 inode, u32 src_subvolid, u32 *new_subvolid, @@ -977,5 +1124,8 @@ err: int bch2_fs_subvolumes_init(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); + INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, + bch2_subvolume_wait_for_pagecache_and_delete); + 
mutex_init(&c->snapshots_unlinked_lock); return 0; } diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index f98c8c0dbea2..45234c9de0f6 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_SUBVOLUME_H #define _BCACHEFS_SUBVOLUME_H +#include "subvolume_types.h" + void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); const char *bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c); @@ -108,7 +110,8 @@ int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); -int bch2_subvolume_delete(struct btree_trans *, u32, int); +int bch2_subvolume_delete(struct btree_trans *, u32); +int bch2_subvolume_unlink(struct btree_trans *, u32); int bch2_subvolume_create(struct btree_trans *, u64, u32, u32 *, u32 *, bool); diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h new file mode 100644 index 000000000000..9410b9587591 --- /dev/null +++ b/fs/bcachefs/subvolume_types.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SUBVOLUME_TYPES_H +#define _BCACHEFS_SUBVOLUME_TYPES_H + +struct snapshot_id_list { + u32 nr; + u32 size; + u32 *d; +}; + +#endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ -- cgit v1.2.3 From 41f9b7d39fb11c9f306809681bb6991ac96f9b2e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Oct 2021 16:24:39 -0400 Subject: bcachefs: Move bch2_evict_subvolume_inodes() to fs.c This fixes building in userspace - code that's coupled to the kernel VFS interface should live in fs.c Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 54 ++++++++++++++++++++++++++++++++++++++++------ fs/bcachefs/fs.h | 4 ++++ fs/bcachefs/subvolume.c | 57 ------------------------------------------------- fs/bcachefs/subvolume.h | 10 +++++++++ 4 files changed, 61 insertions(+), 64 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c325e5c4325c..7647e117013d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1292,12 +1292,6 @@ static int bch2_vfs_write_inode(struct inode *vinode, return ret; } -static int bch2_drop_inode(struct inode *vinode) -{ - - return generic_drop_inode(vinode); -} - static void bch2_evict_inode(struct inode *vinode) { struct bch_fs *c = vinode->i_sb->s_fs_info; @@ -1318,6 +1312,53 @@ static void bch2_evict_inode(struct inode *vinode) } } +void bch2_evict_subvolume_inodes(struct bch_fs *c, + struct snapshot_id_list *s) +{ + struct super_block *sb = c->vfs_sb; + struct inode *inode; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + spin_unlock(&sb->s_inode_list_lock); +again: + cond_resched(); + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || + (inode->i_state & I_FREEING)) + continue; + + if (!(inode->i_state & I_DONTCACHE)) { + d_mark_dontcache(inode); + d_prune_aliases(inode); + } + + spin_lock(&inode->i_lock); + if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && + !(inode->i_state & I_FREEING)) { + wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + 
spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + schedule(); + finish_wait(wq, &wait.wq_entry); + goto again; + } + + spin_unlock(&inode->i_lock); + } + spin_unlock(&sb->s_inode_list_lock); +} + static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -1502,7 +1543,6 @@ static const struct super_operations bch_super_operations = { .alloc_inode = bch2_alloc_inode, .destroy_inode = bch2_destroy_inode, .write_inode = bch2_vfs_write_inode, - .drop_inode = bch2_drop_inode, .evict_inode = bch2_evict_inode, .sync_fs = bch2_sync_fs, .statfs = bch2_statfs, diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 2616b15eb51c..38c04282da64 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -185,11 +185,15 @@ int bch2_setattr_nonsize(struct mnt_idmap *, struct iattr *); int __bch2_unlink(struct inode *, struct dentry *, bool); +void bch2_evict_subvolume_inodes(struct bch_fs *, struct snapshot_id_list *); + void bch2_vfs_exit(void); int bch2_vfs_init(void); #else +static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, + struct snapshot_id_list *s) {} static inline void bch2_vfs_exit(void) {} static inline int bch2_vfs_init(void) { return 0; } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 58cda98989b1..4d385c9e9268 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -542,16 +542,6 @@ err: return ret; } -static bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) -{ - unsigned i; - - for (i = 0; i < s->nr; i++) - if (id == s->d[i]) - return true; - return false; -} - static int snapshot_id_add(struct snapshot_id_list *s, u32 id) { BUG_ON(snapshot_list_has_id(s, id)); @@ -870,53 +860,6 @@ err: return ret; } -static void bch2_evict_subvolume_inodes(struct bch_fs *c, - struct snapshot_id_list *s) -{ - struct super_block *sb = c->vfs_sb; - struct inode *inode; - - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || - (inode->i_state & I_FREEING)) - continue; - - d_mark_dontcache(inode); - d_prune_aliases(inode); - } - spin_unlock(&sb->s_inode_list_lock); -again: - cond_resched(); - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || - (inode->i_state & I_FREEING)) - continue; - - if (!(inode->i_state & I_DONTCACHE)) { - d_mark_dontcache(inode); - d_prune_aliases(inode); - } - - spin_lock(&inode->i_lock); - if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && - !(inode->i_state & I_FREEING)) { - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); - goto again; - } - - spin_unlock(&inode->i_lock); - } - spin_unlock(&sb->s_inode_list_lock); -} - void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 45234c9de0f6..b5067dc68fc7 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -94,6 +94,16 @@ static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, return 0; } +static inline bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) +{ + 
unsigned i; + + for (i = 0; i < s->nr; i++) + if (id == s->d[i]) + return true; + return false; +} + int bch2_fs_snapshots_check(struct bch_fs *); void bch2_fs_snapshots_exit(struct bch_fs *); int bch2_fs_snapshots_start(struct bch_fs *); -- cgit v1.2.3 From 3e52c22255143bb86860abf26ef29a077ac30314 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Oct 2021 21:14:23 -0400 Subject: bcachefs: Add journal_seq to inode & alloc keys Add fields to inode & alloc keys that record the journal sequence number when they were most recently modified. For alloc keys, this is needed to know what journal sequence number we have to flush before the bucket can be reused. Currently this is tracked in memory, but we'll be getting rid of the in memory bucket array. For inodes, this is needed for fsync when the inode has been evicted from the vfs cache. Currently we use a bloom filter per outstanding journal buf - but that mechanism has been broken since we added the ability to not issue a flush/fua for every journal write. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 76 +++++++++++++-- fs/bcachefs/alloc_background.h | 26 +++-- fs/bcachefs/bcachefs_format.h | 31 +++++- fs/bcachefs/bkey_methods.c | 4 +- fs/bcachefs/btree_types.h | 7 +- fs/bcachefs/buckets.c | 41 ++++++-- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 58 +++++------ fs/bcachefs/inode.c | 211 ++++++++++++++++++++--------------------- fs/bcachefs/inode.h | 17 +++- fs/bcachefs/io.c | 4 +- fs/bcachefs/move.c | 4 +- fs/bcachefs/quota.c | 5 +- fs/bcachefs/recovery.c | 7 +- 14 files changed, 307 insertions(+), 186 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3b6af70fa186..10514476cffe 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -147,10 +147,44 @@ static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out, return 0; } -static void bch2_alloc_pack_v2(struct bkey_alloc_buf *dst, +static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, + struct bkey_s_c k) +{ + struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k); + const u8 *in = a.v->data; + const u8 *end = bkey_val_end(a); + unsigned fieldnr = 0; + int ret; + u64 v; + + out->gen = a.v->gen; + out->oldest_gen = a.v->oldest_gen; + out->data_type = a.v->data_type; + out->journal_seq = le64_to_cpu(a.v->journal_seq); + +#define x(_name, _bits) \ + if (fieldnr < a.v->nr_fields) { \ + ret = bch2_varint_decode_fast(in, end, &v); \ + if (ret < 0) \ + return ret; \ + in += ret; \ + } else { \ + v = 0; \ + } \ + out->_name = v; \ + if (v != out->_name) \ + return -1; \ + fieldnr++; + + BCH_ALLOC_FIELDS_V2() +#undef x + return 0; +} + +static void bch2_alloc_pack_v3(struct bkey_alloc_buf *dst, const struct bkey_alloc_unpacked src) { - struct bkey_i_alloc_v2 *a = bkey_alloc_v2_init(&dst->k); + struct bkey_i_alloc_v3 *a = bkey_alloc_v3_init(&dst->k); unsigned nr_fields = 0, last_nonzero_fieldnr = 0; u8 *out = a->v.data; u8 *end = (void *) &dst[1]; @@ -161,6 +195,7 @@ static void bch2_alloc_pack_v2(struct bkey_alloc_buf *dst, a->v.gen = src.gen; a->v.oldest_gen = src.oldest_gen; a->v.data_type = src.data_type; + a->v.journal_seq = cpu_to_le64(src.journal_seq); #define x(_name, _bits) \ nr_fields++; \ @@ -194,10 +229,17 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) .gen = 0, }; - if (k.k->type == KEY_TYPE_alloc_v2) - bch2_alloc_unpack_v2(&ret, k); - else if (k.k->type == KEY_TYPE_alloc) + switch (k.k->type) { + case KEY_TYPE_alloc: 
bch2_alloc_unpack_v1(&ret, k); + break; + case KEY_TYPE_alloc_v2: + bch2_alloc_unpack_v2(&ret, k); + break; + case KEY_TYPE_alloc_v3: + bch2_alloc_unpack_v3(&ret, k); + break; + } return ret; } @@ -206,7 +248,7 @@ void bch2_alloc_pack(struct bch_fs *c, struct bkey_alloc_buf *dst, const struct bkey_alloc_unpacked src) { - bch2_alloc_pack_v2(dst, src); + bch2_alloc_pack_v3(dst, src); } static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) @@ -249,13 +291,28 @@ const char *bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k) return NULL; } +const char *bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_alloc_unpacked u; + + if (k.k->p.inode >= c->sb.nr_devices || + !c->devs[k.k->p.inode]) + return "invalid device"; + + if (bch2_alloc_unpack_v3(&u, k)) + return "unpack error"; + + return NULL; +} + void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); - pr_buf(out, "gen %u oldest_gen %u data_type %s", - u.gen, u.oldest_gen, bch2_data_types[u.data_type]); + pr_buf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu", + u.gen, u.oldest_gen, bch2_data_types[u.data_type], + u.journal_seq); #define x(_name, ...) pr_buf(out, " " #_name " %llu", (u64) u._name); BCH_ALLOC_FIELDS_V2() #undef x @@ -268,8 +325,7 @@ static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k) struct bucket *g; struct bkey_alloc_unpacked u; - if (k.k->type != KEY_TYPE_alloc && - k.k->type != KEY_TYPE_alloc_v2) + if (!bkey_is_alloc(k.k)) return 0; ca = bch_dev_bkey_exists(c, k.k->p.inode); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index a4f6bf56b18f..370573f8e05d 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -9,6 +9,7 @@ extern const char * const bch2_allocator_states[]; struct bkey_alloc_unpacked { + u64 journal_seq; u64 bucket; u8 dev; u8 gen; @@ -21,19 +22,11 @@ struct bkey_alloc_unpacked { struct bkey_alloc_buf { struct bkey_i k; + struct bch_alloc_v3 v; - union { - struct { #define x(_name, _bits) + _bits / 8 - u8 _pad[8 + BCH_ALLOC_FIELDS_V1()]; + u8 _pad[0 + BCH_ALLOC_FIELDS_V2()]; #undef x - } _v1; - struct { -#define x(_name, _bits) + 8 + _bits / 8 - u8 _pad[8 + BCH_ALLOC_FIELDS_V2()]; -#undef x - } _v2; - }; } __attribute__((packed, aligned(8))); /* How out of date a pointer gen is allowed to be: */ @@ -79,6 +72,7 @@ alloc_mem_to_key(struct btree_iter *iter, const char *bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c); const char *bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c); +const char *bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc (struct bkey_ops) { \ @@ -91,6 +85,18 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .val_to_text = bch2_alloc_to_text, \ } +#define bch2_bkey_ops_alloc_v3 (struct bkey_ops) { \ + .key_invalid = bch2_alloc_v3_invalid, \ + .val_to_text = bch2_alloc_to_text, \ +} + +static inline bool bkey_is_alloc(const struct bkey *k) +{ + return k->type == KEY_TYPE_alloc || + k->type == KEY_TYPE_alloc_v2 || + k->type == KEY_TYPE_alloc_v3; +} + int bch2_alloc_read(struct bch_fs *); static inline void bch2_wake_allocator(struct bch_dev *ca) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 8e1423b138a6..21f1948ef8d0 100644 --- a/fs/bcachefs/bcachefs_format.h +++ 
b/fs/bcachefs/bcachefs_format.h @@ -348,7 +348,9 @@ static inline void bkey_init(struct bkey *k) x(indirect_inline_data, 19) \ x(alloc_v2, 20) \ x(subvolume, 21) \ - x(snapshot, 22) + x(snapshot, 22) \ + x(inode_v2, 23) \ + x(alloc_v3, 24) enum bch_bkey_type { #define x(name, nr) KEY_TYPE_##name = nr, @@ -685,6 +687,16 @@ struct bch_inode { __u8 fields[0]; } __attribute__((packed, aligned(8))); +struct bch_inode_v2 { + struct bch_val v; + + __le64 bi_journal_seq; + __le64 bi_hash_seed; + __le64 bi_flags; + __le16 bi_mode; + __u8 fields[0]; +} __attribute__((packed, aligned(8))); + struct bch_inode_generation { struct bch_val v; @@ -776,6 +788,9 @@ LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31); LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32); +LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); +LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); + /* Dirents */ /* @@ -870,6 +885,17 @@ struct bch_alloc_v2 { x(stripe, 32) \ x(stripe_redundancy, 8) +struct bch_alloc_v3 { + struct bch_val v; + __le64 journal_seq; + __le32 flags; + __u8 nr_fields; + __u8 gen; + __u8 oldest_gen; + __u8 data_type; + __u8 data[]; +} __attribute__((packed, aligned(8))); + enum { #define x(name, _bits) BCH_ALLOC_FIELD_V1_##name, BCH_ALLOC_FIELDS_V1() @@ -1276,7 +1302,8 @@ enum bcachefs_metadata_version { bcachefs_metadata_version_snapshot_2 = 15, bcachefs_metadata_version_reflink_p_fix = 16, bcachefs_metadata_version_subvol_dirent = 17, - bcachefs_metadata_version_max = 18, + bcachefs_metadata_version_inode_v2 = 18, + bcachefs_metadata_version_max = 19, }; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index f7f4139072b5..c93004741b87 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -113,6 +113,7 @@ static unsigned bch2_key_types_allowed[] = { (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_inode)| + (1U << KEY_TYPE_inode_v2)| (1U << KEY_TYPE_inode_generation), [BKEY_TYPE_dirents] = (1U << KEY_TYPE_deleted)| @@ -128,7 +129,8 @@ static unsigned bch2_key_types_allowed[] = { [BKEY_TYPE_alloc] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_alloc)| - (1U << KEY_TYPE_alloc_v2), + (1U << KEY_TYPE_alloc_v2)| + (1U << KEY_TYPE_alloc_v3), [BKEY_TYPE_quotas] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_quota), diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index d8c35ba9ec89..5331626e62a5 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -606,6 +606,7 @@ static inline bool btree_node_is_extents(struct btree *b) #define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS \ ((1U << BKEY_TYPE_alloc)| \ + (1U << BKEY_TYPE_inodes)| \ (1U << BKEY_TYPE_stripes)| \ (1U << BKEY_TYPE_snapshots)) @@ -655,8 +656,12 @@ enum btree_update_flags { #define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) #define BTREE_TRIGGER_WANTS_OLD_AND_NEW \ - ((1U << KEY_TYPE_stripe)| \ + ((1U << KEY_TYPE_alloc)| \ + (1U << KEY_TYPE_alloc_v2)| \ + (1U << KEY_TYPE_alloc_v3)| \ + (1U << KEY_TYPE_stripe)| \ (1U << KEY_TYPE_inode)| \ + (1U << KEY_TYPE_inode_v2)| \ (1U << KEY_TYPE_snapshot)) static inline bool btree_node_type_needs_gc(enum btree_node_type type) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 6e1837a0fc64..b51b1cf3ca25 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -13,6 +13,7 @@ #include 
"buckets.h" #include "ec.h" #include "error.h" +#include "inode.h" #include "movinggc.h" #include "recovery.h" #include "reflink.h" @@ -541,8 +542,7 @@ static int bch2_mark_alloc(struct btree_trans *trans, struct bucket_mark old_m, m; /* We don't do anything for deletions - do we?: */ - if (new.k->type != KEY_TYPE_alloc && - new.k->type != KEY_TYPE_alloc_v2) + if (!bkey_is_alloc(new.k)) return 0; /* @@ -552,6 +552,15 @@ static int bch2_mark_alloc(struct btree_trans *trans, !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) return 0; + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_alloc_v3 *v = (struct bch_alloc_v3 *) new.v; + + BUG_ON(!journal_seq); + BUG_ON(new.k->type != KEY_TYPE_alloc_v3); + + v->journal_seq = cpu_to_le64(journal_seq); + } + ca = bch_dev_bkey_exists(c, new.k->p.inode); if (new.k->p.offset >= ca->mi.nbuckets) @@ -1095,12 +1104,24 @@ static int bch2_mark_inode(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bch_fs_usage __percpu *fs_usage; + u64 journal_seq = trans->journal_res.seq; - preempt_disable(); - fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC); - fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode; - fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode; - preempt_enable(); + if (flags & BTREE_TRIGGER_INSERT) { + struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v; + + BUG_ON(!journal_seq); + BUG_ON(new.k->type != KEY_TYPE_inode_v2); + + v->bi_journal_seq = cpu_to_le64(journal_seq); + } + + if (flags & BTREE_TRIGGER_GC) { + preempt_disable(); + fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); + fs_usage->nr_inodes += bkey_is_inode(new.k); + fs_usage->nr_inodes -= bkey_is_inode(old.k); + preempt_enable(); + } return 0; } @@ -1219,6 +1240,7 @@ static int bch2_mark_key_locked(struct btree_trans *trans, switch (k.k->type) { case KEY_TYPE_alloc: case KEY_TYPE_alloc_v2: + case KEY_TYPE_alloc_v3: return bch2_mark_alloc(trans, old, new, flags); case KEY_TYPE_btree_ptr: case KEY_TYPE_btree_ptr_v2: @@ -1228,6 +1250,7 @@ static int bch2_mark_key_locked(struct btree_trans *trans, case KEY_TYPE_stripe: return bch2_mark_stripe(trans, old, new, flags); case KEY_TYPE_inode: + case KEY_TYPE_inode_v2: return bch2_mark_inode(trans, old, new, flags); case KEY_TYPE_reservation: return bch2_mark_reservation(trans, old, new, flags); @@ -1685,8 +1708,7 @@ static int bch2_trans_mark_inode(struct btree_trans *trans, struct bkey_s_c new, unsigned flags) { - int nr = (new.k->type == KEY_TYPE_inode) - - (old.k->type == KEY_TYPE_inode); + int nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); if (nr) { struct replicas_delta_list *d = @@ -1834,6 +1856,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, case KEY_TYPE_stripe: return bch2_trans_mark_stripe(trans, old, new, flags); case KEY_TYPE_inode: + case KEY_TYPE_inode_v2: return bch2_trans_mark_inode(trans, old, new, flags); case KEY_TYPE_reservation: return bch2_trans_mark_reservation(trans, k, flags); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 7647e117013d..64627543fe17 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1207,7 +1207,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum, inode->v.i_size = bi->bi_size; inode->ei_flags = 0; - inode->ei_journal_seq = 0; + inode->ei_journal_seq = bi->bi_journal_seq; inode->ei_quota_reserved = 0; inode->ei_qid = bch_qid(bi); inode->ei_subvol = inum.subvol; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9519ced976f2..361dbf338023 100644 --- a/fs/bcachefs/fsck.c 
+++ b/fs/bcachefs/fsck.c @@ -133,7 +133,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, goto err; } - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); + ret = bch2_inode_unpack(k, inode); err: if (ret && ret != -EINTR) bch_err(trans->c, "error %i fetching inode %llu", @@ -157,8 +157,8 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, if (ret) goto err; - ret = k.k->type == KEY_TYPE_inode - ? bch2_inode_unpack(bkey_s_c_to_inode(k), inode) + ret = bkey_is_inode(k.k) + ? bch2_inode_unpack(k, inode) : -ENOENT; if (!ret) *snapshot = iter.pos.snapshot; @@ -261,7 +261,7 @@ retry: if (ret) goto err; - if (k.k->type != KEY_TYPE_inode) { + if (!bkey_is_inode(k.k)) { bch2_fs_inconsistent(trans->c, "inode %llu:%u not found when deleting", inum, snapshot); @@ -269,7 +269,7 @@ retry: goto err; } - bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); + bch2_inode_unpack(k, &inode_u); /* Subvolume root? */ if (inode_u.bi_subvol) { @@ -581,7 +581,7 @@ static int inode_walker_realloc(struct inode_walker *w) } static int add_inode(struct bch_fs *c, struct inode_walker *w, - struct bkey_s_c_inode inode) + struct bkey_s_c inode) { struct bch_inode_unpacked u; int ret; @@ -623,8 +623,8 @@ static int __walk_inode(struct btree_trans *trans, if (k.k->p.offset != pos.inode) break; - if (k.k->type == KEY_TYPE_inode) - add_inode(c, w, bkey_s_c_to_inode(k)); + if (bkey_is_inode(k.k)) + add_inode(c, w, k); } bch2_trans_iter_exit(trans, &iter); @@ -676,11 +676,11 @@ static int __get_visible_inodes(struct btree_trans *trans, if (k.k->p.offset != inum) break; - if (k.k->type != KEY_TYPE_inode) + if (!bkey_is_inode(k.k)) continue; if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) { - add_inode(c, w, bkey_s_c_to_inode(k)); + add_inode(c, w, k); if (k.k->p.snapshot >= s->pos.snapshot) break; } @@ -805,7 +805,6 @@ static int check_inode(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_s_c k; - struct bkey_s_c_inode inode; struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -830,19 +829,17 @@ static int check_inode(struct btree_trans *trans, if (bch2_snapshot_internal_node(c, k.k->p.snapshot)) return 0; - if (k.k->type != KEY_TYPE_inode) + if (!bkey_is_inode(k.k)) return 0; - inode = bkey_s_c_to_inode(k); + BUG_ON(bch2_inode_unpack(k, &u)); if (!full && - !(inode.v->bi_flags & (BCH_INODE_I_SIZE_DIRTY| - BCH_INODE_I_SECTORS_DIRTY| - BCH_INODE_UNLINKED))) + !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY| + BCH_INODE_I_SECTORS_DIRTY| + BCH_INODE_UNLINKED))) return 0; - BUG_ON(bch2_inode_unpack(inode, &u)); - if (prev->bi_inum != u.bi_inum) *prev = u; @@ -1963,10 +1960,10 @@ static int check_directory_structure(struct bch_fs *c) BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (k.k->type != KEY_TYPE_inode) + if (!bkey_is_inode(k.k)) continue; - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); + ret = bch2_inode_unpack(k, &u); if (ret) { /* Should have been caught earlier in fsck: */ bch_err(c, "error unpacking inode %llu: %i", k.k->p.offset, ret); @@ -2070,7 +2067,6 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_s_c_inode inode; struct bch_inode_unpacked u; int ret = 0; @@ -2081,21 +2077,19 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - if (k.k->type != KEY_TYPE_inode) + if (!bkey_is_inode(k.k)) continue; - inode = 
bkey_s_c_to_inode(k); + /* Should never fail, checked by bch2_inode_invalid: */ + BUG_ON(bch2_inode_unpack(k, &u)); /* * Backpointer and directory structure checks are sufficient for * directories, since they can't have hardlinks: */ - if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) + if (S_ISDIR(le16_to_cpu(u.bi_mode))) continue; - /* Should never fail, checked by bch2_inode_invalid: */ - BUG_ON(bch2_inode_unpack(inode, &u)); - if (!u.bi_nlink) continue; @@ -2169,7 +2163,6 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_s_c_inode inode; struct bch_inode_unpacked u; struct nlink *link = links->d; int ret = 0; @@ -2184,14 +2177,13 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, if (k.k->p.offset >= range_end) break; - if (k.k->type != KEY_TYPE_inode) + if (!bkey_is_inode(k.k)) continue; - inode = bkey_s_c_to_inode(k); - if (S_ISDIR(le16_to_cpu(inode.v->bi_mode))) - continue; + BUG_ON(bch2_inode_unpack(k, &u)); - BUG_ON(bch2_inode_unpack(inode, &u)); + if (S_ISDIR(le16_to_cpu(u.bi_mode))) + continue; if (!u.bi_nlink) continue; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 3ae321a99cee..728545141a39 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -35,29 +35,6 @@ static const u8 bits_table[8] = { 13 * 8 - 8, }; -static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo) -{ - __be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), }; - unsigned shift, bytes, bits = likely(!hi) - ? fls64(lo) - : fls64(hi) + 64; - - for (shift = 1; shift <= 8; shift++) - if (bits < bits_table[shift - 1]) - goto got_shift; - - BUG(); -got_shift: - bytes = byte_table[shift - 1]; - - BUG_ON(out + bytes > end); - - memcpy(out, (u8 *) in + 16 - bytes, bytes); - *out |= (1 << 8) >> shift; - - return bytes; -} - static int inode_decode_field(const u8 *in, const u8 *end, u64 out[2], unsigned *out_bits) { @@ -92,42 +69,11 @@ static int inode_decode_field(const u8 *in, const u8 *end, return bytes; } -static noinline void bch2_inode_pack_v1(struct bkey_inode_buf *packed, - const struct bch_inode_unpacked *inode) -{ - struct bkey_i_inode *k = &packed->inode; - u8 *out = k->v.fields; - u8 *end = (void *) &packed[1]; - u8 *last_nonzero_field = out; - unsigned nr_fields = 0, last_nonzero_fieldnr = 0; - unsigned bytes; - -#define x(_name, _bits) \ - out += inode_encode_field(out, end, 0, inode->_name); \ - nr_fields++; \ - \ - if (inode->_name) { \ - last_nonzero_field = out; \ - last_nonzero_fieldnr = nr_fields; \ - } - - BCH_INODE_FIELDS() -#undef x - - out = last_nonzero_field; - nr_fields = last_nonzero_fieldnr; - - bytes = out - (u8 *) &packed->inode.v; - set_bkey_val_bytes(&packed->inode.k, bytes); - memset_u64s_tail(&packed->inode.v, 0, bytes); - - SET_INODE_NR_FIELDS(&k->v, nr_fields); -} - -static void bch2_inode_pack_v2(struct bkey_inode_buf *packed, - const struct bch_inode_unpacked *inode) +void bch2_inode_pack(struct bch_fs *c, + struct bkey_inode_buf *packed, + const struct bch_inode_unpacked *inode) { - struct bkey_i_inode *k = &packed->inode; + struct bkey_i_inode_v2 *k = &packed->inode; u8 *out = k->v.fields; u8 *end = (void *) &packed[1]; u8 *last_nonzero_field = out; @@ -135,6 +81,14 @@ static void bch2_inode_pack_v2(struct bkey_inode_buf *packed, unsigned bytes; int ret; + bkey_inode_v2_init(&packed->inode.k_i); + packed->inode.k.p.offset = inode->bi_inum; + packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq); + packed->inode.v.bi_hash_seed = 
inode->bi_hash_seed; + packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); + packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); + packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); + #define x(_name, _bits) \ nr_fields++; \ \ @@ -165,30 +119,12 @@ static void bch2_inode_pack_v2(struct bkey_inode_buf *packed, set_bkey_val_bytes(&packed->inode.k, bytes); memset_u64s_tail(&packed->inode.v, 0, bytes); - SET_INODE_NR_FIELDS(&k->v, nr_fields); -} - -void bch2_inode_pack(struct bch_fs *c, - struct bkey_inode_buf *packed, - const struct bch_inode_unpacked *inode) -{ - bkey_inode_init(&packed->inode.k_i); - packed->inode.k.p.offset = inode->bi_inum; - packed->inode.v.bi_hash_seed = inode->bi_hash_seed; - packed->inode.v.bi_flags = cpu_to_le32(inode->bi_flags); - packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); - - if (c->sb.features & (1ULL << BCH_FEATURE_new_varint)) { - SET_INODE_NEW_VARINT(&packed->inode.v, true); - bch2_inode_pack_v2(packed, inode); - } else { - bch2_inode_pack_v1(packed, inode); - } + SET_INODEv2_NR_FIELDS(&k->v, nr_fields); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { struct bch_inode_unpacked unpacked; - int ret = bch2_inode_unpack(inode_i_to_s_c(&packed->inode), + int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked); BUG_ON(ret); BUG_ON(unpacked.bi_inum != inode->bi_inum); @@ -237,17 +173,16 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, return 0; } -static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode, - struct bch_inode_unpacked *unpacked) +static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked, + const u8 *in, const u8 *end, + unsigned nr_fields) { - const u8 *in = inode.v->fields; - const u8 *end = bkey_val_end(inode); unsigned fieldnr = 0; int ret; u64 v[2]; #define x(_name, _bits) \ - if (fieldnr < INODE_NR_FIELDS(inode.v)) { \ + if (fieldnr < nr_fields) { \ ret = bch2_varint_decode_fast(in, end, &v[0]); \ if (ret < 0) \ return ret; \ @@ -277,21 +212,43 @@ static int bch2_inode_unpack_v2(struct bkey_s_c_inode inode, return 0; } -int bch2_inode_unpack(struct bkey_s_c_inode inode, +int bch2_inode_unpack(struct bkey_s_c k, struct bch_inode_unpacked *unpacked) { - unpacked->bi_inum = inode.k->p.offset; - unpacked->bi_hash_seed = inode.v->bi_hash_seed; - unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); - unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); - - if (INODE_NEW_VARINT(inode.v)) { - return bch2_inode_unpack_v2(inode, unpacked); - } else { - return bch2_inode_unpack_v1(inode, unpacked); + switch (k.k->type) { + case KEY_TYPE_inode: { + struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); + + unpacked->bi_inum = inode.k->p.offset; + unpacked->bi_hash_seed = inode.v->bi_hash_seed; + unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags); + unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); + + if (INODE_NEW_VARINT(inode.v)) { + return bch2_inode_unpack_v2(unpacked, inode.v->fields, + bkey_val_end(inode), + INODE_NR_FIELDS(inode.v)); + } else { + return bch2_inode_unpack_v1(inode, unpacked); + } + break; + } + case KEY_TYPE_inode_v2: { + struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); + + unpacked->bi_inum = inode.k->p.offset; + unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq); + unpacked->bi_hash_seed = inode.v->bi_hash_seed; + unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags); + unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode); + + return bch2_inode_unpack_v2(unpacked, inode.v->fields, + bkey_val_end(inode), + INODEv2_NR_FIELDS(inode.v)); + } + default: + 
BUG(); } - - return 0; } int bch2_inode_peek(struct btree_trans *trans, @@ -317,11 +274,11 @@ int bch2_inode_peek(struct btree_trans *trans, if (ret) goto err; - ret = k.k->type == KEY_TYPE_inode ? 0 : -ENOENT; + ret = bkey_is_inode(k.k) ? 0 : -ENOENT; if (ret) goto err; - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); + ret = bch2_inode_unpack(k, inode); if (ret) goto err; @@ -363,7 +320,43 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) return "invalid str hash type"; - if (bch2_inode_unpack(inode, &unpacked)) + if (bch2_inode_unpack(k, &unpacked)) + return "invalid variable length fields"; + + if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) + return "invalid data checksum type"; + + if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) + return "invalid data checksum type"; + + if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && + unpacked.bi_nlink != 0) + return "flagged as unlinked but bi_nlink != 0"; + + if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode)) + return "subvolume root but not a directory"; + + return NULL; +} + +const char *bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); + struct bch_inode_unpacked unpacked; + + if (k.k->p.inode) + return "nonzero k.p.inode"; + + if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) + return "incorrect value size"; + + if (k.k->p.offset < BLOCKDEV_INODE_MAX) + return "fs inode in blockdev range"; + + if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR) + return "invalid str hash type"; + + if (bch2_inode_unpack(k, &unpacked)) return "invalid variable length fields"; if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) @@ -384,10 +377,12 @@ const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) { - pr_buf(out, "mode %o flags %x ", inode->bi_mode, inode->bi_flags); + pr_buf(out, "mode %o flags %x journal_seq %llu", + inode->bi_mode, inode->bi_flags, + inode->bi_journal_seq); #define x(_name, _bits) \ - pr_buf(out, #_name " %llu ", (u64) inode->_name); + pr_buf(out, " "#_name " %llu", (u64) inode->_name); BCH_INODE_FIELDS() #undef x } @@ -401,15 +396,14 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); - struct bch_inode_unpacked unpacked; + struct bch_inode_unpacked inode; - if (bch2_inode_unpack(inode, &unpacked)) { + if (bch2_inode_unpack(k, &inode)) { pr_buf(out, "(unpack error)"); return; } - __bch2_inode_unpacked_to_text(out, &unpacked); + __bch2_inode_unpacked_to_text(out, &inode); } const char *bch2_inode_generation_invalid(const struct bch_fs *c, @@ -485,6 +479,7 @@ static inline u32 bkey_generation(struct bkey_s_c k) { switch (k.k->type) { case KEY_TYPE_inode: + case KEY_TYPE_inode_v2: BUG(); case KEY_TYPE_inode_generation: return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); @@ -542,7 +537,7 @@ again: } if (k.k->p.snapshot == snapshot && - k.k->type != KEY_TYPE_inode && + !bkey_is_inode(k.k) && !bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) { bch2_btree_iter_advance(iter); continue; @@ -585,7 +580,7 @@ found_slot: } /* We may have raced while the iterator wasn't pointing at pos: */ - if (k.k->type == KEY_TYPE_inode || + if 
(bkey_is_inode(k.k) || bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p)) goto again; @@ -698,7 +693,7 @@ retry: if (ret) goto err; - if (k.k->type != KEY_TYPE_inode) { + if (!bkey_is_inode(k.k)) { bch2_fs_inconsistent(trans.c, "inode %llu not found when deleting", inum.inum); @@ -706,7 +701,7 @@ retry: goto err; } - bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u); + bch2_inode_unpack(k, &inode_u); /* Subvolume root? */ BUG_ON(inode_u.bi_subvol); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 009b807cc167..d433d48de4e0 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -7,6 +7,7 @@ extern const char * const bch2_inode_opts[]; const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); +const char *bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode (struct bkey_ops) { \ @@ -14,6 +15,17 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .val_to_text = bch2_inode_to_text, \ } +#define bch2_bkey_ops_inode_v2 (struct bkey_ops) { \ + .key_invalid = bch2_inode_v2_invalid, \ + .val_to_text = bch2_inode_to_text, \ +} + +static inline bool bkey_is_inode(const struct bkey *k) +{ + return k->type == KEY_TYPE_inode || + k->type == KEY_TYPE_inode_v2; +} + const char *bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, @@ -34,6 +46,7 @@ typedef u64 u96; struct bch_inode_unpacked { u64 bi_inum; + u64 bi_journal_seq; __le64 bi_hash_seed; u32 bi_flags; u16 bi_mode; @@ -44,7 +57,7 @@ struct bch_inode_unpacked { }; struct bkey_inode_buf { - struct bkey_i_inode inode; + struct bkey_i_inode_v2 inode; #define x(_name, _bits) + 8 + _bits / 8 u8 _pad[0 + BCH_INODE_FIELDS()]; @@ -53,7 +66,7 @@ struct bkey_inode_buf { void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *, const struct bch_inode_unpacked *); -int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *); +int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index ca4e7a5a64b9..0a9cb4d489f4 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -337,12 +337,12 @@ int bch2_extent_update(struct btree_trans *trans, if (ret) goto err; - ret = inode.k->type == KEY_TYPE_inode ? 0 : -ENOENT; + ret = bkey_is_inode(inode.k) ? 0 : -ENOENT; if (ret) goto err; if (i_sectors_delta || new_i_size) { - ret = bch2_inode_unpack(bkey_s_c_to_inode(inode), &inode_u); + ret = bch2_inode_unpack(inode, &inode_u); if (ret) goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 2f260360b089..249d0b2be167 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -623,11 +623,11 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos, goto err; } - ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO; + ret = bkey_is_inode(k.k) ? 
0 : -EIO; if (ret) goto err; - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); + ret = bch2_inode_unpack(k, inode); if (ret) goto err; err: diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 17fd5bf107bb..5f1216da76d0 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -439,9 +439,8 @@ int bch2_fs_quota_read(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - switch (k.k->type) { - case KEY_TYPE_inode: - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); + if (bkey_is_inode(k.k)) { + ret = bch2_inode_unpack(k, &u); if (ret) return ret; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 29fae6dbce76..d8e511a0664e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1015,13 +1015,13 @@ static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) if (ret) goto err; - if (k.k->type != KEY_TYPE_inode) { + if (!bkey_is_inode(k.k)) { bch_err(c, "root inode not found"); ret = -ENOENT; goto err; } - ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &inode); + ret = bch2_inode_unpack(k, &inode); BUG_ON(ret); inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; @@ -1093,6 +1093,9 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "filesystem version is prior to subvol_dirent - upgrading"); c->opts.version_upgrade = true; c->opts.fsck = true; + } else if (c->sb.version < bcachefs_metadata_version_inode_v2) { + bch_info(c, "filesystem version is prior to inode_v2 - upgrading"); + c->opts.version_upgrade = true; } } -- cgit v1.2.3 From 0e030f5e2014bf9a33e977820cf64fce4258cf1d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 3 Nov 2021 22:33:32 -0400 Subject: bcachefs: Kill journal buf bloom filter This was used for recording which inodes have been modified by in-flight journal writes, but was broken and has been superseded.
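For the curious, the removed filter worked roughly like this (a minimal standalone sketch, not the kernel code - the hash and constants here are illustrative; the real filter was the 8192-bit has_inode[] array removed below, indexed with hash_64()):

  #include <stdbool.h>
  #include <stdint.h>

  #define FILTER_BITS	8192UL
  #define BITS_PER_WORD	(8 * sizeof(unsigned long))

  /* one filter per journal buffer */
  struct buf_filter {
  	unsigned long bits[FILTER_BITS / BITS_PER_WORD];
  };

  /* illustrative stand-in for the kernel's hash_64() */
  static unsigned long filter_hash(uint64_t inum)
  {
  	return (unsigned long) ((inum * 0x9e3779b97f4a7c15ULL) % FILTER_BITS);
  }

  /* insert path: record "this buffer may hold updates for inum" */
  static void filter_set(struct buf_filter *f, uint64_t inum)
  {
  	unsigned long h = filter_hash(inum);

  	f->bits[h / BITS_PER_WORD] |= 1UL << (h % BITS_PER_WORD);
  }

  /* fsync path: "no" is exact, "yes" may be a false positive */
  static bool filter_maybe_has(struct buf_filter *f, uint64_t inum)
  {
  	unsigned long h = filter_hash(inum);

  	return f->bits[h / BITS_PER_WORD] & (1UL << (h % BITS_PER_WORD));
  }

  int main(void)
  {
  	struct buf_filter f = {{ 0 }};

  	filter_set(&f, 4096);
  	return filter_maybe_has(&f, 4096) ? 0 : 1;
  }

A false positive only costs a spurious journal flush, but a filter like this can't say at which sequence number an inode was last modified - the per-inode bi_journal_seq added by the previous patch records that exactly, which is what the next patch switches fsync over to.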
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_leaf.c | 3 --- fs/bcachefs/fs.c | 4 ---- fs/bcachefs/journal.c | 51 ----------------------------------------- fs/bcachefs/journal.h | 15 ------------ fs/bcachefs/journal_types.h | 2 -- 5 files changed, 75 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index b9c93182f2de..4e9f7e3b5a61 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -363,9 +363,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans, i->level, i->k); - bch2_journal_set_has_inode(j, &trans->journal_res, - i->k->k.p.inode); - if (trans->journal_seq) *trans->journal_seq = trans->journal_res.seq; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 64627543fe17..12178bd15c34 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -58,8 +58,6 @@ static void journal_seq_copy(struct bch_fs *c, if (old >= journal_seq) break; } while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old); - - bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq); } static void __pagecache_lock_put(struct pagecache_lock *lock, long i) @@ -258,8 +256,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) bch2_vfs_inode_init(c, inum, inode, &inode_u); - inode->ei_journal_seq = bch2_inode_journal_seq(&c->journal, inum.inum); - unlock_new_inode(&inode->v); return &inode->v; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index f72e3124d351..1abd1ac560e6 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -87,8 +87,6 @@ static void bch2_journal_buf_init(struct journal *j) buf->must_flush = false; buf->separate_flush = false; - memset(buf->has_inode, 0, sizeof(buf->has_inode)); - memset(buf->data, 0, sizeof(*buf->data)); buf->data->seq = cpu_to_le64(journal_cur_seq(j)); buf->data->u64s = 0; @@ -334,55 +332,6 @@ static void journal_write_work(struct work_struct *work) journal_entry_close(j); } -/* - * Given an inode number, if that inode number has data in the journal that - * hasn't yet been flushed, return the journal sequence number that needs to be - * flushed: - */ -u64 bch2_inode_journal_seq(struct journal *j, u64 inode) -{ - size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); - union journal_res_state s; - unsigned i; - u64 seq; - - - spin_lock(&j->lock); - seq = journal_cur_seq(j); - s = READ_ONCE(j->reservations); - i = s.idx; - - while (1) { - if (test_bit(h, j->buf[i].has_inode)) - goto out; - - if (i == s.unwritten_idx) - break; - - i = (i - 1) & JOURNAL_BUF_MASK; - seq--; - } - - seq = 0; -out: - spin_unlock(&j->lock); - - return seq; -} - -void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq) -{ - size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8)); - struct journal_buf *buf; - - spin_lock(&j->lock); - - if ((buf = journal_seq_to_buf(j, seq))) - set_bit(h, buf->has_inode); - - spin_unlock(&j->lock); -} - static int __journal_res_get(struct journal *j, struct journal_res *res, unsigned flags) { diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 99fd253648bf..2cfb6c7f0d14 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -141,9 +141,6 @@ static inline u64 journal_cur_seq(struct journal *j) return j->pin.back - 1; } -u64 bch2_inode_journal_seq(struct journal *, u64); -void bch2_journal_set_has_inum(struct journal *, u64, u64); - static inline int journal_state_count(union journal_res_state s, int idx) { switch (idx) { @@ 
-163,18 +160,6 @@ static inline void journal_state_inc(union journal_res_state *s) s->buf3_count += s->idx == 3; } -static inline void bch2_journal_set_has_inode(struct journal *j, - struct journal_res *res, - u64 inum) -{ - struct journal_buf *buf = &j->buf[res->idx]; - unsigned long bit = hash_64(inum, ilog2(sizeof(buf->has_inode) * 8)); - - /* avoid atomic op if possible */ - if (unlikely(!test_bit(bit, buf->has_inode))) - set_bit(bit, buf->has_inode); -} - /* * Amount of space that will be taken up by some keys in the journal (i.e. * including the jset header) diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 0647a53eb35c..0fc6569ef149 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -34,8 +34,6 @@ struct journal_buf { bool noflush; /* write has already been kicked off, and was noflush */ bool must_flush; /* something wants a flush */ bool separate_flush; - /* bloom filter: */ - unsigned long has_inode[1024 / sizeof(unsigned long)]; }; /* -- cgit v1.2.3 From 68a2054d88f7cd2866806148d9a2e4389eb46992 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Nov 2021 15:17:13 -0400 Subject: bcachefs: Switch fsync to use bi_journal_seq Now that we're recording in each inode the journal sequence number of the most recent update, fsync becomes a lot simpler and we can delete all the plumbing for ei_journal_seq. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 3 +-- fs/bcachefs/fs-io.c | 58 +++++++++++++++++++++++++-------------------------- fs/bcachefs/fs.c | 52 +++++++-------------------------------------- fs/bcachefs/fs.h | 1 - fs/bcachefs/io.c | 9 ++++---- fs/bcachefs/io.h | 10 ++------- fs/bcachefs/reflink.c | 8 +++---- fs/bcachefs/reflink.h | 2 +- fs/bcachefs/xattr.c | 18 +++++++++++++++- 9 files changed, 65 insertions(+), 96 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 2afa15b26700..51a0b48a5313 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -330,8 +330,7 @@ retry: inode_u.bi_mode = mode; ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, 0); + bch2_trans_commit(&trans, NULL, NULL, 0); btree_err: bch2_trans_iter_exit(&trans, &inode_iter); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index f4c97fc0e3d1..7de6b7a7aa60 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1096,7 +1096,6 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, op = &w->io->op; bch2_write_op_init(op, c, w->opts); op->target = w->opts.foreground_target; - op_journal_seq_set(op, &inode->ei_journal_seq); op->nr_replicas = nr_replicas; op->res.nr_replicas = nr_replicas; op->write_point = writepoint_hashed(inode->ei_last_dirtied); @@ -1947,7 +1946,6 @@ static long bch2_dio_write_loop(struct dio_write *dio) bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode)); dio->op.end_io = bch2_dio_write_loop_async; dio->op.target = dio->op.opts.foreground_target; - op_journal_seq_set(&dio->op, &inode->ei_journal_seq); dio->op.write_point = writepoint_hashed((unsigned long) current); dio->op.nr_replicas = dio->op.opts.data_replicas; dio->op.subvol = inode->ei_subvol; @@ -2164,29 +2162,36 @@ unlock: /* fsync: */ -int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) +/* + * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an + * insert trigger: look up the btree inode instead + */ +static int bch2_flush_inode(struct bch_fs *c, subvol_inum inum) { - struct 
bch_inode_info *inode = file_bch_inode(file); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret, ret2; + struct bch_inode_unpacked inode; + int ret; - ret = file_write_and_wait_range(file, start, end); + if (c->opts.journal_flush_disabled) + return 0; + + ret = bch2_inode_find_by_inum(c, inum, &inode); if (ret) return ret; - if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC)) - goto out; + return bch2_journal_flush_seq(&c->journal, inode.bi_journal_seq); +} - ret = sync_inode_metadata(&inode->v, 1); - if (ret) - return ret; -out: - if (!c->opts.journal_flush_disabled) - ret = bch2_journal_flush_seq(&c->journal, - inode->ei_journal_seq); - ret2 = file_check_and_advance_wb_err(file); +int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + int ret, ret2, ret3; + + ret = file_write_and_wait_range(file, start, end); + ret2 = sync_inode_metadata(&inode->v, 1); + ret3 = bch2_flush_inode(c, inode_inum(inode)); - return ret ?: ret2; + return ret ?: ret2 ?: ret3; } /* truncate: */ @@ -2448,7 +2453,7 @@ int bch2_truncate(struct mnt_idmap *idmap, ret = bch2_fpunch(c, inode_inum(inode), round_up(iattr->ia_size, block_bytes(c)) >> 9, - U64_MAX, &inode->ei_journal_seq, &i_sectors_delta); + U64_MAX, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); if (unlikely(ret)) @@ -2508,7 +2513,6 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len ret = bch2_fpunch(c, inode_inum(inode), discard_start, discard_end, - &inode->ei_journal_seq, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); } @@ -2587,7 +2591,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ret = bch2_fpunch(c, inode_inum(inode), offset >> 9, (offset + len) >> 9, - &inode->ei_journal_seq, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); @@ -2691,8 +2694,7 @@ reassemble: ret = bch2_btree_iter_traverse(&del) ?: bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: - bch2_trans_commit(&trans, &disk_res, - &inode->ei_journal_seq, + bch2_trans_commit(&trans, &disk_res, NULL, BTREE_INSERT_NOFAIL); bch2_disk_reservation_put(c, &disk_res); @@ -2803,7 +2805,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, ret = bch2_extent_update(&trans, inode_inum(inode), &iter, &reservation.k_i, - &disk_res, &inode->ei_journal_seq, + &disk_res, NULL, 0, &i_sectors_delta, true); i_sectors_acct(c, inode, "a_res, i_sectors_delta); bkey_err: @@ -3003,7 +3005,6 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, inode_inum(dst), pos_dst >> 9, inode_inum(src), pos_src >> 9, aligned_len >> 9, - &dst->ei_journal_seq, pos_dst + len, &i_sectors_delta); if (ret < 0) goto err; @@ -3021,10 +3022,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, i_size_write(&dst->v, pos_dst + ret); spin_unlock(&dst->v.i_lock); - if (((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || - IS_SYNC(file_inode(file_dst))) && - !c->opts.journal_flush_disabled) - ret = bch2_journal_flush_seq(&c->journal, dst->ei_journal_seq); + if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || + IS_SYNC(file_inode(file_dst))) + ret = bch2_flush_inode(c, inode_inum(dst)); err: bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 12178bd15c34..92919b16f2f5 100644 --- a/fs/bcachefs/fs.c +++ 
b/fs/bcachefs/fs.c @@ -41,25 +41,6 @@ static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum, struct bch_inode_info *, struct bch_inode_unpacked *); -static void journal_seq_copy(struct bch_fs *c, - struct bch_inode_info *dst, - u64 journal_seq) -{ - /* - * atomic64_cmpxchg has a fallback for archs that don't support it, - * cmpxchg does not: - */ - atomic64_t *dst_seq = (void *) &dst->ei_journal_seq; - u64 old, v = READ_ONCE(dst->ei_journal_seq); - - do { - old = v; - - if (old >= journal_seq) - break; - } while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old); -} - static void __pagecache_lock_put(struct pagecache_lock *lock, long i) { BUG_ON(atomic_long_read(&lock->v) == 0); @@ -152,9 +133,7 @@ retry: BTREE_ITER_INTENT) ?: (set ? set(inode, &inode_u, p) : 0) ?: bch2_inode_write(&trans, &iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_NOFAIL); + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); /* * the btree node lock protects inode->ei_inode, not ei_update_lock; @@ -329,7 +308,6 @@ err_before_quota: if (!(flags & BCH_CREATE_TMPFILE)) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(c, dir, journal_seq); mutex_unlock(&dir->ei_update_lock); } @@ -337,7 +315,6 @@ err_before_quota: inum.inum = inode_u.bi_inum; bch2_vfs_inode_init(c, inum, inode, &inode_u); - journal_seq_copy(c, inode, journal_seq); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -362,7 +339,6 @@ err_before_quota: * We raced, another process pulled the new inode into cache * before us: */ - journal_seq_copy(c, old, journal_seq); make_bad_inode(&inode->v); iput(&inode->v); @@ -446,7 +422,7 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0, + ret = __bch2_trans_do(&trans, NULL, NULL, 0, bch2_link_trans(&trans, inode_inum(dir), &dir_u, inode_inum(inode), &inode_u, @@ -455,7 +431,6 @@ static int __bch2_link(struct bch_fs *c, if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); @@ -498,7 +473,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, + ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, inode_inum(dir), &dir_u, @@ -508,7 +483,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, @@ -550,8 +524,6 @@ static int bch2_symlink(struct mnt_idmap *idmap, if (unlikely(ret)) goto err; - journal_seq_copy(c, dir, inode->ei_journal_seq); - ret = __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) goto err; @@ -586,7 +558,6 @@ static int bch2_rename2(struct mnt_idmap *idmap, ? BCH_RENAME_EXCHANGE : dst_dentry->d_inode ? 
BCH_RENAME_OVERWRITE : BCH_RENAME; - u64 journal_seq = 0; int ret; if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) @@ -626,7 +597,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, goto err; } - ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0, + ret = __bch2_trans_do(&trans, NULL, NULL, 0, bch2_rename_trans(&trans, inode_inum(src_dir), &src_dir_u, inode_inum(dst_dir), &dst_dir_u, @@ -644,23 +615,17 @@ static int bch2_rename2(struct mnt_idmap *idmap, bch2_inode_update_after_write(c, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(c, src_dir, journal_seq); - if (src_dir != dst_dir) { + if (src_dir != dst_dir) bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(c, dst_dir, journal_seq); - } bch2_inode_update_after_write(c, src_inode, &src_inode_u, ATTR_CTIME); - journal_seq_copy(c, src_inode, journal_seq); - if (dst_inode) { + if (dst_inode) bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, ATTR_CTIME); - journal_seq_copy(c, dst_inode, journal_seq); - } err: bch2_trans_exit(&trans); @@ -767,8 +732,7 @@ retry: } ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); btree_err: bch2_trans_iter_exit(&trans, &inode_iter); @@ -1203,7 +1167,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum, inode->v.i_size = bi->bi_size; inode->ei_flags = 0; - inode->ei_journal_seq = bi->bi_journal_seq; inode->ei_quota_reserved = 0; inode->ei_qid = bch_qid(bi); inode->ei_subvol = inum.subvol; @@ -1242,7 +1205,6 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) mutex_init(&inode->ei_update_lock); pagecache_lock_init(&inode->ei_pagecache_lock); mutex_init(&inode->ei_quota_lock); - inode->ei_journal_seq = 0; return &inode->v; } diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 38c04282da64..1c8936df9fbb 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -36,7 +36,6 @@ struct bch_inode_info { unsigned long ei_flags; struct mutex ei_update_lock; - u64 ei_journal_seq; u64 ei_quota_reserved; unsigned long ei_last_dirtied; struct pagecache_lock ei_pagecache_lock; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 0a9cb4d489f4..dc41286c229e 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -393,7 +393,7 @@ err: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, - u64 *journal_seq, s64 *i_sectors_delta) + s64 *i_sectors_delta) { struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); @@ -431,7 +431,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, - &disk_res, journal_seq, + &disk_res, NULL, 0, i_sectors_delta, false); bch2_disk_reservation_put(c, &disk_res); btree_err: @@ -450,7 +450,7 @@ btree_err: } int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, - u64 *journal_seq, s64 *i_sectors_delta) + s64 *i_sectors_delta) { struct btree_trans trans; struct btree_iter iter; @@ -461,8 +461,7 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, POS(inum.inum, start), BTREE_ITER_INTENT); - ret = bch2_fpunch_at(&trans, &iter, inum, end, - journal_seq, i_sectors_delta); + ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h 
index ebb0944b4ca3..8be77561badb 100644 --- a/fs/bcachefs/io.h +++ b/fs/bcachefs/io.h @@ -68,12 +68,6 @@ static inline u64 *op_journal_seq(struct bch_write_op *op) ? op->journal_seq_p : &op->journal_seq; } -static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq) -{ - op->journal_seq_p = journal_seq; - op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR; -} - static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) { return op->alloc_reserve == RESERVE_MOVINGGC @@ -88,8 +82,8 @@ int bch2_extent_update(struct btree_trans *, subvol_inum, struct disk_reservation *, u64 *, u64, s64 *, bool); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, - subvol_inum, u64, u64 *, s64 *); -int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, u64 *, s64 *); + subvol_inum, u64, s64 *); +int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, struct bch_io_opts opts) diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 8e66e6390e62..d003f4088dfc 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -210,7 +210,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) s64 bch2_remap_range(struct bch_fs *c, subvol_inum dst_inum, u64 dst_offset, subvol_inum src_inum, u64 src_offset, - u64 remap_sectors, u64 *journal_seq, + u64 remap_sectors, u64 new_i_size, s64 *i_sectors_delta) { struct btree_trans trans; @@ -281,7 +281,7 @@ s64 bch2_remap_range(struct bch_fs *c, min(dst_end.offset, dst_iter.pos.offset + src_iter.pos.offset - src_want.offset), - journal_seq, i_sectors_delta); + i_sectors_delta); continue; } @@ -320,7 +320,7 @@ s64 bch2_remap_range(struct bch_fs *c, dst_end.offset - dst_iter.pos.offset)); ret = bch2_extent_update(&trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, journal_seq, + new_dst.k, &disk_res, NULL, new_i_size, i_sectors_delta, true); bch2_disk_reservation_put(c, &disk_res); @@ -347,7 +347,7 @@ s64 bch2_remap_range(struct bch_fs *c, inode_u.bi_size < new_i_size) { inode_u.bi_size = new_i_size; ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, journal_seq, 0); + bch2_trans_commit(&trans, NULL, NULL, 0); } bch2_trans_iter_exit(&trans, &inode_iter); diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index 4c1b82860b0b..3745873fd88d 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -58,6 +58,6 @@ static inline __le64 *bkey_refcount(struct bkey_i *k) } s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, - subvol_inum, u64, u64, u64 *, u64, s64 *); + subvol_inum, u64, u64, u64, s64 *); #endif /* _BCACHEFS_REFLINK_H */ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 181af89b0553..21823ce69237 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -165,8 +165,24 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, const char *name, const void *value, size_t size, int type, int flags) { + struct btree_iter inode_iter = { NULL }; + struct bch_inode_unpacked inode_u; int ret; + /* + * We need to do an inode update so that bi_journal_seq gets updated + * and fsync works: + * + * Perhaps we should be updating bi_mtime too?
+ */ + + ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, BTREE_ITER_INTENT) ?: + bch2_inode_write(trans, &inode_iter, &inode_u); + bch2_trans_iter_exit(trans, &inode_iter); + + if (ret) + return ret; + if (value) { struct bkey_i_xattr *xattr; unsigned namelen = strlen(name); @@ -352,7 +368,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); - return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, + return bch2_trans_do(c, NULL, NULL, 0, bch2_xattr_set(&trans, inode_inum(inode), &hash, name, value, size, handler->flags, flags)); -- cgit v1.2.3 From 32b26e8c7f6418b2d8bd404c7482c44141ba52e5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 6 Nov 2021 00:03:40 -0400 Subject: bcachefs: bch2_assert_pos_locked() This adds a new assertion to be used by bch2_inode_update_after_write(), which updates the VFS inode based on the update to the btree inode we just did - we require that the btree inode still be locked when we do that update. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 2 +- fs/bcachefs/btree_iter.c | 45 ++++++++++++++++++++++++++++++++++--- fs/bcachefs/btree_iter.h | 4 ++++ fs/bcachefs/fs.c | 58 +++++++++++++++++++++++++++++------------------- fs/bcachefs/fs.h | 2 +- fs/bcachefs/inode.c | 6 ++--- fs/bcachefs/inode.h | 2 ++ 7 files changed, 88 insertions(+), 31 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 51a0b48a5313..00cd40a8d7fa 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -339,7 +339,7 @@ btree_err: if (unlikely(ret)) goto err; - bch2_inode_update_after_write(c, inode, &inode_u, + bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME|ATTR_MODE); set_cached_acl(&inode->v, type, acl); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 94ba43626cde..1ad81cad36f1 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -46,7 +46,7 @@ static inline int __btree_path_cmp(const struct btree_path *l, unsigned r_level) { return cmp_int(l->btree_id, r_btree_id) ?: - cmp_int(l->cached, r_cached) ?: + cmp_int((int) l->cached, (int) r_cached) ?: bpos_cmp(l->pos, r_pos) ?: -cmp_int(l->level, r_level); } @@ -762,6 +762,43 @@ out: return ret; } +void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, + struct bpos pos, bool key_cache) +{ + struct btree_path *path; + unsigned idx; + char buf[100]; + + trans_for_each_path_inorder(trans, path, idx) { + int cmp = cmp_int(path->btree_id, id) ?: + cmp_int(path->cached, key_cache); + + if (cmp > 0) + break; + if (cmp < 0) + continue; + + if (!(path->nodes_locked & 1) || + !path->should_be_locked) + continue; + + if (!key_cache) { + if (bkey_cmp(pos, path->l[0].b->data->min_key) >= 0 && + bkey_cmp(pos, path->l[0].b->key.k.p) <= 0) + return; + } else { + if (!bkey_cmp(pos, path->pos)) + return; + } + } + + bch2_dump_trans_paths_updates(trans); + panic("not locked: %s %s%s\n", + bch2_btree_ids[id], + (bch2_bpos_to_text(&PBUF(buf), pos), buf), + key_cache ? 
" cached" : ""); +} + #else static inline void bch2_btree_path_verify_level(struct btree_trans *trans, @@ -1720,11 +1757,13 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) btree_trans_sort_paths(trans); trans_for_each_path_inorder(trans, path, idx) - printk(KERN_ERR "path: idx %u ref %u:%u%s btree %s pos %s %pS\n", + printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree %s pos %s locks %u %pS\n", path->idx, path->ref, path->intent_ref, - path->preserve ? " preserve" : "", + path->should_be_locked ? " S" : "", + path->preserve ? " P" : "", bch2_btree_ids[path->btree_id], (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1), + path->nodes_locked, #ifdef CONFIG_BCACHEFS_DEBUG (void *) path->ip_allocated #else diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c71e42a782d6..72b9605cf3e7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -166,9 +166,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bke #ifdef CONFIG_BCACHEFS_DEBUG void bch2_trans_verify_paths(struct btree_trans *); void bch2_trans_verify_locks(struct btree_trans *); +void bch2_assert_pos_locked(struct btree_trans *, enum btree_id, + struct bpos, bool); #else static inline void bch2_trans_verify_paths(struct btree_trans *trans) {} static inline void bch2_trans_verify_locks(struct btree_trans *trans) {} +static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, + struct bpos pos, bool key_cache) {} #endif void bch2_btree_path_fix_key_modified(struct btree_trans *trans, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 92919b16f2f5..5596081b93c1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -37,7 +37,7 @@ static struct kmem_cache *bch2_inode_cache; -static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum, +static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_info *, struct bch_inode_unpacked *); @@ -93,11 +93,19 @@ void bch2_pagecache_block_get(struct pagecache_lock *lock) __pagecache_lock_get(lock, -1); } -void bch2_inode_update_after_write(struct bch_fs *c, +void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, unsigned fields) { + struct bch_fs *c = trans->c; + + BUG_ON(bi->bi_inum != inode->v.i_ino); + + bch2_assert_pos_locked(trans, BTREE_ID_inodes, + POS(0, bi->bi_inum), + 0 && c->opts.inodes_use_key_cache); + set_nlink(&inode->v, bch2_inode_nlink_get(bi)); i_uid_write(&inode->v, bi->bi_uid); i_gid_write(&inode->v, bi->bi_gid); @@ -126,6 +134,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, int ret; bch2_trans_init(&trans, c, 0, 512); + trans.ip = _RET_IP_; retry: bch2_trans_begin(&trans); @@ -140,7 +149,7 @@ retry: * this is important for inode updates via bchfs_write_index_update */ if (!ret) - bch2_inode_update_after_write(c, inode, &inode_u, fields); + bch2_inode_update_after_write(&trans, inode, &inode_u, fields); bch2_trans_iter_exit(&trans, &iter); @@ -215,6 +224,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) { struct bch_inode_unpacked inode_u; struct bch_inode_info *inode; + struct btree_trans trans; int ret; inode = to_bch_ei(iget5_locked(c->vfs_sb, @@ -227,14 +237,19 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) if (!(inode->v.i_state & I_NEW)) return &inode->v; - ret = bch2_inode_find_by_inum(c, inum, &inode_u); + bch2_trans_init(&trans, c, 8, 0); + ret = lockrestart_do(&trans, + bch2_inode_find_by_inum_trans(&trans, inum, 
&inode_u)); + + if (!ret) + bch2_vfs_inode_init(&trans, inum, inode, &inode_u); + bch2_trans_exit(&trans); + if (ret) { iget_failed(&inode->v); return ERR_PTR(ret); } - bch2_vfs_inode_init(c, inum, inode, &inode_u); - unlock_new_inode(&inode->v); return &inode->v; @@ -306,7 +321,7 @@ err_before_quota: } if (!(flags & BCH_CREATE_TMPFILE)) { - bch2_inode_update_after_write(c, dir, &dir_u, + bch2_inode_update_after_write(&trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); mutex_unlock(&dir->ei_update_lock); } @@ -314,7 +329,8 @@ err_before_quota: inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; inum.inum = inode_u.bi_inum; - bch2_vfs_inode_init(c, inum, inode, &inode_u); + bch2_iget5_set(&inode->v, &inum); + bch2_vfs_inode_init(&trans, inum, inode, &inode_u); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -429,11 +445,9 @@ static int __bch2_link(struct bch_fs *c, &dentry->d_name)); if (likely(!ret)) { - BUG_ON(inode_u.bi_inum != inode->v.i_ino); - - bch2_inode_update_after_write(c, dir, &dir_u, + bch2_inode_update_after_write(&trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); + bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME); } bch2_trans_exit(&trans); @@ -481,11 +495,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, deleting_snapshot)); if (likely(!ret)) { - BUG_ON(inode_u.bi_inum != inode->v.i_ino); - - bch2_inode_update_after_write(c, dir, &dir_u, + bch2_inode_update_after_write(&trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(c, inode, &inode_u, + bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_MTIME); } @@ -613,18 +625,18 @@ static int bch2_rename2(struct mnt_idmap *idmap, BUG_ON(dst_inode && dst_inode->v.i_ino != dst_inode_u.bi_inum); - bch2_inode_update_after_write(c, src_dir, &src_dir_u, + bch2_inode_update_after_write(&trans, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); if (src_dir != dst_dir) - bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, + bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(c, src_inode, &src_inode_u, + bch2_inode_update_after_write(&trans, src_inode, &src_inode_u, ATTR_CTIME); if (dst_inode) - bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, + bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u, ATTR_CTIME); err: bch2_trans_exit(&trans); @@ -742,7 +754,7 @@ btree_err: if (unlikely(ret)) goto err_trans; - bch2_inode_update_after_write(c, inode, &inode_u, attr->ia_valid); + bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid); if (acl) set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); @@ -1154,11 +1166,11 @@ static const struct export_operations bch_export_ops = { //.get_parent = bch2_get_parent, }; -static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum, +static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, struct bch_inode_info *inode, struct bch_inode_unpacked *bi) { - bch2_inode_update_after_write(c, inode, bi, ~0); + bch2_inode_update_after_write(trans, inode, bi, ~0); inode->v.i_blocks = bi->bi_sectors; inode->v.i_ino = bi->bi_inum; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 1c8936df9fbb..530238780a88 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -172,7 +172,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum); typedef int (*inode_set_fn)(struct bch_inode_info *, struct bch_inode_unpacked *, 
void *); -void bch2_inode_update_after_write(struct bch_fs *, +void bch2_inode_update_after_write(struct btree_trans *, struct bch_inode_info *, struct bch_inode_unpacked *, unsigned); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 728545141a39..a24bbc5228c1 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -722,9 +722,9 @@ err: return ret; } -static int bch2_inode_find_by_inum_trans(struct btree_trans *trans, - subvol_inum inum, - struct bch_inode_unpacked *inode) +int bch2_inode_find_by_inum_trans(struct btree_trans *trans, + subvol_inum inum, + struct bch_inode_unpacked *inode) { struct btree_iter iter; int ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index d433d48de4e0..723186d8afb6 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -89,6 +89,8 @@ int bch2_inode_create(struct btree_trans *, struct btree_iter *, int bch2_inode_rm(struct bch_fs *, subvol_inum, bool); +int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum, + struct bch_inode_unpacked *); int bch2_inode_find_by_inum(struct bch_fs *, subvol_inum, struct bch_inode_unpacked *); -- cgit v1.2.3 From e3f2db39b39b69538db5bfbd9e359e99dcf1c986 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 13 Nov 2021 13:36:26 -0500 Subject: bcachefs: Tweak vfs cache shrinker behaviour In bcachefs, inodes and dentries are also cached - more compactly - by the btree node cache, and they don't require seeks to recreate. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5596081b93c1..4561c60c95e3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1669,6 +1669,8 @@ got_sb: sb->s_flags |= SB_POSIXACL; #endif + sb->s_shrink.seeks = 0; + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); if (IS_ERR(vinode)) { bch_err(c, "error mounting: error getting root inode %i", -- cgit v1.2.3 From 85e95ca7cc48c23f772387b069d794f69116192b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 13 Nov 2021 19:49:14 -0500 Subject: bcachefs: Update export_operations for snapshots When support for snapshots was merged, export operations weren't updated yet. This patch adds new filehandle types for bcachefs that include the subvolume ID and updates export operations for subvolumes - and also .get_parent, support for which was added just prior to snapshots.
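The resulting filehandles are visible to userspace via name_to_handle_at(); here is a minimal decoding sketch for the non-parent variant (the struct layout and the 0x80 type value mirror the definitions added below, everything else is illustrative):

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct bcachefs_fid {
  	uint64_t inum;
  	uint32_t subvol;
  	uint32_t gen;
  } __attribute__((packed));

  int main(int argc, char **argv)
  {
  	struct file_handle *fh;
  	int mount_id;

  	if (argc != 2)
  		return 1;

  	fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
  	fh->handle_bytes = MAX_HANDLE_SZ;

  	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0)) {
  		perror("name_to_handle_at");
  		return 1;
  	}

  	if (fh->handle_type == 0x80) {	/* FILEID_BCACHEFS_WITHOUT_PARENT */
  		struct bcachefs_fid *fid = (void *) fh->f_handle;

  		printf("inum %llu subvol %u gen %u\n",
  		       (unsigned long long) fid->inum,
  		       fid->subvol, fid->gen);
  	}

  	free(fh);
  	return 0;
  }

A FILEID_BCACHEFS_WITH_PARENT handle begins with the same struct, followed by a second one for the parent directory; handle lengths are validated in u32 units because that's the unit the VFS uses for fh_len.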
Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 4 +- fs/bcachefs/dirent.h | 3 + fs/bcachefs/fs.c | 230 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/exportfs.h | 6 ++ 4 files changed, 218 insertions(+), 25 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 5db1426faaf3..4dfcc955675b 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -197,8 +197,8 @@ static void dirent_copy_target(struct bkey_i_dirent *dst, dst->v.d_type = src.v->d_type; } -static int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, - struct bkey_s_c_dirent d, subvol_inum *target) +int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, + struct bkey_s_c_dirent d, subvol_inum *target) { struct bch_subvolume s; int ret = 0; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 8ae407765fe4..1bb4d802bc1d 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -29,6 +29,9 @@ static inline unsigned dirent_val_u64s(unsigned len) sizeof(u64)); } +int bch2_dirent_read_target(struct btree_trans *, subvol_inum, + struct bkey_s_c_dirent, subvol_inum *); + int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, const struct qstr *, u64, u64 *, int); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4561c60c95e3..61027d349cd8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1124,46 +1124,230 @@ static const struct address_space_operations bch_address_space_operations = { .error_remove_page = generic_error_remove_page, }; -#if 0 -static struct inode *bch2_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) +struct bcachefs_fid { + u64 inum; + u32 subvol; + u32 gen; +} __packed; + +struct bcachefs_fid_with_parent { + struct bcachefs_fid fid; + struct bcachefs_fid dir; +} __packed; + +static int bcachefs_fid_valid(int fh_len, int fh_type) { - struct bch_fs *c = sb->s_fs_info; - struct inode *vinode; + switch (fh_type) { + case FILEID_BCACHEFS_WITHOUT_PARENT: + return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32); + case FILEID_BCACHEFS_WITH_PARENT: + return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32); + default: + return false; + } +} + +static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode) +{ + return (struct bcachefs_fid) { + .inum = inode->ei_inode.bi_inum, + .subvol = inode->ei_subvol, + .gen = inode->ei_inode.bi_generation, + }; +} + +static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len, + struct inode *vdir) +{ + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_inode_info *dir = to_bch_ei(vdir); + + if (*len < sizeof(struct bcachefs_fid_with_parent) / sizeof(u32)) + return FILEID_INVALID; + + if (!S_ISDIR(inode->v.i_mode) && dir) { + struct bcachefs_fid_with_parent *fid = (void *) fh; + + fid->fid = bch2_inode_to_fid(inode); + fid->dir = bch2_inode_to_fid(dir); + + *len = sizeof(*fid) / sizeof(u32); + return FILEID_BCACHEFS_WITH_PARENT; + } else { + struct bcachefs_fid *fid = (void *) fh; - if (ino < BCACHEFS_ROOT_INO) - return ERR_PTR(-ESTALE); + *fid = bch2_inode_to_fid(inode); - vinode = bch2_vfs_inode_get(c, ino); - if (IS_ERR(vinode)) - return ERR_CAST(vinode); - if (generation && vinode->i_generation != generation) { - /* we didn't find the right inode.. 
*/ + *len = sizeof(*fid) / sizeof(u32); + return FILEID_BCACHEFS_WITHOUT_PARENT; + } +} + +static struct inode *bch2_nfs_get_inode(struct super_block *sb, + struct bcachefs_fid fid) +{ + struct bch_fs *c = sb->s_fs_info; + struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) { + .subvol = fid.subvol, + .inum = fid.inum, + }); + if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) { iput(vinode); - return ERR_PTR(-ESTALE); + vinode = ERR_PTR(-ESTALE); } return vinode; } -static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *fid, +static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid, int fh_len, int fh_type) { - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - bch2_nfs_get_inode); + struct bcachefs_fid *fid = (void *) _fid; + + if (!bcachefs_fid_valid(fh_len, fh_type)) + return NULL; + + return d_obtain_alias(bch2_nfs_get_inode(sb, *fid)); } -static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *fid, +static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid, int fh_len, int fh_type) { - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - bch2_nfs_get_inode); + struct bcachefs_fid_with_parent *fid = (void *) _fid; + + if (!bcachefs_fid_valid(fh_len, fh_type) || + fh_type != FILEID_BCACHEFS_WITH_PARENT) + return NULL; + + return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir)); +} + +static struct dentry *bch2_get_parent(struct dentry *child) +{ + struct bch_inode_info *inode = to_bch_ei(child->d_inode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + subvol_inum parent_inum = { + .subvol = inode->ei_inode.bi_parent_subvol ?: + inode->ei_subvol, + .inum = inode->ei_inode.bi_dir, + }; + + if (!parent_inum.inum) + return NULL; + + return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum)); +} + +static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child) +{ + struct bch_inode_info *inode = to_bch_ei(child->d_inode); + struct bch_inode_info *dir = to_bch_ei(parent->d_inode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct btree_trans trans; + struct btree_iter iter1; + struct btree_iter iter2; + struct bkey_s_c k; + struct bkey_s_c_dirent d; + struct bch_inode_unpacked inode_u; + subvol_inum target; + u32 snapshot; + unsigned name_len; + int ret; + + if (!S_ISDIR(dir->v.i_mode)) + return -EINVAL; + + bch2_trans_init(&trans, c, 0, 0); + + bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents, + POS(dir->ei_inode.bi_inum, 0), 0); + bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents, + POS(dir->ei_inode.bi_inum, 0), 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot); + if (ret) + goto err; + + bch2_btree_iter_set_snapshot(&iter1, snapshot); + bch2_btree_iter_set_snapshot(&iter2, snapshot); + + ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u); + if (ret) + goto err; + + if (inode_u.bi_dir == dir->ei_inode.bi_inum) { + bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); + + k = bch2_btree_iter_peek_slot(&iter1); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_dirent) { + ret = -ENOENT; + goto err; + } + + d = bkey_s_c_to_dirent(k); + ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target); + if (ret > 0) + ret = -ENOENT; + if (ret) + goto err; + + if (target.subvol == inode->ei_subvol && + target.inum == inode->ei_inode.bi_inum) + goto found; + } else { + /* + * File with multiple hardlinks and our backref is 
to the wrong + * directory - linear search: + */ + for_each_btree_key_continue_norestart(iter2, 0, k, ret) { + if (k.k->p.inode > dir->ei_inode.bi_inum) + break; + + if (k.k->type != KEY_TYPE_dirent) + continue; + + d = bkey_s_c_to_dirent(k); + ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target); + if (ret < 0) + break; + if (ret) + continue; + + if (target.subvol == inode->ei_subvol && + target.inum == inode->ei_inode.bi_inum) + goto found; + } + } + + ret = -ENOENT; + goto err; +found: + name_len = min_t(unsigned, bch2_dirent_name_bytes(d), NAME_MAX); + + memcpy(name, d.v->d_name, name_len); + name[name_len] = '\0'; +err: + if (ret == -EINTR) + goto retry; + + bch2_trans_iter_exit(&trans, &iter1); + bch2_trans_iter_exit(&trans, &iter2); + bch2_trans_exit(&trans); + + return ret; } -#endif static const struct export_operations bch_export_ops = { - //.fh_to_dentry = bch2_fh_to_dentry, - //.fh_to_parent = bch2_fh_to_parent, - //.get_parent = bch2_get_parent, + .encode_fh = bch2_encode_fh, + .fh_to_dentry = bch2_fh_to_dentry, + .fh_to_parent = bch2_fh_to_parent, + .get_parent = bch2_get_parent, + .get_name = bch2_get_name, }; static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 11fbd0ee1370..f49a7d31167e 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -98,6 +98,12 @@ enum fid_type { */ FILEID_FAT_WITH_PARENT = 0x72, + /* + * 64 bit inode number, 32 bit subvolume, 32 bit generation number: + */ + FILEID_BCACHEFS_WITHOUT_PARENT = 0x80, + FILEID_BCACHEFS_WITH_PARENT = 0x81, + /* * 128 bit child FID (struct lu_fid) * 128 bit parent FID (struct lu_fid) -- cgit v1.2.3 From 9ca4853b98af5fa15a2ddc47a45f8e103027f95d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Oct 2021 13:05:56 -0400 Subject: bcachefs: Fix quota support for snapshots Quota support was disabled when snapshots were released, because of some tricky interactions with snapshots. We're sidestepping that for now - we're simply disabling quota accounting on snapshot subvolumes.
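The accounting consequence is easy to model (a toy model of the rule, not kernel code - in the kernel the check is BCH_SUBVOLUME_SNAP() on the subvolume owning each inode's snapshot ID):

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  struct toy_inode {
  	uint32_t uid;
  	uint64_t sectors;
  	bool in_snapshot;
  };

  int main(void)
  {
  	struct toy_inode inodes[] = {
  		{ .uid = 1000, .sectors = 128, .in_snapshot = false },
  		{ .uid = 1000, .sectors = 128, .in_snapshot = true },
  		{ .uid = 1001, .sectors = 64, .in_snapshot = false },
  	};
  	uint64_t usage_1000 = 0, usage_1001 = 0;

  	for (unsigned i = 0; i < sizeof(inodes) / sizeof(inodes[0]); i++) {
  		if (inodes[i].in_snapshot)
  			continue;	/* snapshot subvolumes aren't charged */

  		if (inodes[i].uid == 1000)
  			usage_1000 += inodes[i].sectors;
  		else
  			usage_1001 += inodes[i].sectors;
  	}

  	/* prints 128, not 256: the snapshot's copy of the inode
  	 * doesn't count against the owner's quota */
  	printf("uid 1000: %llu sectors\n", (unsigned long long) usage_1000);
  	printf("uid 1001: %llu sectors\n", (unsigned long long) usage_1001);
  	return 0;
  }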
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 28 ++++++++++++++------ fs/bcachefs/fs.h | 6 +++++ fs/bcachefs/opts.h | 12 ++++----- fs/bcachefs/quota.c | 69 +++++++++++++++++++++++++++++++++++++------------ fs/bcachefs/subvolume.c | 9 +++++++ fs/bcachefs/subvolume.h | 2 ++ 6 files changed, 96 insertions(+), 30 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 61027d349cd8..31adc0e0d452 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -39,7 +39,8 @@ static struct kmem_cache *bch2_inode_cache; static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_info *, - struct bch_inode_unpacked *); + struct bch_inode_unpacked *, + struct bch_subvolume *); static void __pagecache_lock_put(struct pagecache_lock *lock, long i) { @@ -225,6 +226,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) struct bch_inode_unpacked inode_u; struct bch_inode_info *inode; struct btree_trans trans; + struct bch_subvolume subvol; int ret; inode = to_bch_ei(iget5_locked(c->vfs_sb, @@ -239,10 +241,11 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) bch2_trans_init(&trans, c, 8, 0); ret = lockrestart_do(&trans, + bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?: bch2_inode_find_by_inum_trans(&trans, inum, &inode_u)); if (!ret) - bch2_vfs_inode_init(&trans, inum, inode, &inode_u); + bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol); bch2_trans_exit(&trans); if (ret) { @@ -268,6 +271,7 @@ __bch2_create(struct mnt_idmap *idmap, struct bch_inode_unpacked inode_u; struct posix_acl *default_acl = NULL, *acl = NULL; subvol_inum inum; + struct bch_subvolume subvol; u64 journal_seq = 0; int ret; @@ -310,7 +314,12 @@ retry: if (unlikely(ret)) goto err_before_quota; - ret = bch2_trans_commit(&trans, NULL, &journal_seq, 0); + inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; + inum.inum = inode_u.bi_inum; + + ret = bch2_subvolume_get(&trans, inum.subvol, true, + BTREE_ITER_WITH_UPDATES, &subvol) ?: + bch2_trans_commit(&trans, NULL, &journal_seq, 0); if (unlikely(ret)) { bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); @@ -326,11 +335,8 @@ err_before_quota: mutex_unlock(&dir->ei_update_lock); } - inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; - inum.inum = inode_u.bi_inum; - bch2_iget5_set(&inode->v, &inum); - bch2_vfs_inode_init(&trans, inum, inode, &inode_u); + bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -1352,10 +1358,16 @@ static const struct export_operations bch_export_ops = { static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, struct bch_inode_info *inode, - struct bch_inode_unpacked *bi) + struct bch_inode_unpacked *bi, + struct bch_subvolume *subvol) { bch2_inode_update_after_write(trans, inode, bi, ~0); + if (BCH_SUBVOLUME_SNAP(subvol)) + set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); + else + clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); + inode->v.i_blocks = bi->bi_sectors; inode->v.i_ino = bi->bi_inum; inode->v.i_rdev = bi->bi_dev; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 530238780a88..a67ab1ad2a31 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -63,6 +63,12 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode) */ #define EI_INODE_ERROR 0 +/* + * Set if the inode is in a snapshot subvolume - we don't do quota accounting in + * those: + */ +#define
EI_INODE_SNAPSHOT 1 + #define to_bch_ei(_inode) \ container_of_or_null(_inode, struct bch_inode_info, v) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 10c022ec6ee0..896b8c9c1180 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -223,19 +223,19 @@ enum opt_type { BCH_SB_POSIX_ACL, true, \ NULL, "Enable POSIX acls") \ x(usrquota, u8, \ - 0, \ + OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH_SB_USRQUOTA, false, \ NULL, "Enable user quotas") \ x(grpquota, u8, \ - 0, \ + OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH_SB_GRPQUOTA, false, \ NULL, "Enable group quotas") \ x(prjquota, u8, \ - 0, \ + OPT_FORMAT|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH_SB_PRJQUOTA, false, \ NULL, "Enable project quotas") \ x(degraded, u8, \ OPT_MOUNT, \ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 5f1216da76d0..8f8f4b0accd6 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -3,6 +3,7 @@ #include "btree_update.h" #include "inode.h" #include "quota.h" +#include "subvolume.h" #include "super-io.h" static const char *bch2_sb_validate_quota(struct bch_sb *sb, @@ -415,14 +416,55 @@ static void bch2_sb_quota_read(struct bch_fs *c) } } +static int bch2_fs_quota_read_inode(struct btree_trans *trans, + struct btree_iter *iter) +{ + struct bch_fs *c = trans->c; + struct bch_inode_unpacked u; + struct bch_subvolume subvolume; + struct bkey_s_c k; + int ret; + + k = bch2_btree_iter_peek(iter); + ret = bkey_err(k); + if (ret) + return ret; + + if (!k.k) + return 1; + + ret = bch2_snapshot_get_subvol(trans, k.k->p.snapshot, &subvolume); + if (ret) + return ret; + + /* + * We don't do quota accounting in snapshots: + */ + if (BCH_SUBVOLUME_SNAP(&subvolume)) + goto advance; + + if (!bkey_is_inode(k.k)) + goto advance; + + ret = bch2_inode_unpack(k, &u); + if (ret) + return ret; + + bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, + KEY_TYPE_QUOTA_NOCHECK); + bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, + KEY_TYPE_QUOTA_NOCHECK); +advance: + bch2_btree_iter_set_pos(iter, POS(iter->pos.inode, iter->pos.offset + 1)); + return 0; +} + int bch2_fs_quota_read(struct bch_fs *c) { unsigned i, qtypes = enabled_qtypes(c); struct bch_memquota_type *q; struct btree_trans trans; struct btree_iter iter; - struct bch_inode_unpacked u; - struct bkey_s_c k; int ret; mutex_lock(&c->sb_lock); @@ -437,23 +479,18 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - if (bkey_is_inode(k.k)) { - ret = bch2_inode_unpack(k, &u); - if (ret) - return ret; - - bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, - KEY_TYPE_QUOTA_NOCHECK); - bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, - KEY_TYPE_QUOTA_NOCHECK); - } - } + bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN, + BTREE_ITER_INTENT| + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS); + do { + ret = lockrestart_do(&trans, + bch2_fs_quota_read_inode(&trans, &iter)); + } while (!ret); bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - return ret; + return ret < 0 ? 
ret : 0; } /* Enable/disable/delete quotas for an entire filesystem: */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 0ef625d21672..7e909a118189 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -789,6 +789,15 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, return ret; } +int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, + struct bch_subvolume *subvol) +{ + struct bch_snapshot snap; + + return snapshot_lookup(trans, snapshot, &snap) ?: + bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); +} + int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, u32 *snapid) { diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index dde755b45392..e4c3fdcdf22f 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -118,6 +118,8 @@ void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c) int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); +int bch2_snapshot_get_subvol(struct btree_trans *, u32, + struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); int bch2_subvolume_delete(struct btree_trans *, u32); -- cgit v1.2.3 From 8244f3209b5b49a6bde9921d7825af9f57161b23 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 14 Dec 2021 14:24:41 -0500 Subject: bcachefs: Option improvements This adds flags for options that must be a power of two (block size and btree node size), and options that are stored in the superblock as a power of two (encoded extent max). Also: options are now stored in memory in the same units they're displayed in (bytes): we now convert when getting and setting from the superblock. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 18 +++- fs/bcachefs/btree_cache.h | 4 +- fs/bcachefs/btree_io.c | 18 ++-- fs/bcachefs/btree_update_interior.c | 8 +- fs/bcachefs/btree_update_interior.h | 2 +- fs/bcachefs/buckets.c | 6 +- fs/bcachefs/compress.c | 2 +- fs/bcachefs/extents.c | 2 +- fs/bcachefs/fs.c | 6 +- fs/bcachefs/io.c | 4 +- fs/bcachefs/journal_io.c | 4 +- fs/bcachefs/opts.c | 171 +++++++++++++++++++++++++----------- fs/bcachefs/opts.h | 40 +++++---- fs/bcachefs/super-io.c | 17 ++-- fs/bcachefs/super.c | 11 ++- fs/bcachefs/sysfs.c | 12 +-- fs/bcachefs/xattr.c | 2 +- 17 files changed, 205 insertions(+), 122 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 1ad5eafb2f76..95b590d9ee7f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -928,10 +928,20 @@ static inline unsigned bucket_bytes(const struct bch_dev *ca) static inline unsigned block_bytes(const struct bch_fs *c) { - return c->opts.block_size << 9; + return c->opts.block_size; } -static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, s64 time) +static inline unsigned block_sectors(const struct bch_fs *c) +{ + return c->opts.block_size >> 9; +} + +static inline size_t btree_sectors(const struct bch_fs *c) +{ + return c->opts.btree_node_size >> 9; +} + +static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time) { struct timespec64 t; s32 rem; @@ -943,13 +953,13 @@ static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, s64 time return t; } -static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts) +static inline s64 timespec_to_bch2_time(const struct bch_fs *c, struct timespec64 ts) { return (ts.tv_sec * 
c->sb.time_units_per_sec + (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo; } -static inline s64 bch2_current_time(struct bch_fs *c) +static inline s64 bch2_current_time(const struct bch_fs *c) { struct timespec64 now; diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 2f6e0ea87616..a08d12569075 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -69,7 +69,7 @@ static inline bool btree_node_hashed(struct btree *b) static inline size_t btree_bytes(struct bch_fs *c) { - return c->opts.btree_node_size << 9; + return c->opts.btree_node_size; } static inline size_t btree_max_u64s(struct bch_fs *c) @@ -84,7 +84,7 @@ static inline size_t btree_pages(struct bch_fs *c) static inline unsigned btree_blocks(struct bch_fs *c) { - return c->opts.btree_node_size >> c->block_bits; + return btree_sectors(c) >> c->block_bits; } #define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 45f7ec41a8f1..287c45253a33 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -682,7 +682,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_FATAL, c, ca, b, i, "BSET_SEPARATE_WHITEOUTS no longer supported"); - if (btree_err_on(offset + sectors > c->opts.btree_node_size, + if (btree_err_on(offset + sectors > btree_sectors(c), BTREE_ERR_FIXABLE, c, ca, b, i, "bset past end of btree node")) { i->u64s = 0; @@ -896,7 +896,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, b->data->keys.seq, bp->seq); } - while (b->written < (ptr_written ?: c->opts.btree_node_size)) { + while (b->written < (ptr_written ?: btree_sectors(c))) { unsigned sectors, whiteout_u64s = 0; struct nonce nonce; struct bch_csum csum; @@ -1204,7 +1204,7 @@ static unsigned btree_node_sectors_written(struct bch_fs *c, void *data) if (le64_to_cpu(bn->magic) != bset_magic(c)) return 0; - while (offset < c->opts.btree_node_size) { + while (offset < btree_sectors(c)) { if (!offset) { offset += vstruct_sectors(bn, c->block_bits); } else { @@ -1226,7 +1226,7 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void * if (!offset) return false; - while (offset < c->opts.btree_node_size) { + while (offset < btree_sectors(c)) { bne = data + (offset << 9); if (bne->keys.seq == bn->keys.seq) return true; @@ -1296,7 +1296,7 @@ fsck_err: if (ra->err[i]) continue; - while (offset < c->opts.btree_node_size) { + while (offset < btree_sectors(c)) { if (!offset) { sectors = vstruct_sectors(bn, c->block_bits); } else { @@ -1313,7 +1313,7 @@ fsck_err: offset += sectors; } - while (offset < c->opts.btree_node_size) { + while (offset < btree_sectors(c)) { bne = ra->buf[i] + (offset << 9); if (bne->keys.seq == bn->keys.seq) { if (!gap) @@ -1793,8 +1793,8 @@ do_write: BUG_ON(btree_node_fake(b)); BUG_ON((b->will_make_reachable != 0) != !b->written); - BUG_ON(b->written >= c->opts.btree_node_size); - BUG_ON(b->written & (c->opts.block_size - 1)); + BUG_ON(b->written >= btree_sectors(c)); + BUG_ON(b->written & (block_sectors(c) - 1)); BUG_ON(bset_written(b, btree_bset_last(b))); BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); @@ -1867,7 +1867,7 @@ do_write: memset(data + bytes_to_write, 0, (sectors_to_write << 9) - bytes_to_write); - BUG_ON(b->written + sectors_to_write > c->opts.btree_node_size); + BUG_ON(b->written + sectors_to_write > btree_sectors(c)); BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); 
BUG_ON(i->seq != b->data->keys.seq); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 3e6dd2ed1c03..fd4089d19ad2 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -223,12 +223,12 @@ retry: if (IS_ERR(wp)) return ERR_CAST(wp); - if (wp->sectors_free < c->opts.btree_node_size) { + if (wp->sectors_free < btree_sectors(c)) { struct open_bucket *ob; unsigned i; open_bucket_for_each(c, &wp->ptrs, ob, i) - if (ob->sectors_free < c->opts.btree_node_size) + if (ob->sectors_free < btree_sectors(c)) ob->sectors_free = 0; bch2_alloc_sectors_done(c, wp); @@ -236,7 +236,7 @@ retry: } bkey_btree_ptr_v2_init(&tmp.k); - bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); + bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c)); bch2_open_bucket_get(c, wp, &ob); bch2_alloc_sectors_done(c, wp); @@ -1029,7 +1029,7 @@ retry: } ret = bch2_disk_reservation_get(c, &as->disk_res, - nr_nodes * c->opts.btree_node_size, + nr_nodes * btree_sectors(c), c->opts.metadata_replicas, disk_res_flags); if (ret) diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index d4574161a733..8cf59cee6e4e 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -218,7 +218,7 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c, { ssize_t used = bset_byte_offset(b, end) / sizeof(u64) + b->whiteout_u64s; - ssize_t total = c->opts.btree_node_size << 6; + ssize_t total = c->opts.btree_node_size >> 3; /* Always leave one extra u64 for bch2_varint_decode: */ used++; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4fef482ad60e..0d9d723c24bb 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1000,7 +1000,7 @@ static int bch2_mark_extent(struct btree_trans *trans, ? BCH_DATA_btree : BCH_DATA_user; s64 sectors = bkey_is_btree_ptr(k.k) - ? c->opts.btree_node_size + ? btree_sectors(c) : k.k->size; s64 dirty_sectors = 0; bool stale; @@ -1609,7 +1609,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, ? BCH_DATA_btree : BCH_DATA_user; s64 sectors = bkey_is_btree_ptr(k.k) - ? c->opts.btree_node_size + ? 
btree_sectors(c) : k.k->size; s64 dirty_sectors = 0; bool stale; @@ -2184,7 +2184,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) alloc_heap alloc_heap; size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE, - ca->mi.bucket_size / c->opts.btree_node_size); + ca->mi.bucket_size / btree_sectors(c)); /* XXX: these should be tunable */ size_t reserve_none = max_t(size_t, 1, nbuckets >> 9); size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6); diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 78757dcede36..2d5dc2394bab 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -376,7 +376,7 @@ static unsigned __bio_compress(struct bch_fs *c, BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type])); /* If it's only one block, don't bother trying to compress: */ - if (bio_sectors(src) <= c->opts.block_size) + if (src->bi_iter.bi_size <= c->opts.block_size) return 0; dst_data = bio_map_or_bounce(c, dst, WRITE); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 8592a0f6327e..161ae4fd59d9 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1037,7 +1037,7 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k) if (k.k->type == KEY_TYPE_btree_ptr || k.k->type == KEY_TYPE_btree_ptr_v2) - size_ondisk = c->opts.btree_node_size; + size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 31adc0e0d452..bbdfccf24e53 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -868,8 +868,8 @@ static int bch2_fill_extent(struct bch_fs *c, else offset += p.crc.offset; - if ((offset & (c->opts.block_size - 1)) || - (k.k->size & (c->opts.block_size - 1))) + if ((offset & (block_sectors(c) - 1)) || + (k.k->size & (block_sectors(c) - 1))) flags2 |= FIEMAP_EXTENT_NOT_ALIGNED; ret = fiemap_fill_next_extent(info, @@ -1683,7 +1683,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); - if (!(opt->mode & OPT_MOUNT)) + if (!(opt->flags & OPT_MOUNT)) continue; if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 814984ec608c..1cfe433ded33 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1357,7 +1357,7 @@ void bch2_write(struct closure *cl) bch2_keylist_init(&op->insert_keys, op->inline_keys); wbio_init(bio)->put_bio = false; - if (bio_sectors(bio) & (c->opts.block_size - 1)) { + if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { bch_err_inum_ratelimited(c, op->pos.inode, "misaligned write"); op->error = -EIO; @@ -2437,7 +2437,7 @@ int bch2_fs_io_init(struct bch_fs *c) BIOSET_NEED_BVECS) || mempool_init_page_pool(&c->bio_bounce_pages, max_t(unsigned, - c->opts.btree_node_size, + btree_sectors(c), c->sb.encoded_extent_max) / PAGE_SECTORS, 0) || rhashtable_init(&c->promote_table, &bch_promote_params)) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 1a8c0a7eaca7..ae28cee127e3 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -709,7 +709,7 @@ reread: case JOURNAL_ENTRY_NONE: if (!saw_bad) return 0; - sectors = c->opts.block_size; + sectors = block_sectors(c); goto next_block; case JOURNAL_ENTRY_BAD: saw_bad = true; @@ -718,7 +718,7 @@ reread: * field of the journal entry we read, so try reading * again at next block boundary: */ - sectors = 
c->opts.block_size; + sectors = block_sectors(c); break; default: return ret; diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index e81e07a383bb..9b75c852bac8 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -141,41 +141,27 @@ void bch2_opt_set_by_id(struct bch_opts *opts, enum bch_opt_id id, u64 v) } } -/* - * Initial options from superblock - here we don't want any options undefined, - * any options the superblock doesn't specify are set to 0: - */ -struct bch_opts bch2_opts_from_sb(struct bch_sb *sb) -{ - struct bch_opts opts = bch2_opts_empty(); - -#define x(_name, _bits, _mode, _type, _sb_opt, ...) \ - if (_sb_opt != NO_SB_OPT) \ - opt_set(opts, _name, _sb_opt(sb)); - BCH_OPTS() -#undef x - - return opts; -} - const struct bch_option bch2_opt_table[] = { -#define OPT_BOOL() .type = BCH_OPT_BOOL -#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, .min = _min, .max = _max -#define OPT_SECTORS(_min, _max) .type = BCH_OPT_SECTORS, .min = _min, .max = _max -#define OPT_STR(_choices) .type = BCH_OPT_STR, .choices = _choices +#define OPT_BOOL() .type = BCH_OPT_BOOL, .min = 0, .max = 2 +#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \ + .min = _min, .max = _max +#define OPT_STR(_choices) .type = BCH_OPT_STR, \ + .min = 0, .max = ARRAY_SIZE(_choices),\ + .choices = _choices #define OPT_FN(_fn) .type = BCH_OPT_FN, \ .parse = _fn##_parse, \ .to_text = _fn##_to_text -#define x(_name, _bits, _mode, _type, _sb_opt, _default, _hint, _help) \ +#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \ [Opt_##_name] = { \ .attr = { \ .name = #_name, \ - .mode = (_mode) & OPT_RUNTIME ? 0644 : 0444, \ + .mode = (_flags) & OPT_RUNTIME ? 0644 : 0444, \ }, \ - .mode = _mode, \ + .flags = _flags, \ .hint = _hint, \ .help = _help, \ + .get_sb = _sb_opt, \ .set_sb = SET_##_sb_opt, \ _type \ }, @@ -218,7 +204,41 @@ static int bch2_mount_opt_lookup(const char *name) return bch2_opt_lookup(name); } -int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, +static int bch2_opt_validate(const struct bch_option *opt, const char *msg, u64 v) +{ + if (v < opt->min) { + if (msg) + pr_err("invalid %s%s: too small (min %llu)", + msg, opt->attr.name, opt->min); + return -ERANGE; + } + + if (opt->max && v >= opt->max) { + if (msg) + pr_err("invalid %s%s: too big (max %llu)", + msg, opt->attr.name, opt->max); + return -ERANGE; + } + + if ((opt->flags & OPT_SB_FIELD_SECTORS) && (v & 511)) { + if (msg) + pr_err("invalid %s %s: not a multiple of 512", + msg, opt->attr.name); + return -EINVAL; + } + + if ((opt->flags & OPT_MUST_BE_POW_2) && !is_power_of_2(v)) { + if (msg) + pr_err("invalid %s%s: must be a power of two", + msg, opt->attr.name); + return -EINVAL; + } + + return 0; +} + +int bch2_opt_parse(struct bch_fs *c, const char *msg, + const struct bch_option *opt, const char *val, u64 *res) { ssize_t ret; @@ -228,30 +248,13 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, ret = kstrtou64(val, 10, res); if (ret < 0) return ret; - - if (*res > 1) - return -ERANGE; break; case BCH_OPT_UINT: - ret = kstrtou64(val, 10, res); + ret = opt->flags & OPT_HUMAN_READABLE + ? 
bch2_strtou64_h(val, res) + : kstrtou64(val, 10, res); if (ret < 0) return ret; - - if (*res < opt->min || *res >= opt->max) - return -ERANGE; - break; - case BCH_OPT_SECTORS: - ret = bch2_strtou64_h(val, res); - if (ret < 0) - return ret; - - if (*res & 511) - return -EINVAL; - - *res >>= 9; - - if (*res < opt->min || *res >= opt->max) - return -ERANGE; break; case BCH_OPT_STR: ret = match_string(opt->choices, -1, val); @@ -264,10 +267,12 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt, if (!c) return 0; - return opt->parse(c, val, res); + ret = opt->parse(c, val, res); + if (ret < 0) + return ret; } - return 0; + return bch2_opt_validate(opt, msg, *res); } void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, @@ -288,10 +293,10 @@ void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, switch (opt->type) { case BCH_OPT_BOOL: case BCH_OPT_UINT: - pr_buf(out, "%lli", v); - break; - case BCH_OPT_SECTORS: - bch2_hprint(out, v << 9); + if (opt->flags & OPT_HUMAN_READABLE) + bch2_hprint(out, v); + else + pr_buf(out, "%lli", v); break; case BCH_OPT_STR: if (flags & OPT_SHOW_FULL_LIST) @@ -365,7 +370,8 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, if (id < 0) goto bad_opt; - ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v); + ret = bch2_opt_parse(c, "mount option ", + &bch2_opt_table[id], val, &v); if (ret < 0) goto bad_val; } else { @@ -385,7 +391,7 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, goto no_val; } - if (!(bch2_opt_table[id].mode & OPT_MOUNT)) + if (!(bch2_opt_table[id].flags & OPT_MOUNT)) goto bad_opt; if (id == Opt_acl && @@ -420,6 +426,65 @@ out: return ret; } +/* + * Initial options from superblock - here we don't want any options undefined, + * any options the superblock doesn't specify are set to 0: + */ +int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) +{ + unsigned id; + int ret; + + for (id = 0; id < bch2_opts_nr; id++) { + const struct bch_option *opt = bch2_opt_table + id; + u64 v; + + if (opt->get_sb == NO_SB_OPT) + continue; + + v = opt->get_sb(sb); + + if (opt->flags & OPT_SB_FIELD_ILOG2) + v = 1ULL << v; + + if (opt->flags & OPT_SB_FIELD_SECTORS) + v <<= 9; + + ret = bch2_opt_validate(opt, "superblock option ", v); + if (ret) + return ret; + + bch2_opt_set_by_id(opts, id, v); + } + + return 0; +} + +void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v) +{ + if (opt->set_sb == SET_NO_SB_OPT) + return; + + if (opt->flags & OPT_SB_FIELD_SECTORS) + v >>= 9; + + if (opt->flags & OPT_SB_FIELD_ILOG2) + v = ilog2(v); + + opt->set_sb(sb, v); +} + +void bch2_opt_set_sb(struct bch_fs *c, const struct bch_option *opt, u64 v) +{ + if (opt->set_sb == SET_NO_SB_OPT) + return; + + mutex_lock(&c->sb_lock); + __bch2_opt_set_sb(c->disk_sb.sb, opt, v); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} + /* io opts: */ struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts src) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index bb2ecc778a8c..45f73601e4a8 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -44,19 +44,22 @@ static inline const char *bch2_d_type_str(unsigned d_type) LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); /* When can be set: */ -enum opt_mode { +enum opt_flags { OPT_FS = (1 << 0), /* Filesystem option */ OPT_DEVICE = (1 << 1), /* Device option */ OPT_INODE = (1 << 2), /* Inode option */ OPT_FORMAT = (1 << 3), /* May be specified at format time */ OPT_MOUNT = (1 << 4), /* May be specified at mount time */ 
OPT_RUNTIME = (1 << 5), /* May be specified at runtime */ + OPT_HUMAN_READABLE = (1 << 6), + OPT_MUST_BE_POW_2 = (1 << 7), /* Must be power of 2 */ + OPT_SB_FIELD_SECTORS = (1 << 8),/* Superblock field is >> 9 of actual value */ + OPT_SB_FIELD_ILOG2 = (1 << 9), /* Superblock field is ilog2 of actual value */ }; enum opt_type { BCH_OPT_BOOL, BCH_OPT_UINT, - BCH_OPT_SECTORS, BCH_OPT_STR, BCH_OPT_FN, }; @@ -88,13 +91,15 @@ enum opt_type { #define BCH_OPTS() \ x(block_size, u16, \ - OPT_FS|OPT_FORMAT, \ - OPT_SECTORS(1, 128), \ + OPT_FS|OPT_FORMAT| \ + OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \ + OPT_UINT(512, 1U << 16), \ BCH_SB_BLOCK_SIZE, 8, \ "size", NULL) \ - x(btree_node_size, u16, \ - OPT_FS|OPT_FORMAT, \ - OPT_SECTORS(1, 512), \ + x(btree_node_size, u32, \ + OPT_FS|OPT_FORMAT| \ + OPT_HUMAN_READABLE|OPT_MUST_BE_POW_2|OPT_SB_FIELD_SECTORS, \ + OPT_UINT(512, 1U << 20), \ BCH_SB_BTREE_NODE_SIZE, 512, \ "size", "Btree node size, default 256k") \ x(errors, u8, \ @@ -198,8 +203,9 @@ enum opt_type { BCH_SB_GC_RESERVE, 8, \ "%", "Percentage of disk space to reserve for copygc")\ x(gc_reserve_bytes, u64, \ - OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_SECTORS(0, U64_MAX), \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME| \ + OPT_HUMAN_READABLE|OPT_SB_FIELD_SECTORS, \ + OPT_UINT(0, U64_MAX), \ BCH_SB_GC_RESERVE_BYTES, 0, \ "%", "Amount of disk space to reserve for copygc\n" \ "Takes precedence over gc_reserve_percent if set")\ @@ -360,12 +366,12 @@ enum opt_type { "for performance testing purposes") \ x(fs_size, u64, \ OPT_DEVICE, \ - OPT_SECTORS(0, S64_MAX), \ + OPT_UINT(0, S64_MAX), \ NO_SB_OPT, 0, \ "size", "Size of filesystem on device") \ x(bucket, u32, \ OPT_DEVICE, \ - OPT_SECTORS(0, S64_MAX), \ + OPT_UINT(0, S64_MAX), \ NO_SB_OPT, 0, \ "size", "Size of filesystem on device") \ x(durability, u8, \ @@ -424,13 +430,14 @@ struct printbuf; struct bch_option { struct attribute attr; + u64 (*get_sb)(const struct bch_sb *); void (*set_sb)(struct bch_sb *, u64); - enum opt_mode mode; enum opt_type type; + enum opt_flags flags; + u64 min, max; union { struct { - u64 min, max; }; struct { const char * const *choices; @@ -452,10 +459,13 @@ bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id); u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); -struct bch_opts bch2_opts_from_sb(struct bch_sb *); +int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *); +void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64); +void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64); int bch2_opt_lookup(const char *); -int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64 *); +int bch2_opt_parse(struct bch_fs *, const char *, const struct bch_option *, + const char *, u64 *); #define OPT_SHOW_FULL_LIST (1 << 0) #define OPT_SHOW_MOUNT_STYLE (1 << 1) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 170f7d46fa34..c831d32c26fe 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -267,8 +267,7 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) block_size = le16_to_cpu(sb->block_size); - if (!is_power_of_2(block_size) || - block_size > PAGE_SECTORS) + if (block_size > PAGE_SECTORS) return "Bad block size"; if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) @@ -310,9 +309,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) if (!BCH_SB_BTREE_NODE_SIZE(sb)) return "Btree node size not set"; 
- if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb))) - return "Btree node size not a power of two"; - if (BCH_SB_GC_RESERVE(sb) < 5) return "gc reserve percentage too small"; @@ -627,8 +623,12 @@ got_super: err = "Superblock block size smaller than device block size"; ret = -EINVAL; if (le16_to_cpu(sb->sb->block_size) << 9 < - bdev_logical_block_size(sb->bdev)) - goto err; + bdev_logical_block_size(sb->bdev)) { + pr_err("error reading superblock: Superblock block size (%u) smaller than device block size (%u)", + le16_to_cpu(sb->sb->block_size) << 9, + bdev_logical_block_size(sb->bdev)); + goto err_no_print; + } ret = 0; sb->have_layout = true; @@ -636,8 +636,9 @@ out: pr_verbose_init(*opts, "ret %i", ret); return ret; err: - bch2_free_super(sb); pr_err("error reading superblock: %s", err); +err_no_print: + bch2_free_super(sb); goto out; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index afa1a8fa493b..e1d4fe5a8e49 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -762,10 +762,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 100); c->opts = bch2_opts_default; - bch2_opts_apply(&c->opts, bch2_opts_from_sb(sb)); + ret = bch2_opts_from_sb(&c->opts, sb); + if (ret) + goto err; + bch2_opts_apply(&c->opts, opts); - c->block_bits = ilog2(c->opts.block_size); + c->block_bits = ilog2(block_sectors(c)); c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); if (bch2_fs_init_fault("fs_alloc")) { @@ -877,7 +880,7 @@ static void print_mount_opts(struct bch_fs *c) const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); - if (!(opt->mode & OPT_MOUNT)) + if (!(opt->flags & OPT_MOUNT)) continue; if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) @@ -1003,7 +1006,7 @@ static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) if (!sb_mi) return "Invalid superblock: member info area missing"; - if (le16_to_cpu(sb->block_size) != c->opts.block_size) + if (le16_to_cpu(sb->block_size) != block_sectors(c)) return "mismatched block size"; if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 3f51eda749f0..0a0798bae4d6 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -626,7 +626,7 @@ STORE(bch2_fs_opts_dir) if (!tmp) return -ENOMEM; - ret = bch2_opt_parse(c, opt, strim(tmp), &v); + ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v); kfree(tmp); if (ret < 0) @@ -636,13 +636,7 @@ STORE(bch2_fs_opts_dir) if (ret < 0) return ret; - if (opt->set_sb != SET_NO_SB_OPT) { - mutex_lock(&c->sb_lock); - opt->set_sb(c->disk_sb.sb, v); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - } - + bch2_opt_set_sb(c, opt, v); bch2_opt_set_by_id(&c->opts, id, v); if ((id == Opt_background_target || @@ -665,7 +659,7 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj) for (i = bch2_opt_table; i < bch2_opt_table + bch2_opts_nr; i++) { - if (!(i->mode & OPT_FS)) + if (!(i->flags & OPT_FS)) continue; ret = sysfs_create_file(kobj, &i->attr); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 21823ce69237..a5122dbb2eb9 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -525,7 +525,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, memcpy(buf, value, size); buf[size] = '\0'; - ret = bch2_opt_parse(c, opt, buf, &v); + ret = bch2_opt_parse(c, NULL, opt, buf, &v); kfree(buf); if (ret < 0) -- cgit v1.2.3 From 669f87a5da1c7b91b64f3c6308820b316e241cc2 Mon Sep 
17 00:00:00 2001 From: Kent Overstreet Date: Tue, 4 Jan 2022 00:33:52 -0500 Subject: bcachefs: Switch to __func__ for recording where btree_trans was initialized Symbol decoding, via %ps, isn't supported in userspace - this will also be faster when we're using trans->fn in the fast path, as with the new BCH_JSET_ENTRY_log journal messages. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 2 +- fs/bcachefs/btree_iter.c | 23 +++--- fs/bcachefs/btree_iter.h | 5 +- fs/bcachefs/btree_key_cache.c | 6 +- fs/bcachefs/btree_types.h | 2 +- fs/bcachefs/btree_update_interior.c | 4 +- fs/bcachefs/btree_update_leaf.c | 30 ++++---- fs/bcachefs/fs.c | 1 - fs/bcachefs/trace.h | 138 ++++++++++++++++-------------------- 9 files changed, 102 insertions(+), 109 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 40061887f5d8..cad5d28fed09 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -857,7 +857,7 @@ lock_node: if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; - trace_trans_restart_btree_node_reused(trans->ip, + trace_trans_restart_btree_node_reused(trans->fn, trace_ip, path->btree_id, &path->pos); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 281e5895bc30..bca677c02774 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -350,7 +350,7 @@ bool __bch2_btree_node_lock(struct btree_trans *trans, } if (unlikely(deadlock_path)) { - trace_trans_restart_would_deadlock(trans->ip, ip, + trace_trans_restart_would_deadlock(trans->fn, ip, trans->in_traverse_all, reason, deadlock_path->btree_id, deadlock_path->cached, @@ -535,7 +535,7 @@ bool bch2_trans_relock(struct btree_trans *trans) trans_for_each_path(trans, path) if (path->should_be_locked && !bch2_btree_path_relock(trans, path, _RET_IP_)) { - trace_trans_restart_relock(trans->ip, _RET_IP_, + trace_trans_restart_relock(trans->fn, _RET_IP_, path->btree_id, &path->pos); BUG_ON(!trans->restarted); return false; @@ -1505,7 +1505,9 @@ retry_all: out: bch2_btree_cache_cannibalize_unlock(c); - trace_trans_traverse_all(trans->ip, trace_ip); + trans->in_traverse_all = false; + + trace_trans_traverse_all(trans->fn, trace_ip); return ret; } @@ -2842,7 +2844,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) trans->mem_bytes = new_bytes; if (old_bytes) { - trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes); + trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes); btree_trans_restart(trans); return ERR_PTR(-EINTR); } @@ -2925,14 +2927,15 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) trans->updates = p; p += updates_bytes; } -void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, - unsigned expected_nr_iters, - size_t expected_mem_bytes) +void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, + unsigned expected_nr_iters, + size_t expected_mem_bytes, + const char *fn) __acquires(&c->btree_trans_barrier) { memset(trans, 0, sizeof(*trans)); trans->c = c; - trans->ip = _RET_IP_; + trans->fn = fn; trans->journal_replay_not_finished = !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags); @@ -2971,7 +2974,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans) goto leaked; return; leaked: - bch_err(c, "btree paths leaked from %pS!", (void *) trans->ip); + bch_err(c, "btree paths leaked from %s!", trans->fn); trans_for_each_path(trans, path) if (path->ref) printk(KERN_ERR " btree %s %pS\n", @@ -3069,7 +3072,7 @@ 
void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c) if (!trans_has_locks(trans)) continue; - pr_buf(out, "%i %ps\n", trans->pid, (void *) trans->ip); + pr_buf(out, "%i %s\n", trans->pid, trans->fn); trans_for_each_path(trans, path) { if (!path->nodes_locked) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 457a7601b0ce..abbde3666942 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -379,9 +379,12 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, /* new multiple iterator interface: */ void bch2_dump_trans_paths_updates(struct btree_trans *); -void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); +void __bch2_trans_init(struct btree_trans *, struct bch_fs *, + unsigned, size_t, const char *); void bch2_trans_exit(struct btree_trans *); +#define bch2_trans_init(...) __bch2_trans_init(__VA_ARGS__, __func__) + void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_btree_iter_exit(struct bch_fs *); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index e5029703240c..13012f26a677 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -224,7 +224,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, goto err; if (!bch2_btree_node_relock(trans, ck_path, 0)) { - trace_transaction_restart_ip(trans->ip, _THIS_IP_); + trace_transaction_restart_ip(trans->fn, _THIS_IP_); ret = btree_trans_restart(trans); goto err; } @@ -319,7 +319,7 @@ retry: if (!trans->restarted) goto retry; - trace_transaction_restart_ip(trans->ip, _THIS_IP_); + trace_transaction_restart_ip(trans->fn, _THIS_IP_); ret = -EINTR; goto err; } @@ -339,7 +339,7 @@ fill: if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) { if (!path->locks_want && !__bch2_btree_path_upgrade(trans, path, 1)) { - trace_transaction_restart_ip(trans->ip, _THIS_IP_); + trace_transaction_restart_ip(trans->fn, _THIS_IP_); ret = btree_trans_restart(trans); goto err; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 1fd0cebe30ac..794726c4efd7 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -365,6 +365,7 @@ struct btree_trans_commit_hook { struct btree_trans { struct bch_fs *c; + const char *fn; struct list_head list; struct btree *locking; unsigned locking_path_idx; @@ -372,7 +373,6 @@ struct btree_trans { u8 locking_btree_id; u8 locking_level; pid_t pid; - unsigned long ip; int srcu_idx; u8 nr_sorted; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 51a2ea2c5cd6..29dda2352afd 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -955,7 +955,7 @@ retry: * instead of locking/reserving all the way to the root: */ if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { - trace_trans_restart_iter_upgrade(trans->ip, _RET_IP_, + trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_, path->btree_id, &path->pos); ret = btree_trans_restart(trans); return ERR_PTR(ret); @@ -1019,7 +1019,7 @@ retry: BTREE_UPDATE_JOURNAL_RES, journal_flags); if (ret) { - trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_); + trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_); goto err; } diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 60897fc70c58..de33491f2535 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -266,7 +266,7 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, 
unsigned u64s, return ret; if (!bch2_trans_relock(trans)) { - trace_trans_restart_journal_preres_get(trans->ip, trace_ip); + trace_trans_restart_journal_preres_get(trans->fn, trace_ip); return -EINTR; } @@ -305,7 +305,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans) l->entry.pad[0] = 0; l->entry.pad[1] = 0; l->entry.pad[2] = 0; - b = snprintf(l->d, buflen, "%ps", (void *) trans->ip); + b = min_t(unsigned, strlen(trans->fn), buflen); + memcpy(l->d, trans->fn, b); while (b < buflen) l->d[b++] = '\0'; @@ -426,7 +427,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, int ret; if (race_fault()) { - trace_trans_restart_fault_inject(trans->ip, trace_ip); + trace_trans_restart_fault_inject(trans->fn, trace_ip); trans->restarted = true; return -EINTR; } @@ -619,7 +620,7 @@ fail: bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b); } - trace_trans_restart_would_deadlock_write(trans->ip); + trace_trans_restart_would_deadlock_write(trans->fn); return btree_trans_restart(trans); } @@ -650,9 +651,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, char buf[200]; bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); - bch_err(c, "invalid bkey %s on insert from %ps -> %ps: %s\n", - buf, (void *) trans->ip, - (void *) i->ip_allocated, invalid); + bch_err(c, "invalid bkey %s on insert from %s -> %ps: %s\n", + buf, trans->fn, (void *) i->ip_allocated, invalid); bch2_fatal_error(c); return -EINVAL; } @@ -758,7 +758,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; if (ret == -EINTR) - trace_trans_restart_btree_node_split(trans->ip, trace_ip, + trace_trans_restart_btree_node_split(trans->fn, trace_ip, i->btree_id, &i->path->pos); break; case BTREE_INSERT_NEED_MARK_REPLICAS: @@ -771,7 +771,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_mark_replicas(trans->ip, trace_ip); + trace_trans_restart_mark_replicas(trans->fn, trace_ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RES: @@ -791,13 +791,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_journal_res_get(trans->ip, trace_ip); + trace_trans_restart_journal_res_get(trans->fn, trace_ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RECLAIM: bch2_trans_unlock(trans); - trace_trans_blocked_journal_reclaim(trans->ip, trace_ip); + trace_trans_blocked_journal_reclaim(trans->fn, trace_ip); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); @@ -807,7 +807,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (bch2_trans_relock(trans)) return 0; - trace_trans_restart_journal_reclaim(trans->ip, trace_ip); + trace_trans_restart_journal_reclaim(trans->fn, trace_ip); ret = -EINTR; break; default: @@ -902,7 +902,7 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) } if (ret == -EINTR) - trace_trans_restart_mark(trans->ip, _RET_IP_, + trace_trans_restart_mark(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); if (ret) return ret; @@ -932,7 +932,7 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans) BTREE_TRIGGER_OVERWRITE|i->flags); if (ret == -EINTR) - trace_trans_restart_mark(trans->ip, _RET_IP_, + trace_trans_restart_mark(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); if (ret) return ret; @@ -999,7 +999,7 @@ int __bch2_trans_commit(struct btree_trans *trans) BUG_ON(!i->path->should_be_locked); if 
(unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) { - trace_trans_restart_upgrade(trans->ip, _RET_IP_, + trace_trans_restart_upgrade(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); ret = btree_trans_restart(trans); goto out; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index bbdfccf24e53..10a737965beb 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -135,7 +135,6 @@ int __must_check bch2_write_inode(struct bch_fs *c, int ret; bch2_trans_init(&trans, c, 0, 512); - trans.ip = _RET_IP_; retry: bch2_trans_begin(&trans); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index ff67e8572ea4..69d1f42fe0f8 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -546,94 +546,81 @@ TRACE_EVENT(copygc_wait, __entry->wait_amount, __entry->until) ); -TRACE_EVENT(transaction_restart_ip, - TP_PROTO(unsigned long caller, unsigned long ip), - TP_ARGS(caller, ip), - - TP_STRUCT__entry( - __field(unsigned long, caller ) - __field(unsigned long, ip ) - ), - - TP_fast_assign( - __entry->caller = caller; - __entry->ip = ip; - ), - - TP_printk("%pS %pS", (void *) __entry->caller, (void *) __entry->ip) -); - DECLARE_EVENT_CLASS(transaction_restart, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip), + TP_ARGS(trans_fn, caller_ip), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; ), - TP_printk("%pS %pS", - (void *) __entry->trans_ip, - (void *) __entry->caller_ip) + TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip) +); + +DEFINE_EVENT(transaction_restart, transaction_restart_ip, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip), + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_blocked_journal_reclaim, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_journal_reclaim, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_traverse_all, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip), - TP_ARGS(trans_ip, caller_ip) + TP_ARGS(trans_fn, caller_ip) ); DECLARE_EVENT_CLASS(transaction_restart_iter, - TP_PROTO(unsigned long 
trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos), + TP_ARGS(trans_fn, caller_ip, btree_id, pos), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(u8, btree_id ) __field(u64, pos_inode ) @@ -642,7 +629,7 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->btree_id = btree_id; __entry->pos_inode = pos->inode; @@ -650,8 +637,8 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, __entry->pos_snapshot = pos->snapshot; ), - TP_printk("%ps %pS btree %u pos %llu:%llu:%u", - (void *) __entry->trans_ip, + TP_printk("%s %pS btree %u pos %llu:%llu:%u", + __entry->trans_fn, (void *) __entry->caller_ip, __entry->btree_id, __entry->pos_inode, @@ -660,63 +647,63 @@ DECLARE_EVENT_CLASS(transaction_restart_iter, ); DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_reused, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_mark, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_upgrade, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_iter_upgrade, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_relock, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, enum btree_id btree_id, struct bpos *pos), - TP_ARGS(trans_ip, caller_ip, btree_id, pos) + TP_ARGS(trans_fn, caller_ip, btree_id, pos) ); TRACE_EVENT(trans_restart_would_deadlock, - TP_PROTO(unsigned long trans_ip, + TP_PROTO(const char *trans_fn, unsigned long caller_ip, bool in_traverse_all, unsigned reason, @@ -726,12 +713,12 @@ TRACE_EVENT(trans_restart_would_deadlock, enum btree_id want_btree_id, unsigned want_iter_type, struct bpos *want_pos), - TP_ARGS(trans_ip, caller_ip, in_traverse_all, reason, + TP_ARGS(trans_fn, caller_ip, in_traverse_all, reason, 
have_btree_id, have_iter_type, have_pos, want_btree_id, want_iter_type, want_pos), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(u8, in_traverse_all ) __field(u8, reason ) @@ -749,7 +736,7 @@ TRACE_EVENT(trans_restart_would_deadlock, ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->in_traverse_all = in_traverse_all; __entry->reason = reason; @@ -767,8 +754,8 @@ TRACE_EVENT(trans_restart_would_deadlock, __entry->want_pos_snapshot = want_pos->snapshot; ), - TP_printk("%pS %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u", - (void *) __entry->trans_ip, + TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u", + __entry->trans_fn, (void *) __entry->caller_ip, __entry->in_traverse_all, __entry->reason, @@ -785,39 +772,40 @@ TRACE_EVENT(trans_restart_would_deadlock, ); TRACE_EVENT(trans_restart_would_deadlock_write, - TP_PROTO(unsigned long trans_ip), - TP_ARGS(trans_ip), + TP_PROTO(const char *trans_fn), + TP_ARGS(trans_fn), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); ), - TP_printk("%ps", (void *) __entry->trans_ip) + TP_printk("%s", __entry->trans_fn) ); TRACE_EVENT(trans_restart_mem_realloced, - TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip, unsigned long bytes), - TP_ARGS(trans_ip, caller_ip, bytes), + TP_ARGS(trans_fn, caller_ip, bytes), TP_STRUCT__entry( - __field(unsigned long, trans_ip ) + __array(char, trans_fn, 24 ) __field(unsigned long, caller_ip ) __field(unsigned long, bytes ) ), TP_fast_assign( - __entry->trans_ip = trans_ip; + strncpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn)); __entry->caller_ip = caller_ip; __entry->bytes = bytes; ), - TP_printk("%pS %pS bytes %lu", - (void *) __entry->trans_ip, + TP_printk("%s %pS bytes %lu", + __entry->trans_fn, (void *) __entry->caller_ip, __entry->bytes) ); -- cgit v1.2.3 From 7c8f6f980dc85fefea69dc1aa161fd2af2d8b3d5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Jan 2022 02:13:21 -0500 Subject: bcachefs: btree_id_cached() Add a new helper that returns true if the given btree ID uses the btree key cache. This enables some new cleanups, since the helper can check the options for whether caching is enabled on a given btree. 
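Condensed from the diff below (an illustrative aside, not part of the original commit message): the helper is a single mask test, the mask is populated once at filesystem allocation, and iterator setup can then strip BTREE_ITER_CACHED for any btree that doesn't use the key cache:

static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
{
	return c->btree_key_cache_btrees & (1U << btree);
}

	/* in bch2_fs_alloc(): the alloc btree always uses the key cache */
	c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc;
	if (c->opts.inodes_use_key_cache)
		c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes;

	/* in __bch2_trans_iter_init(): */
	if (!btree_id_cached(trans->c, btree_id))
		flags &= ~BTREE_ITER_CACHED;
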
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 6 ++++++ fs/bcachefs/btree_iter.c | 3 +++ fs/bcachefs/fs.c | 4 ++-- fs/bcachefs/inode.c | 15 +++++---------- fs/bcachefs/inode.h | 2 +- fs/bcachefs/super.c | 4 ++++ 6 files changed, 21 insertions(+), 13 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 59c0963f785f..55db3c00f8dc 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -716,6 +716,7 @@ struct bch_fs { bool btree_trans_barrier_initialized; struct btree_key_cache btree_key_cache; + unsigned btree_key_cache_btrees; struct workqueue_struct *btree_update_wq; struct workqueue_struct *btree_io_complete_wq; @@ -952,6 +953,11 @@ static inline size_t btree_sectors(const struct bch_fs *c) return c->opts.btree_node_size >> 9; } +static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree) +{ + return c->btree_key_cache_btrees & (1U << btree); +} + static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time) { struct timespec64 t; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 100305cf93bf..986ee0927e4e 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2860,6 +2860,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans, if (trans->journal_replay_not_finished) flags |= BTREE_ITER_WITH_JOURNAL; + if (!btree_id_cached(trans->c, btree_id)) + flags &= ~BTREE_ITER_CACHED; + iter->trans = trans; iter->path = NULL; iter->update_path = NULL; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 10a737965beb..9e8b085e36d7 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -105,7 +105,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, bch2_assert_pos_locked(trans, BTREE_ID_inodes, POS(0, bi->bi_inum), - 0 && c->opts.inodes_use_key_cache); + c->opts.inodes_use_key_cache); set_nlink(&inode->v, bch2_inode_nlink_get(bi)); i_uid_write(&inode->v, bi->bi_uid); @@ -1473,7 +1473,7 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); - bch2_inode_rm(c, inode_inum(inode), true); + bch2_inode_rm(c, inode_inum(inode)); } } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 79ee9ca2f1d0..9214f68f017c 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -252,15 +252,13 @@ int bch2_inode_peek(struct btree_trans *trans, u32 snapshot; int ret; - if (trans->c->opts.inodes_use_key_cache) - flags |= BTREE_ITER_CACHED; - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) return ret; bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, - SPOS(0, inum.inum, snapshot), flags); + SPOS(0, inum.inum, snapshot), + flags|BTREE_ITER_CACHED); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) @@ -631,20 +629,16 @@ err: return ret; } -int bch2_inode_rm(struct bch_fs *c, subvol_inum inum, bool cached) +int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { struct btree_trans trans; struct btree_iter iter = { NULL }; struct bkey_i_inode_generation delete; struct bch_inode_unpacked inode_u; struct bkey_s_c k; - unsigned iter_flags = BTREE_ITER_INTENT; u32 snapshot; int ret; - if (cached && c->opts.inodes_use_key_cache) - iter_flags |= BTREE_ITER_CACHED; - bch2_trans_init(&trans, c, 0, 1024); /* @@ -668,7 +662,8 @@ retry: goto err; bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, - SPOS(0, inum.inum, snapshot), iter_flags); + SPOS(0, inum.inum, snapshot), + 
BTREE_ITER_INTENT|BTREE_ITER_CACHED); k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 723186d8afb6..77957cc7f9dd 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -87,7 +87,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, int bch2_inode_create(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, u32, u64); -int bch2_inode_rm(struct bch_fs *, subvol_inum, bool); +int bch2_inode_rm(struct bch_fs *, subvol_inum); int bch2_inode_find_by_inum_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 3094eb1e3406..a90fa0ae550b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -770,6 +770,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_opts_apply(&c->opts, opts); + c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc; + if (c->opts.inodes_use_key_cache) + c->btree_key_cache_btrees |= 1U << BTREE_ID_inodes; + c->block_bits = ilog2(block_sectors(c)); c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); -- cgit v1.2.3 From fa8e94faeece12c20b541f647059f29867e98bc0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 25 Feb 2022 13:18:19 -0500 Subject: bcachefs: Heap allocate printbufs This patch changes printbufs to dynamically allocate and reallocate a buffer as needed. Stack usage has become a bit of a problem, and a major cause of that has been statically sized string buffers on the stack. The most involved part of this refactoring is that printbufs must now be exited with printbuf_exit(). Signed-off-by: Kent Overstreet --- fs/bcachefs/bkey.c | 9 +- fs/bcachefs/bset.c | 51 +++++----- fs/bcachefs/btree_cache.c | 18 ++-- fs/bcachefs/btree_gc.c | 181 +++++++++++++++++++++------------ fs/bcachefs/btree_io.c | 103 ++++++++++--------- fs/bcachefs/btree_iter.c | 76 ++++++++------ fs/bcachefs/btree_update_interior.c | 31 +++--- fs/bcachefs/btree_update_leaf.c | 7 +- fs/bcachefs/buckets.c | 119 +++++++++++++--------- fs/bcachefs/clock.c | 2 + fs/bcachefs/debug.c | 42 ++++---- fs/bcachefs/ec.c | 7 +- fs/bcachefs/fs.c | 13 ++- fs/bcachefs/fsck.c | 178 ++++++++++++++++++++------------- fs/bcachefs/io.c | 14 +-- fs/bcachefs/journal.c | 24 +++-- fs/bcachefs/journal_io.c | 53 +++++----- fs/bcachefs/journal_reclaim.c | 11 +- fs/bcachefs/rebalance.c | 42 +++++--- fs/bcachefs/recovery.c | 22 +++- fs/bcachefs/replicas.c | 7 +- fs/bcachefs/super-io.c | 33 ++---- fs/bcachefs/super.c | 43 +++----- fs/bcachefs/sysfs.c | 193 +++++++++++++++--------------------- fs/bcachefs/tests.c | 14 ++- fs/bcachefs/util.c | 35 ++++++- fs/bcachefs/util.h | 78 ++++++++------- fs/bcachefs/xattr.c | 22 ++-- 28 files changed, 808 insertions(+), 620 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index 3e62eeb6774e..a1115abf83bb 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -57,11 +57,12 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, tmp = __bch2_bkey_unpack_key(format, packed); if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { - char buf1[160], buf2[160]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; char buf3[160], buf4[160]; - bch2_bkey_to_text(&PBUF(buf1), unpacked); - bch2_bkey_to_text(&PBUF(buf2), &tmp); + bch2_bkey_to_text(&buf1, unpacked); + bch2_bkey_to_text(&buf2, &tmp); bch2_to_binary(buf3, (void *) unpacked, 80); bch2_to_binary(buf4, high_word(format, 
packed), 80); @@ -72,7 +73,7 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, format->bits_per_field[2], format->bits_per_field[3], format->bits_per_field[4], - buf1, buf2, buf3, buf4); + buf1.buf, buf2.buf, buf3, buf4); } } diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 58e510fa19bd..adea3cea343b 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -58,7 +58,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, struct bkey_packed *_k, *_n; struct bkey uk, n; struct bkey_s_c k; - char buf[200]; + struct printbuf buf = PRINTBUF; if (!i->u64s) return; @@ -69,12 +69,14 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, _n = bkey_next(_k); k = bkey_disassemble(b, _k, &uk); + + printbuf_reset(&buf); if (c) - bch2_bkey_val_to_text(&PBUF(buf), c, k); + bch2_bkey_val_to_text(&buf, c, k); else - bch2_bkey_to_text(&PBUF(buf), k.k); + bch2_bkey_to_text(&buf, k.k); printk(KERN_ERR "block %u key %5zu: %s\n", set, - _k->_data - i->_data, buf); + _k->_data - i->_data, buf.buf); if (_n == vstruct_last(i)) continue; @@ -90,6 +92,8 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, !bpos_cmp(n.p, k.k->p)) printk(KERN_ERR "Duplicate keys\n"); } + + printbuf_exit(&buf); } void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) @@ -106,6 +110,7 @@ void bch2_dump_btree_node_iter(struct btree *b, struct btree_node_iter *iter) { struct btree_node_iter_set *set; + struct printbuf buf = PRINTBUF; printk(KERN_ERR "btree node iter with %u/%u sets:\n", __btree_node_iter_used(iter), b->nsets); @@ -114,12 +119,14 @@ void bch2_dump_btree_node_iter(struct btree *b, struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); struct bset_tree *t = bch2_bkey_to_bset(b, k); struct bkey uk = bkey_unpack_key(b, k); - char buf[100]; - bch2_bkey_to_text(&PBUF(buf), &uk); + printbuf_reset(&buf); + bch2_bkey_to_text(&buf, &uk); printk(KERN_ERR "set %zu key %u: %s\n", - t - b->set, set->k, buf); + t - b->set, set->k, buf.buf); } + + printbuf_exit(&buf); } #ifdef CONFIG_BCACHEFS_DEBUG @@ -155,13 +162,14 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, struct btree_node_iter_set *set; struct bkey ku = bkey_unpack_key(b, k); struct bkey nu = bkey_unpack_key(b, n); - char buf1[80], buf2[80]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; bch2_dump_btree_node(NULL, b); - bch2_bkey_to_text(&PBUF(buf1), &ku); - bch2_bkey_to_text(&PBUF(buf2), &nu); + bch2_bkey_to_text(&buf1, &ku); + bch2_bkey_to_text(&buf2, &nu); printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", - buf1, buf2); + buf1.buf, buf2.buf); printk(KERN_ERR "iter was:"); btree_node_iter_for_each(_iter, set) { @@ -226,6 +234,8 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, struct bset_tree *t = bch2_bkey_to_bset(b, where); struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); struct bkey_packed *next = (void *) (where->_data + clobber_u64s); + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; #if 0 BUG_ON(prev && bkey_iter_cmp(b, prev, insert) > 0); @@ -234,17 +244,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, bkey_iter_cmp(b, prev, insert) > 0) { struct bkey k1 = bkey_unpack_key(b, prev); struct bkey k2 = bkey_unpack_key(b, insert); - char buf1[100]; - char buf2[100]; bch2_dump_btree_node(NULL, b); - bch2_bkey_to_text(&PBUF(buf1), &k1); - bch2_bkey_to_text(&PBUF(buf2), &k2); + bch2_bkey_to_text(&buf1, &k1); + bch2_bkey_to_text(&buf2, &k2); panic("prev > insert:\n" "prev key 
%s\n" "insert key %s\n", - buf1, buf2); + buf1.buf, buf2.buf); } #endif #if 0 @@ -255,17 +263,15 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, bkey_iter_cmp(b, insert, next) > 0) { struct bkey k1 = bkey_unpack_key(b, insert); struct bkey k2 = bkey_unpack_key(b, next); - char buf1[100]; - char buf2[100]; bch2_dump_btree_node(NULL, b); - bch2_bkey_to_text(&PBUF(buf1), &k1); - bch2_bkey_to_text(&PBUF(buf2), &k2); + bch2_bkey_to_text(&buf1, &k1); + bch2_bkey_to_text(&buf2, &k2); panic("insert > next:\n" "insert key %s\n" "next key %s\n", - buf1, buf2); + buf1.buf, buf2.buf); } #endif } @@ -1555,9 +1561,6 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b, struct bkey uk; unsigned j, inorder; - if (out->pos != out->end) - *out->pos = '\0'; - if (!bset_has_ro_aux_tree(t)) return; diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index c17db1d07187..dbf3b084478f 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -742,14 +742,16 @@ static int lock_node_check_fn(struct six_lock *lock, void *p) static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { - char buf1[200], buf2[100], buf3[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + struct printbuf buf3 = PRINTBUF; if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) return; - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&b->key)); - bch2_bpos_to_text(&PBUF(buf2), b->data->min_key); - bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&b->key)); + bch2_bpos_to_text(&buf2, b->data->min_key); + bch2_bpos_to_text(&buf3, b->data->max_key); bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n" "btree %s level %u\n" @@ -757,10 +759,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) "header: btree %s level %llu\n" "min %s max %s\n", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, + buf1.buf, bch2_btree_ids[BTREE_NODE_ID(b->data)], BTREE_NODE_LEVEL(b->data), - buf2, buf3); + buf2.buf, buf3.buf); + + printbuf_exit(&buf3); + printbuf_exit(&buf2); + printbuf_exit(&buf1); } static inline void btree_check_header(struct bch_fs *c, struct btree *b) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index fbd54ac790ba..8eae5fb35c84 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -70,23 +70,23 @@ static int bch2_gc_check_topology(struct bch_fs *c, struct bpos expected_start = bkey_deleted(&prev->k->k) ? 
node_start : bpos_successor(prev->k->k.p); - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; int ret = 0; if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) { struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k); - if (bkey_deleted(&prev->k->k)) { - struct printbuf out = PBUF(buf1); - pr_buf(&out, "start of node: "); - bch2_bpos_to_text(&out, node_start); - } else { - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k)); - } - if (bpos_cmp(expected_start, bp->v.min_key)) { bch2_topology_error(c); + if (bkey_deleted(&prev->k->k)) { + pr_buf(&buf1, "start of node: "); + bch2_bpos_to_text(&buf1, node_start); + } else { + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k)); + } + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k)); + if (__fsck_err(c, FSCK_CAN_FIX| FSCK_CAN_IGNORE| @@ -95,11 +95,11 @@ static int bch2_gc_check_topology(struct bch_fs *c, " prev %s\n" " cur %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, - (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)) && + buf1.buf, buf2.buf) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); - return FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + goto err; } else { set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); } @@ -109,6 +109,12 @@ static int bch2_gc_check_topology(struct bch_fs *c, if (is_last && bpos_cmp(cur.k->k.p, node_end)) { bch2_topology_error(c); + printbuf_reset(&buf1); + printbuf_reset(&buf2); + + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k)); + bch2_bpos_to_text(&buf2, node_end); + if (__fsck_err(c, FSCK_CAN_FIX| FSCK_CAN_IGNORE| @@ -117,18 +123,21 @@ static int bch2_gc_check_topology(struct bch_fs *c, " %s\n" " expected %s", bch2_btree_ids[b->c.btree_id], b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1), - (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)) && + buf1.buf, buf2.buf) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { bch_info(c, "Halting mark and sweep to start topology repair pass"); - return FSCK_ERR_START_TOPOLOGY_REPAIR; + ret = FSCK_ERR_START_TOPOLOGY_REPAIR; + goto err; } else { set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags); } } bch2_bkey_buf_copy(prev, c, cur.k); +err: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -251,18 +260,17 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, struct bpos expected_start = !prev ? 
b->data->min_key : bpos_successor(prev->key.k.p); - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; int ret = 0; if (!prev) { - struct printbuf out = PBUF(buf1); - pr_buf(&out, "start of node: "); - bch2_bpos_to_text(&out, b->data->min_key); + pr_buf(&buf1, "start of node: "); + bch2_bpos_to_text(&buf1, b->data->min_key); } else { - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&prev->key)); + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key)); } - bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&cur->key)); + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key)); if (prev && bpos_cmp(expected_start, cur->data->min_key) > 0 && @@ -275,8 +283,10 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " node %s\n" " next %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) - return DROP_PREV_NODE; + buf1.buf, buf2.buf)) { + ret = DROP_PREV_NODE; + goto out; + } if (mustfix_fsck_err_on(bpos_cmp(prev->key.k.p, bpos_predecessor(cur->data->min_key)), c, @@ -284,7 +294,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " node %s\n" " next %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) + buf1.buf, buf2.buf)) ret = set_node_max(c, prev, bpos_predecessor(cur->data->min_key)); } else { @@ -296,39 +306,49 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " prev %s\n" " node %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) - return DROP_THIS_NODE; + buf1.buf, buf2.buf)) { + ret = DROP_THIS_NODE; + goto out; + } if (mustfix_fsck_err_on(bpos_cmp(expected_start, cur->data->min_key), c, "btree node with incorrect min_key at btree %s level %u:\n" " prev %s\n" " node %s", bch2_btree_ids[b->c.btree_id], b->c.level, - buf1, buf2)) + buf1.buf, buf2.buf)) ret = set_node_min(c, cur, expected_start); } +out: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } static int btree_repair_node_end(struct bch_fs *c, struct btree *b, struct btree *child) { - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; int ret = 0; + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key)); + bch2_bpos_to_text(&buf2, b->key.k.p); + if (mustfix_fsck_err_on(bpos_cmp(child->key.k.p, b->key.k.p), c, "btree node with incorrect max_key at btree %s level %u:\n" " %s\n" " expected %s", bch2_btree_ids[b->c.btree_id], b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&child->key)), buf1), - (bch2_bpos_to_text(&PBUF(buf2), b->key.k.p), buf2))) { + buf1.buf, buf2.buf)) { ret = set_node_max(c, child, b->key.k.p); if (ret) - return ret; + goto err; } +err: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -339,7 +359,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b) struct bkey_buf prev_k, cur_k; struct btree *prev = NULL, *cur = NULL; bool have_child, dropped_children = false; - char buf[200]; + struct printbuf buf; int ret = 0; if (!b->c.level) @@ -363,12 +383,15 @@ again: false); ret = PTR_ERR_OR_ZERO(cur); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); + if (mustfix_fsck_err_on(ret == -EIO, c, "Unreadable btree node at btree %s level %u:\n" " %s", bch2_btree_ids[b->c.btree_id], b->c.level - 1, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur_k.k)), buf))) { + buf.buf)) { bch2_btree_node_evict(c, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); @@ -468,12 +491,14 @@ 
again: have_child = true; } + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + if (mustfix_fsck_err_on(!have_child, c, "empty interior btree node at btree %s level %u\n" " %s", bch2_btree_ids[b->c.btree_id], - b->c.level, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key)), buf))) + b->c.level, buf.buf)) ret = DROP_THIS_NODE; err: fsck_err: @@ -489,6 +514,7 @@ fsck_err: if (!ret && dropped_children) goto again; + printbuf_exit(&buf); return ret; } @@ -524,7 +550,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, const union bch_extent_entry *entry; struct extent_ptr_decoded p = { 0 }; bool do_update = false; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; /* @@ -542,7 +568,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (!p.ptr.cached) { g->_mark.gen = p.ptr.gen; g->gen_valid = true; @@ -557,7 +584,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (!p.ptr.cached) { g->_mark.gen = p.ptr.gen; g->gen_valid = true; @@ -576,7 +604,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->mark.gen, bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; if (fsck_err_on(!p.ptr.cached && @@ -586,7 +615,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[ptr_data_type(k->k, &p.ptr)], p.ptr.gen, g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; if (data_type != BCH_DATA_btree && p.ptr.gen != g->mark.gen) @@ -599,7 +629,8 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), bch2_data_types[g->mark.data_type], bch2_data_types[data_type], - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) { if (data_type == BCH_DATA_btree) { g->_mark.data_type = data_type; set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); @@ -615,14 +646,16 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, "pointer to nonexistent stripe %llu\n" "while marking %s", (u64) p.ec.idx, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; if (fsck_err_on(!bch2_ptr_matches_stripe_m(m, p), c, "pointer does not match stripe %llu\n" "while marking %s", (u64) p.ec.idx, - (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) do_update = true; } } @@ -635,13 +668,15 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, if (is_root) { bch_err(c, "cannot update btree roots yet"); - return -EINVAL; + ret = -EINVAL; + goto err; } 
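The conversion running through these hunks follows one pattern: a fixed on-stack "char buf[200]" plus PBUF() becomes a heap-backed "struct printbuf buf = PRINTBUF", reused via printbuf_reset() instead of declaring a second array, and freed with printbuf_exit() on every exit path. A standalone userspace sketch of that pattern follows; the type and helpers here are toy stand-ins that mirror the kernel API's names (PRINTBUF, pr_buf, printbuf_reset, printbuf_exit), not the in-kernel implementation.

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

struct printbuf {
	char		*buf;
	size_t		size;
	size_t		pos;
	unsigned	allocation_failure:1;
};

#define PRINTBUF ((struct printbuf) { NULL, 0, 0, 0 })

static void pr_buf(struct printbuf *out, const char *fmt, ...)
{
	va_list args;
	int len;

	/* First pass: how much space does this format need? */
	va_start(args, fmt);
	len = vsnprintf(NULL, 0, fmt, args);
	va_end(args);
	if (len < 0)
		return;

	if (out->pos + len + 1 > out->size) {
		size_t new_size = out->size ? out->size * 2 : 128;
		char *n;

		while (new_size < out->pos + len + 1)
			new_size *= 2;
		n = realloc(out->buf, new_size);
		if (!n) {
			out->allocation_failure = 1;
			return;
		}
		out->buf = n;
		out->size = new_size;
	}

	/* Second pass: append for real. */
	va_start(args, fmt);
	out->pos += vsnprintf(out->buf + out->pos, out->size - out->pos,
			      fmt, args);
	va_end(args);
}

static void printbuf_reset(struct printbuf *out)
{
	out->pos = 0;	/* keep the allocation, reuse the buffer */
}

static void printbuf_exit(struct printbuf *out)
{
	free(out->buf);
	*out = PRINTBUF;
}

int main(void)
{
	struct printbuf buf = PRINTBUF;

	pr_buf(&buf, "bucket %u gen %u", 42U, 7U);
	printf("first use:   %s\n", buf.buf ? buf.buf : "(alloc failed)");

	printbuf_reset(&buf);	/* replaces declaring a second char buf2[200] */
	pr_buf(&buf, "while marking %s", "u64s 5 type extent");
	printf("after reset: %s\n", buf.buf ? buf.buf : "(alloc failed)");

	printbuf_exit(&buf);	/* every PRINTBUF needs a matching exit */
	return 0;
}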
new = kmalloc(bkey_bytes(k->k), GFP_KERNEL); if (!new) { bch_err(c, "%s: error allocating new key", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto err; } bkey_reassemble(new, *k); @@ -705,19 +740,25 @@ found: ret = bch2_journal_key_insert_take(c, btree_id, level, new); if (ret) { kfree(new); - return ret; + goto err; } if (level) bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new); - bch2_bkey_val_to_text(&PBUF(buf), c, *k); - bch_info(c, "updated %s", buf); - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(new)); - bch_info(c, "new key %s", buf); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, *k); + bch_info(c, "updated %s", buf.buf); + + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); + bch_info(c, "new key %s", buf.buf); + *k = bkey_i_to_s_c(new); } +err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -852,7 +893,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b struct btree_and_journal_iter iter; struct bkey_s_c k; struct bkey_buf cur, prev; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; bch2_btree_and_journal_iter_init_node_iter(&iter, c, b); @@ -913,7 +954,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b " %s", bch2_btree_ids[b->c.btree_id], b->c.level - 1, - (bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(cur.k)), buf)) && + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) && !test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) { ret = FSCK_ERR_START_TOPOLOGY_REPAIR; bch_info(c, "Halting mark and sweep to start topology repair pass"); @@ -943,6 +985,7 @@ fsck_err: bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); bch2_btree_and_journal_iter_exit(&iter); + printbuf_exit(&buf); return ret; } @@ -956,7 +999,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans, : bch2_expensive_debug_checks ? 0 : !btree_node_type_needs_gc(btree_id) ? 
1 : 0; - char buf[100]; + struct printbuf buf = PRINTBUF; int ret = 0; b = c->btree_roots[btree_id].b; @@ -965,17 +1008,19 @@ static int bch2_gc_btree_init(struct btree_trans *trans, return 0; six_lock_read(&b->c.lock, NULL, NULL); + printbuf_reset(&buf); + bch2_bpos_to_text(&buf, b->data->min_key); if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c, - "btree root with incorrect min_key: %s", - (bch2_bpos_to_text(&PBUF(buf), b->data->min_key), buf))) { + "btree root with incorrect min_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = FSCK_ERR_EXIT; goto fsck_err; } + printbuf_reset(&buf); + bch2_bpos_to_text(&buf, b->data->max_key); if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, SPOS_MAX), c, - "btree root with incorrect max_key: %s", - (bch2_bpos_to_text(&PBUF(buf), b->data->max_key), buf))) { + "btree root with incorrect max_key: %s", buf.buf)) { bch_err(c, "repair unimplemented"); ret = FSCK_ERR_EXIT; goto fsck_err; @@ -995,6 +1040,7 @@ fsck_err: if (ret < 0) bch_err(c, "%s: ret %i", __func__, ret); + printbuf_exit(&buf); return ret; } @@ -1131,6 +1177,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial, bool metadata_only) { struct bch_dev *ca = NULL; + struct printbuf buf = PRINTBUF; bool verify = !metadata_only && (!initial || (c->sb.compat & (1ULL << BCH_COMPAT_alloc_info))); unsigned i, dev; @@ -1201,16 +1248,16 @@ static int bch2_gc_done(struct bch_fs *c, for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); - char buf[80]; if (metadata_only && (e->data_type == BCH_DATA_user || e->data_type == BCH_DATA_cached)) continue; - bch2_replicas_entry_to_text(&PBUF(buf), e); + printbuf_reset(&buf); + bch2_replicas_entry_to_text(&buf, e); - copy_fs_field(replicas[i], "%s", buf); + copy_fs_field(replicas[i], "%s", buf.buf); } } @@ -1225,6 +1272,7 @@ fsck_err: bch_err(c, "%s: ret %i", __func__, ret); percpu_up_write(&c->mark_lock); + printbuf_exit(&buf); return ret; } @@ -1424,7 +1472,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) struct bkey_s_c k; struct reflink_gc *r; size_t idx = 0; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; if (metadata_only) @@ -1452,7 +1500,8 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) "reflink key has wrong refcount:\n" " %s\n" " should be %u", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf), r->refcount)) { struct bkey_i *new; @@ -1481,6 +1530,7 @@ fsck_err: bch2_trans_iter_exit(&trans, &iter); c->reflink_gc_nr = 0; bch2_trans_exit(&trans); + printbuf_exit(&buf); return ret; } @@ -1539,7 +1589,7 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) struct bkey_s_c k; struct gc_stripe *m; const struct bch_stripe *s; - char buf[200]; + struct printbuf buf = PRINTBUF; unsigned i; int ret = 0; @@ -1565,7 +1615,8 @@ inconsistent: "stripe has wrong block sector count %u:\n" " %s\n" " should be %u", i, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf), m ? 
m->block_sectors[i] : 0)) { struct bkey_i_stripe *new; @@ -1589,6 +1640,8 @@ fsck_err: bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); + + printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index c65c640753b6..1dc21b5948ea 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -534,13 +534,7 @@ enum btree_validate_ret { #define btree_err(type, c, ca, b, i, msg, ...) \ ({ \ __label__ out; \ - char _buf[300]; \ - char *_buf2 = _buf; \ - struct printbuf out = PBUF(_buf); \ - \ - _buf2 = kmalloc(4096, GFP_ATOMIC); \ - if (_buf2) \ - out = _PBUF(_buf2, 4986); \ + struct printbuf out = PRINTBUF; \ \ btree_err_msg(&out, c, ca, b, i, b->written, write); \ pr_buf(&out, ": " msg, ##__VA_ARGS__); \ @@ -548,14 +542,13 @@ enum btree_validate_ret { if (type == BTREE_ERR_FIXABLE && \ write == READ && \ !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ - mustfix_fsck_err(c, "%s", _buf2); \ + mustfix_fsck_err(c, "%s", out.buf); \ goto out; \ } \ \ switch (write) { \ case READ: \ - if (_buf2) \ - bch_err(c, "%s", _buf2); \ + bch_err(c, "%s", out.buf); \ \ switch (type) { \ case BTREE_ERR_FIXABLE: \ @@ -576,7 +569,7 @@ enum btree_validate_ret { } \ break; \ case WRITE: \ - bch_err(c, "corrupt metadata before write: %s", _buf2); \ + bch_err(c, "corrupt metadata before write: %s", out.buf);\ \ if (bch2_fs_inconsistent(c)) { \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ @@ -585,8 +578,7 @@ enum btree_validate_ret { break; \ } \ out: \ - if (_buf2 != _buf) \ - kfree(_buf2); \ + printbuf_exit(&out); \ true; \ }) @@ -648,8 +640,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, { unsigned version = le16_to_cpu(i->version); const char *err; - char buf1[100]; - char buf2[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; int ret = 0; btree_err_on((version != BCH_BSET_VERSION_OLD && @@ -686,7 +678,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_FIXABLE, c, ca, b, i, "bset past end of btree node")) { i->u64s = 0; - return 0; + ret = 0; + goto out; } btree_err_on(offset && !i->u64s, @@ -737,14 +730,17 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_err_on(bpos_cmp(b->data->min_key, bp->min_key), BTREE_ERR_MUST_RETRY, c, ca, b, NULL, "incorrect min_key: got %s should be %s", - (bch2_bpos_to_text(&PBUF(buf1), bn->min_key), buf1), - (bch2_bpos_to_text(&PBUF(buf2), bp->min_key), buf2)); + (printbuf_reset(&buf1), + bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), + (printbuf_reset(&buf2), + bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf)); } btree_err_on(bpos_cmp(bn->max_key, b->key.k.p), BTREE_ERR_MUST_RETRY, c, ca, b, i, "incorrect max key %s", - (bch2_bpos_to_text(&PBUF(buf1), bn->max_key), buf1)); + (printbuf_reset(&buf1), + bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); if (write) compat_btree_node(b->c.level, b->c.btree_id, version, @@ -759,7 +755,10 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BSET_BIG_ENDIAN(i), write, &bn->format); } +out: fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -769,6 +768,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; @@ -807,11 +808,10 @@ static int 
validate_bset_keys(struct bch_fs *c, struct btree *b, (!updated_range ? bch2_bkey_in_btree_node(b, u.s_c) : NULL) ?: (write ? bch2_bkey_val_invalid(c, u.s_c) : NULL); if (invalid) { - char buf[160]; - - bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); + printbuf_reset(&buf1); + bch2_bkey_val_to_text(&buf1, c, u.s_c); btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, - "invalid bkey: %s\n%s", invalid, buf); + "invalid bkey: %s\n%s", invalid, buf1.buf); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_next(k), @@ -825,18 +825,18 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, &b->format, k); if (prev && bkey_iter_cmp(b, prev, k) > 0) { - char buf1[80]; - char buf2[80]; struct bkey up = bkey_unpack_key(b, prev); - bch2_bkey_to_text(&PBUF(buf1), &up); - bch2_bkey_to_text(&PBUF(buf2), u.k); + printbuf_reset(&buf1); + bch2_bkey_to_text(&buf1, &up); + printbuf_reset(&buf2); + bch2_bkey_to_text(&buf2, u.k); bch2_dump_bset(c, b, i, 0); if (btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "keys out of order: %s > %s", - buf1, buf2)) { + buf1.buf, buf2.buf)) { i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); memmove_u64s_down(k, bkey_next(k), (u64 *) vstruct_end(i) - (u64 *) k); @@ -848,6 +848,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k = bkey_next(k); } fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } @@ -1063,11 +1065,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (invalid || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - char buf[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c); + bch2_bkey_val_to_text(&buf, c, u.s_c); btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "invalid bkey %s: %s", buf, invalid); + printbuf_exit(&buf); btree_keys_account_key_drop(&b->nr, 0, k); @@ -1124,8 +1127,7 @@ static void btree_node_read_work(struct work_struct *work) struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev); struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; - char buf[200]; - struct printbuf out; + struct printbuf buf = PRINTBUF; bool saw_error = false; bool can_retry; @@ -1145,10 +1147,10 @@ static void btree_node_read_work(struct work_struct *work) bio->bi_status = BLK_STS_REMOVED; } start: - out = PBUF(buf); - btree_pos_to_text(&out, c, b); + printbuf_reset(&buf); + btree_pos_to_text(&buf, c, b); bch2_dev_io_err_on(bio->bi_status, ca, "btree read error %s for %s", - bch2_blk_status_to_str(bio->bi_status), buf); + bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) percpu_ref_put(&ca->io_ref); rb->have_ioref = false; @@ -1174,6 +1176,7 @@ start: bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); + printbuf_exit(&buf); if (saw_error && !btree_node_read_error(b)) bch2_btree_node_rewrite_async(c, b); @@ -1254,6 +1257,7 @@ static void btree_node_read_all_replicas_done(struct closure *cl) container_of(cl, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; + struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; bool have_retry = false; int ret = 0, best = -1, write = READ; @@ -1297,8 +1301,6 @@ static void btree_node_read_all_replicas_done(struct closure *cl) fsck_err: if (dump_bset_maps) { for (i = 0; i < ra->nr; i++) { - char buf[200]; - struct printbuf out = PBUF(buf); struct btree_node *bn = ra->buf[i]; struct btree_node_entry *bne = NULL; unsigned offset = 0, sectors; @@ -1307,6 +1309,8 @@ 
fsck_err: if (ra->err[i]) continue; + printbuf_reset(&buf); + while (offset < btree_sectors(c)) { if (!offset) { sectors = vstruct_sectors(bn, c->block_bits); @@ -1317,10 +1321,10 @@ fsck_err: sectors = vstruct_sectors(bne, c->block_bits); } - pr_buf(&out, " %u-%u", offset, offset + sectors); + pr_buf(&buf, " %u-%u", offset, offset + sectors); if (bne && bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) - pr_buf(&out, "*"); + pr_buf(&buf, "*"); offset += sectors; } @@ -1328,19 +1332,19 @@ fsck_err: bne = ra->buf[i] + (offset << 9); if (bne->keys.seq == bn->keys.seq) { if (!gap) - pr_buf(&out, " GAP"); + pr_buf(&buf, " GAP"); gap = true; sectors = vstruct_sectors(bne, c->block_bits); - pr_buf(&out, " %u-%u", offset, offset + sectors); + pr_buf(&buf, " %u-%u", offset, offset + sectors); if (bch2_journal_seq_is_blacklisted(c, le64_to_cpu(bne->keys.journal_seq), false)) - pr_buf(&out, "*"); + pr_buf(&buf, "*"); } offset++; } - bch_err(c, "replica %u:%s", i, buf); + bch_err(c, "replica %u:%s", i, buf.buf); } } @@ -1361,6 +1365,7 @@ fsck_err: closure_debug_destroy(&ra->cl); kfree(ra); + printbuf_exit(&buf); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); @@ -1461,23 +1466,23 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, struct btree_read_bio *rb; struct bch_dev *ca; struct bio *bio; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret; - btree_pos_to_text(&PBUF(buf), c, b); + btree_pos_to_text(&buf, c, b); trace_btree_read(c, b); if (bch2_verify_all_btree_replicas && !btree_node_read_all_replicas(c, b, sync)) - return; + goto out; ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick); if (bch2_fs_fatal_err_on(ret <= 0, c, "btree node read error: no device to read from\n" - " at %s", buf)) { + " at %s", buf.buf)) { set_btree_node_read_error(b); - return; + goto out; } ca = bch_dev_bkey_exists(c, pick.ptr.dev); @@ -1519,6 +1524,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, else queue_work(c->io_complete_wq, &rb->work); } +out: + printbuf_exit(&buf); } int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b58219292f34..92258281fdc7 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -574,7 +574,9 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans, struct btree_node_iter tmp; bool locked; struct bkey_packed *p, *k; - char buf1[100], buf2[100], buf3[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + struct printbuf buf3 = PRINTBUF; const char *msg; if (!bch2_debug_check_iterators) @@ -622,26 +624,27 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans, btree_node_unlock(path, level); return; err: - strcpy(buf2, "(none)"); - strcpy(buf3, "(none)"); - - bch2_bpos_to_text(&PBUF(buf1), path->pos); + bch2_bpos_to_text(&buf1, path->pos); if (p) { struct bkey uk = bkey_unpack_key(l->b, p); - bch2_bkey_to_text(&PBUF(buf2), &uk); + bch2_bkey_to_text(&buf2, &uk); + } else { + pr_buf(&buf2, "(none)"); } if (k) { struct bkey uk = bkey_unpack_key(l->b, k); - bch2_bkey_to_text(&PBUF(buf3), &uk); + bch2_bkey_to_text(&buf3, &uk); + } else { + pr_buf(&buf3, "(none)"); } panic("path should be %s key at level %u:\n" "path pos %s\n" "prev key %s\n" "cur key %s\n", - msg, level, buf1, buf2, buf3); + msg, level, buf1.buf, buf2.buf, buf3.buf); } static void bch2_btree_path_verify(struct btree_trans *trans, @@ -739,16 +742,16 @@ static 
int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k if (!bkey_cmp(prev.k->p, k.k->p) && bch2_snapshot_is_ancestor(trans->c, iter->snapshot, prev.k->p.snapshot) > 0) { - char buf1[100], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - bch2_bkey_to_text(&PBUF(buf1), k.k); - bch2_bkey_to_text(&PBUF(buf2), prev.k); + bch2_bkey_to_text(&buf1, k.k); + bch2_bkey_to_text(&buf2, prev.k); panic("iter snap %u\n" "k %s\n" "prev %s\n", iter->snapshot, - buf1, buf2); + buf1.buf, buf2.buf); } out: bch2_trans_iter_exit(trans, ©); @@ -760,7 +763,7 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, { struct btree_path *path; unsigned idx; - char buf[100]; + struct printbuf buf = PRINTBUF; trans_for_each_path_inorder(trans, path, idx) { int cmp = cmp_int(path->btree_id, id) ?: @@ -786,9 +789,10 @@ void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id, } bch2_dump_trans_paths_updates(trans); + bch2_bpos_to_text(&buf, pos); + panic("not locked: %s %s%s\n", - bch2_btree_ids[id], - (bch2_bpos_to_text(&PBUF(buf), pos), buf), + bch2_btree_ids[id], buf.buf, key_cache ? " cached" : ""); } @@ -1071,23 +1075,23 @@ static void btree_path_verify_new_node(struct btree_trans *trans, if (!k || bkey_deleted(k) || bkey_cmp_left_packed(l->b, k, &b->key.k.p)) { - char buf1[100]; - char buf2[100]; - char buf3[100]; - char buf4[100]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + struct printbuf buf3 = PRINTBUF; + struct printbuf buf4 = PRINTBUF; struct bkey uk = bkey_unpack_key(b, k); bch2_dump_btree_node(c, l->b); - bch2_bpos_to_text(&PBUF(buf1), path->pos); - bch2_bkey_to_text(&PBUF(buf2), &uk); - bch2_bpos_to_text(&PBUF(buf3), b->data->min_key); - bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); + bch2_bpos_to_text(&buf1, path->pos); + bch2_bkey_to_text(&buf2, &uk); + bch2_bpos_to_text(&buf3, b->data->min_key); + bch2_bpos_to_text(&buf3, b->data->max_key); panic("parent iter doesn't point to new node:\n" "iter pos %s %s\n" "iter key %s\n" "new node %s-%s\n", - bch2_btree_ids[path->btree_id], buf1, - buf2, buf3, buf4); + bch2_btree_ids[path->btree_id], + buf1.buf, buf2.buf, buf3.buf, buf4.buf); } if (!parent_locked) @@ -1783,18 +1787,22 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) { struct btree_path *path; struct btree_insert_entry *i; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; unsigned idx; - char buf1[300], buf2[300]; btree_trans_sort_paths(trans); - trans_for_each_path_inorder(trans, path, idx) + trans_for_each_path_inorder(trans, path, idx) { + printbuf_reset(&buf1); + + bch2_bpos_to_text(&buf1, path->pos); + printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree %s pos %s locks %u %pS\n", path->idx, path->ref, path->intent_ref, path->should_be_locked ? " S" : "", path->preserve ? 
" P" : "", bch2_btree_ids[path->btree_id], - (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1), + buf1.buf, path->nodes_locked, #ifdef CONFIG_BCACHEFS_DEBUG (void *) path->ip_allocated @@ -1802,17 +1810,25 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) NULL #endif ); + } trans_for_each_update(trans, i) { struct bkey u; struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u); + printbuf_reset(&buf1); + printbuf_reset(&buf2); + bch2_bkey_val_to_text(&buf1, trans->c, old); + bch2_bkey_val_to_text(&buf2, trans->c, bkey_i_to_s_c(i->k)); + printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s", bch2_btree_ids[i->btree_id], (void *) i->ip_allocated, - (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1), - (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2)); + buf1.buf, buf2.buf); } + + printbuf_exit(&buf2); + printbuf_exit(&buf1); } static struct btree_path *btree_path_alloc(struct btree_trans *trans, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 255753b2dc0e..ed0a70f7ea68 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -41,7 +41,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) struct bkey_s_c k; struct bkey_s_c_btree_ptr_v2 bp; struct bkey unpacked; - char buf1[100], buf2[100]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; BUG_ON(!b->c.level); @@ -58,9 +58,9 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) if (bpos_cmp(next_node, bp.v->min_key)) { bch2_dump_btree_node(c, b); - panic("expected next min_key %s got %s\n", - (bch2_bpos_to_text(&PBUF(buf1), next_node), buf1), - (bch2_bpos_to_text(&PBUF(buf2), bp.v->min_key), buf2)); + bch2_bpos_to_text(&buf1, next_node); + bch2_bpos_to_text(&buf2, bp.v->min_key); + panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf); } bch2_btree_node_iter_advance(&iter, b); @@ -68,9 +68,9 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) if (bch2_btree_node_iter_end(&iter)) { if (bpos_cmp(k.k->p, b->key.k.p)) { bch2_dump_btree_node(c, b); - panic("expected end %s got %s\n", - (bch2_bpos_to_text(&PBUF(buf1), b->key.k.p), buf1), - (bch2_bpos_to_text(&PBUF(buf2), k.k->p), buf2)); + bch2_bpos_to_text(&buf1, b->key.k.p); + bch2_bpos_to_text(&buf2, k.k->p); + panic("expected end %s got %s\n", buf1.buf, buf2.buf); } break; } @@ -1151,10 +1151,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert)); if (invalid) { - char buf[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert)); - bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf, invalid); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); + bch2_fs_inconsistent(c, "inserting invalid bkey %s: %s", buf.buf, invalid); + printbuf_exit(&buf); dump_stack(); } @@ -1636,15 +1637,17 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key)) { - char buf1[100], buf2[100]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - bch2_bpos_to_text(&PBUF(buf1), prev->data->max_key); - bch2_bpos_to_text(&PBUF(buf2), next->data->min_key); + bch2_bpos_to_text(&buf1, prev->data->max_key); + bch2_bpos_to_text(&buf2, next->data->min_key); bch_err(c, "btree topology error in btree merge:\n" " prev ends at %s\n" " next 
starts at %s", - buf1, buf2); + buf1.buf, buf2.buf); + printbuf_exit(&buf1); + printbuf_exit(&buf2); bch2_topology_error(c); ret = -EIO; goto err; diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index e9e10df8ee95..4b0e00f32a96 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -831,11 +831,12 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, const char *invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type); if (invalid) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); bch2_fs_fatal_error(c, "invalid bkey %s on insert from %s -> %ps: %s\n", - buf, trans->fn, (void *) i->ip_allocated, invalid); + buf.buf, trans->fn, (void *) i->ip_allocated, invalid); + printbuf_exit(&buf); return -EINVAL; } btree_insert_entry_checks(trans, i); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 136a5727ea20..7d3636e20c81 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -376,22 +376,23 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, { struct bch_fs_usage __percpu *fs_usage; int idx, ret = 0; - char buf[200]; + struct printbuf buf = PRINTBUF; percpu_down_read(&c->mark_lock); + buf.atomic++; idx = bch2_replicas_entry_idx(c, r); if (idx < 0 && (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || fsck_err(c, "no replicas entry\n" " while marking %s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))) { + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { percpu_up_read(&c->mark_lock); ret = bch2_mark_replicas(c, r); - if (ret) - return ret; - percpu_down_read(&c->mark_lock); + + if (ret) + goto err; idx = bch2_replicas_entry_idx(c, r); } if (idx < 0) { @@ -407,6 +408,7 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, err: fsck_err: percpu_up_read(&c->mark_lock); + printbuf_exit(&buf); return ret; } @@ -678,7 +680,8 @@ static int check_bucket_ref(struct bch_fs *c, u16 bucket_sectors = !ptr->cached ? 
dirty_sectors : cached_sectors; - char buf[200]; + struct printbuf buf = PRINTBUF; + int ret = 0; if (gen_after(ptr->gen, b_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, @@ -687,8 +690,9 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type ?: ptr_data_type], ptr->gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { @@ -698,8 +702,10 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type ?: ptr_data_type], ptr->gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } if (b_gen != ptr->gen && !ptr->cached) { @@ -710,12 +716,16 @@ static int check_bucket_ref(struct bch_fs *c, *bucket_gen(ca, bucket_nr), bch2_data_types[bucket_data_type ?: ptr_data_type], ptr->gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } - if (b_gen != ptr->gen) - return 1; + if (b_gen != ptr->gen) { + ret = 1; + goto err; + } if (bucket_data_type && ptr_data_type && bucket_data_type != ptr_data_type) { @@ -725,8 +735,10 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type], bch2_data_types[ptr_data_type], - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } if ((unsigned) (bucket_sectors + sectors) > U16_MAX) { @@ -736,11 +748,14 @@ static int check_bucket_ref(struct bch_fs *c, ptr->dev, bucket_nr, b_gen, bch2_data_types[bucket_data_type ?: ptr_data_type], bucket_sectors, sectors, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); - return -EIO; + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + goto err; } - - return 0; +err: + printbuf_exit(&buf); + return ret; } static int mark_stripe_bucket(struct btree_trans *trans, @@ -759,7 +774,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); struct bucket *g; struct bucket_mark new, old; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; BUG_ON(!(flags & BTREE_TRIGGER_GC)); @@ -767,6 +782,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, /* * XXX doesn't handle deletion */ percpu_down_read(&c->mark_lock); + buf.atomic++; g = PTR_GC_BUCKET(ca, ptr); if (g->mark.dirty_sectors || @@ -774,7 +790,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, bch2_fs_inconsistent(c, "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); ret = -EINVAL; goto err; } @@ -799,8 +815,8 @@ static int mark_stripe_bucket(struct btree_trans *trans, bch2_dev_usage_update(c, ca, old, new, journal_seq, true); err: percpu_up_read(&c->mark_lock); - - return 0; + printbuf_exit(&buf); + return ret; } static int __mark_pointer(struct btree_trans *trans, @@ -987,10 +1003,11 @@ static int bch2_mark_extent(struct btree_trans *trans, if (r.e.nr_devs) { ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); if (ret) { - char buf[200]; + struct 
printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, k); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf); + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + printbuf_exit(&buf); return ret; } } @@ -1019,13 +1036,16 @@ static int bch2_mark_stripe(struct btree_trans *trans, struct stripe *m = genradix_ptr(&c->stripes, idx); if (!m || (old_s && !m->alive)) { - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf1), c, old); - bch2_bkey_val_to_text(&PBUF(buf2), c, new); + bch2_bkey_val_to_text(&buf1, c, old); + bch2_bkey_val_to_text(&buf2, c, new); bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" "old %s\n" - "new %s", idx, buf1, buf2); + "new %s", idx, buf1.buf, buf2.buf); + printbuf_exit(&buf2); + printbuf_exit(&buf1); bch2_inconsistent_error(c); return -1; } @@ -1090,10 +1110,11 @@ static int bch2_mark_stripe(struct btree_trans *trans, ((s64) m->sectors * m->nr_redundant), journal_seq, gc); if (ret) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, new); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf); + bch2_bkey_val_to_text(&buf, c, new); + bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + printbuf_exit(&buf); return ret; } } @@ -1174,7 +1195,7 @@ static s64 __bch2_mark_reflink_p(struct btree_trans *trans, int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1; u64 next_idx = end; s64 ret = 0; - char buf[200]; + struct printbuf buf = PRINTBUF; if (r_idx >= c->reflink_gc_nr) goto not_found; @@ -1193,7 +1214,7 @@ not_found: if (fsck_err(c, "pointer to missing indirect extent\n" " %s\n" " missing range %llu-%llu", - (bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c), buf), + (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), *idx, next_idx)) { struct bkey_i_error new; @@ -1207,6 +1228,7 @@ not_found: *idx = next_idx; fsck_err: + printbuf_exit(&buf); return ret; } @@ -1289,7 +1311,7 @@ void fs_usage_apply_warn(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; - char buf[200]; + struct printbuf buf = PRINTBUF; bch_err(c, "disk usage increased %lli more than %u sectors reserved", should_not_have_added, disk_res_sectors); @@ -1298,13 +1320,17 @@ void fs_usage_apply_warn(struct btree_trans *trans, struct bkey_s_c old = { &i->old_k, i->old_v }; pr_err("while inserting"); - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); - pr_err(" %s", buf); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); + pr_err(" %s", buf.buf); pr_err("overlapping with"); - bch2_bkey_val_to_text(&PBUF(buf), c, old); - pr_err(" %s", buf); + printbuf_reset(&buf); + bch2_bkey_val_to_text(&buf, c, old); + pr_err(" %s", buf.buf); } + __WARN(); + printbuf_exit(&buf); } int bch2_trans_fs_usage_apply(struct btree_trans *trans, @@ -1744,7 +1770,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_i *n; __le64 *refcount; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 
1 : -1; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink, POS(0, *idx), @@ -1764,19 +1790,19 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, refcount = bkey_refcount(n); if (!refcount) { - bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c); + bch2_bkey_val_to_text(&buf, c, p.s_c); bch2_fs_inconsistent(c, "nonexistent indirect extent at %llu while marking\n %s", - *idx, buf); + *idx, buf.buf); ret = -EIO; goto err; } if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { - bch2_bkey_val_to_text(&PBUF(buf), c, p.s_c); + bch2_bkey_val_to_text(&buf, c, p.s_c); bch2_fs_inconsistent(c, "indirect extent refcount underflow at %llu while marking\n %s", - *idx, buf); + *idx, buf.buf); ret = -EIO; goto err; } @@ -1811,6 +1837,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, *idx = k.k->p.offset; err: bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index da91c95e3ffc..342797303415 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -157,6 +157,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) unsigned long now; unsigned i; + out->atomic++; spin_lock(&clock->timer_lock); now = atomic64_read(&clock->now); @@ -165,6 +166,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) clock->timers.data[i]->fn, clock->timers.data[i]->expire - now); spin_unlock(&clock->timer_lock); + --out->atomic; } void bch2_io_clock_exit(struct io_clock *clock) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 02a5ef5ecb3e..c3bfa7f1d77d 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -169,10 +169,11 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) failed |= bch2_btree_verify_replica(c, b, p); if (failed) { - char buf[200]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(&b->key)); - bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf); + printbuf_exit(&buf); } out: mutex_unlock(&c->verify_lock); @@ -188,8 +189,7 @@ struct dump_iter { struct bch_fs *c; enum btree_id id; - char buf[1 << 12]; - size_t bytes; /* what's currently in buf */ + struct printbuf buf; char __user *ubuf; /* destination user buffer */ size_t size; /* size of requested read */ @@ -198,9 +198,9 @@ struct dump_iter { static int flush_buf(struct dump_iter *i) { - if (i->bytes) { - size_t bytes = min(i->bytes, i->size); - int err = copy_to_user(i->ubuf, i->buf, bytes); + if (i->buf.pos) { + size_t bytes = min_t(size_t, i->buf.pos, i->size); + int err = copy_to_user(i->ubuf, i->buf.buf, bytes); if (err) return err; @@ -208,8 +208,8 @@ static int flush_buf(struct dump_iter *i) i->ret += bytes; i->ubuf += bytes; i->size -= bytes; - i->bytes -= bytes; - memmove(i->buf, i->buf + bytes, i->bytes); + i->buf.pos -= bytes; + memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos); } return 0; @@ -228,13 +228,17 @@ static int bch2_dump_open(struct inode *inode, struct file *file) i->from = POS_MIN; i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); i->id = bd->id; + i->buf = PRINTBUF; return 0; } static int bch2_dump_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct dump_iter *i = file->private_data; + + printbuf_exit(&i->buf); + kfree(i); return 0; } @@ -266,11 +270,8 @@ 
static ssize_t bch2_read_btree(struct file *file, char __user *buf, k = bch2_btree_iter_peek(&iter); while (k.k && !(err = bkey_err(k))) { - bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); - i->bytes = strlen(i->buf); - BUG_ON(i->bytes >= sizeof(i->buf)); - i->buf[i->bytes] = '\n'; - i->bytes++; + bch2_bkey_val_to_text(&i->buf, i->c, k); + pr_char(&i->buf, '\n'); k = bch2_btree_iter_next(&iter); i->from = iter.pos; @@ -319,8 +320,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) { - bch2_btree_node_to_text(&PBUF(i->buf), i->c, b); - i->bytes = strlen(i->buf); + bch2_btree_node_to_text(&i->buf, i->c, b); err = flush_buf(i); if (err) break; @@ -384,16 +384,14 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_btree_node_iter_peek(&l->iter, l->b); if (l->b != prev_node) { - bch2_btree_node_to_text(&PBUF(i->buf), i->c, l->b); - i->bytes = strlen(i->buf); + bch2_btree_node_to_text(&i->buf, i->c, l->b); err = flush_buf(i); if (err) break; } prev_node = l->b; - bch2_bfloat_to_text(&PBUF(i->buf), l->b, _k); - i->bytes = strlen(i->buf); + bch2_bfloat_to_text(&i->buf, l->b, _k); err = flush_buf(i); if (err) break; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 86421f65d139..b220b523d856 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -286,14 +286,15 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - char buf2[200]; + struct printbuf buf2 = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&buf->key.k_i)); + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&buf->key.k_i)); bch_err_ratelimited(c, "stripe checksum error for %ps at %u:%u: csum type %u, expected %llx got %llx\n%s", (void *) _RET_IP_, i, j, v->csum_type, - want.lo, got.lo, buf2); + want.lo, got.lo, buf2.buf); + printbuf_exit(&buf2); clear_bit(i, buf->valid); break; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 9e8b085e36d7..2aaeee585157 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1676,7 +1676,8 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; enum bch_opt_id i; - char buf[512]; + struct printbuf buf = PRINTBUF; + int ret = 0; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; @@ -1688,13 +1689,17 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) continue; - bch2_opt_to_text(&PBUF(buf), c, opt, v, + printbuf_reset(&buf); + bch2_opt_to_text(&buf, c, opt, v, OPT_SHOW_MOUNT_STYLE); seq_putc(seq, ','); - seq_puts(seq, buf); + seq_puts(seq, buf.buf); } - return 0; + if (buf.allocation_failure) + ret = -ENOMEM; + printbuf_exit(&buf); + return ret; } static void bch2_put_super(struct super_block *sb) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index ced4d671eb8d..8783b950055e 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -698,15 +698,16 @@ static int check_key_has_snapshot(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; if (mustfix_fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c, "key in missing snapshot: %s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + 
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; fsck_err: + printbuf_exit(&buf); return ret; } @@ -746,7 +747,7 @@ static int hash_check_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter = { NULL }; - char buf[200]; + struct printbuf buf = PRINTBUF; struct bkey_s_c k; u64 hash; int ret = 0; @@ -770,8 +771,9 @@ static int hash_check_key(struct btree_trans *trans, if (fsck_err_on(k.k->type == desc.key_type && !desc.cmp_bkey(k, hash_k), c, "duplicate hash table keys:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, - hash_k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, hash_k), + buf.buf))) { ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; break; } @@ -782,13 +784,16 @@ static int hash_check_key(struct btree_trans *trans, } } +out: bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); return ret; bad_hash: if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " "hashed to %llu\n%s", desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash, - (bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE) + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE) return 0; ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); @@ -796,9 +801,9 @@ bad_hash: bch_err(c, "hash_redo_key err %i", ret); return ret; } - return -EINTR; + ret = -EINTR; fsck_err: - return ret; + goto out; } static int check_inode(struct btree_trans *trans, @@ -1166,32 +1171,34 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bch_fs *c = trans->c; struct bkey_s_c k; struct inode_walker_entry *i; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; k = bch2_btree_iter_peek(iter); if (!k.k) - return 0; + goto out; ret = bkey_err(k); if (ret) - return ret; + goto err; ret = check_key_has_snapshot(trans, iter, k); - if (ret) - return ret < 0 ? ret : 0; + if (ret) { + ret = ret < 0 ? 
ret : 0; + goto out; + } ret = snapshots_seen_update(c, s, k.k->p); if (ret) - return ret; + goto err; if (k.k->type == KEY_TYPE_whiteout) - return 0; + goto out; if (inode->cur_inum != k.k->p.inode) { ret = check_i_sectors(trans, inode); if (ret) - return ret; + goto err; } #if 0 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { @@ -1201,22 +1208,29 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); bch2_bkey_val_to_text(&PBUF(buf2), c, k); - if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) - return fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; + if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { + ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; + goto out; + } } #endif ret = __walk_inode(trans, inode, k.k->p); if (ret < 0) - return ret; + goto err; if (fsck_err_on(ret == INT_MAX, c, "extent in missing inode:\n %s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + goto out; + } - if (ret == INT_MAX) - return 0; + if (ret == INT_MAX) { + ret = 0; + goto out; + } i = inode->d + ret; ret = 0; @@ -1225,9 +1239,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, !S_ISLNK(i->inode.bi_mode), c, "extent in non regular inode mode %o:\n %s", i->inode.bi_mode, - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + goto out; + } if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) { for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) { @@ -1237,11 +1254,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, i->inode.bi_size)) { bch2_fs_lazy_rw(c); - return bch2_btree_delete_range_trans(trans, BTREE_ID_extents, + ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9, k.k->p.snapshot), POS(k.k->p.inode, U64_MAX), 0, NULL) ?: -EINTR; + goto out; } } } @@ -1253,7 +1271,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_reassemble(&prev, c, k); #endif +out: +err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -1351,7 +1372,7 @@ static int check_dirent_target(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bkey_i_dirent *n; bool backpointer_exists = true; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret = 0; if (!target->bi_dir && @@ -1377,9 +1398,7 @@ static int check_dirent_target(struct btree_trans *trans, "directory %llu with multiple links", target->bi_inum)) { ret = __remove_dirent(trans, d.k->p); - if (ret) - goto err; - return 0; + goto out; } if (fsck_err_on(backpointer_exists && @@ -1416,18 +1435,19 @@ static int check_dirent_target(struct btree_trans *trans, "incorrect d_type: got %s, should be %s:\n%s", bch2_d_type_str(d.v->d_type), bch2_d_type_str(inode_d_type(target)), - (bch2_bkey_val_to_text(&PBUF(buf), c, d.s_c), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 
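Hand in hand with the printbuf conversion, the fsck and gc functions in these hunks are restructured from early "return ret;" exits into "goto out" / "goto err", so that a single exit point can run printbuf_exit() (and any iterator teardown) unconditionally. A minimal sketch of that idiom; lookup(), validate() and check_thing() are invented stand-ins, not bcachefs functions.

#include <stdio.h>
#include <stdlib.h>

struct printbuf { char *buf; };		/* toy stand-in, see earlier sketch */

static void printbuf_exit(struct printbuf *b)
{
	free(b->buf);
	b->buf = NULL;
}

static int lookup(int key)   { return key == 0 ? -2  : 0; }	/* ~ENOENT */
static int validate(int key) { return key >  9 ? -22 : 0; }	/* ~EINVAL */

static int check_thing(int key)
{
	struct printbuf buf = { NULL };
	int ret = 0;

	ret = lookup(key);
	if (ret)
		goto err;	/* "return ret;" here would leak buf.buf
				 * once pr_buf() had filled it */

	ret = validate(key);
	if (ret)
		goto err;

	printf("key %d ok\n", key);
err:
	printbuf_exit(&buf);	/* single teardown point: the "err: fsck_err:"
				 * pattern in the hunks above */
	return ret;
}

int main(void)
{
	printf("ret = %d\n", check_thing(1));
	printf("ret = %d\n", check_thing(0));
	return 0;
}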
ret = PTR_ERR_OR_ZERO(n); if (ret) - return ret; + goto err; bkey_reassemble(&n->k_i, d.s_c); n->v.d_type = inode_d_type(target); ret = bch2_trans_update(trans, iter, &n->k_i, 0); if (ret) - return ret; + goto err; d = dirent_i_to_s_c(n); } @@ -1441,19 +1461,21 @@ static int check_dirent_target(struct btree_trans *trans, n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); ret = PTR_ERR_OR_ZERO(n); if (ret) - return ret; + goto err; bkey_reassemble(&n->k_i, d.s_c); n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); ret = bch2_trans_update(trans, iter, &n->k_i, 0); if (ret) - return ret; + goto err; d = dirent_i_to_s_c(n); } +out: err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -1467,46 +1489,53 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k; struct bkey_s_c_dirent d; struct inode_walker_entry *i; - char buf[200]; - int ret; + struct printbuf buf = PRINTBUF; + int ret = 0; k = bch2_btree_iter_peek(iter); if (!k.k) - return 0; + goto out; ret = bkey_err(k); if (ret) - return ret; + goto err; ret = check_key_has_snapshot(trans, iter, k); - if (ret) - return ret < 0 ? ret : 0; + if (ret) { + ret = ret < 0 ? ret : 0; + goto out; + } ret = snapshots_seen_update(c, s, k.k->p); if (ret) - return ret; + goto err; if (k.k->type == KEY_TYPE_whiteout) - return 0; + goto out; if (dir->cur_inum != k.k->p.inode) { ret = check_subdir_count(trans, dir); if (ret) - return ret; + goto err; } ret = __walk_inode(trans, dir, k.k->p); if (ret < 0) - return ret; + goto err; if (fsck_err_on(ret == INT_MAX, c, "dirent in nonexisting directory:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + goto out; + } - if (ret == INT_MAX) - return 0; + if (ret == INT_MAX) { + ret = 0; + goto out; + } i = dir->d + ret; ret = 0; @@ -1514,8 +1543,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, "dirent in non directory inode type %s:\n%s", bch2_d_type_str(inode_d_type(&i->inode)), - (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) - return bch2_btree_delete_at(trans, iter, 0); + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, 0); + goto out; + } if (dir->first_this_inode) *hash_info = bch2_hash_info_init(c, &dir->d[0].inode); @@ -1523,12 +1555,15 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); if (ret < 0) - return ret; - if (ret) /* dirent has been deleted */ - return 0; + goto err; + if (ret) { + /* dirent has been deleted */ + ret = 0; + goto out; + } if (k.k->type != KEY_TYPE_dirent) - return 0; + goto out; d = bkey_s_c_to_dirent(k); @@ -1541,24 +1576,27 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __subvol_lookup(trans, target_subvol, &target_snapshot, &target_inum); if (ret && ret != -ENOENT) - return ret; + goto err; if (fsck_err_on(ret, c, "dirent points to missing subvolume %llu", - le64_to_cpu(d.v->d_child_subvol))) - return __remove_dirent(trans, d.k->p); + le64_to_cpu(d.v->d_child_subvol))) { + ret = __remove_dirent(trans, d.k->p); + goto err; + } ret = __lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); if (ret && ret != -ENOENT) - return ret; + goto 
err; if (fsck_err_on(ret, c, "subvolume %u points to missing subvolume root %llu", target_subvol, target_inum)) { bch_err(c, "repair not implemented yet"); - return -EINVAL; + ret = -EINVAL; + goto err; } if (fsck_err_on(subvol_root.bi_subvol != target_subvol, c, @@ -1568,32 +1606,33 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, subvol_root.bi_subvol = target_subvol; ret = __write_inode(trans, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; } ret = check_dirent_target(trans, iter, d, &subvol_root, target_snapshot); if (ret) - return ret; + goto err; } else { ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); if (ret) - return ret; + goto err; if (fsck_err_on(!target->nr, c, "dirent points to missing inode:\n%s", - (bch2_bkey_val_to_text(&PBUF(buf), c, - k), buf))) { + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) { ret = __remove_dirent(trans, d.k->p); if (ret) - return ret; + goto err; } for (i = target->d; i < target->d + target->nr; i++) { ret = check_dirent_target(trans, iter, d, &i->inode, i->snapshot); if (ret) - return ret; + goto err; } } @@ -1601,7 +1640,10 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) i->count++; +out: +err: fsck_err: + printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 4b9ff76dd19f..c9204cab055d 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -2057,11 +2057,11 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); struct btree_iter iter; - char buf[200]; + struct printbuf buf = PRINTBUF; int ret; - bch2_bkey_val_to_text(&PBUF(buf), c, k); - bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf); + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf.buf); bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)), @@ -2069,12 +2069,14 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); if (ret) - return; + goto out; - bch2_bkey_val_to_text(&PBUF(buf), c, k); - bch_err(c, "%s", buf); + bch2_bkey_val_to_text(&buf, c, k); + bch_err(c, "%s", buf.buf); bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); bch2_trans_iter_exit(trans, &iter); +out: + printbuf_exit(&buf); } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 279e960f2307..a579e6483d1e 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -414,18 +414,18 @@ unlock: !can_discard && j->reservations.idx == j->reservations.unwritten_idx && (flags & JOURNAL_RES_GET_RESERVED)) { - char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC); + struct printbuf buf = PRINTBUF; bch_err(c, "Journal stuck! 
Hava a pre-reservation but journal full"); - if (journal_debug_buf) { - bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j); - bch_err(c, "%s", journal_debug_buf); - bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j); - bch_err(c, "Journal pins:\n%s", journal_debug_buf); - kfree(journal_debug_buf); - } + bch2_journal_debug_to_text(&buf, j); + bch_err(c, "%s", buf.buf); + + printbuf_reset(&buf); + bch2_journal_pins_to_text(&buf, j); + bch_err(c, "Journal pins:\n%s", buf.buf); + printbuf_exit(&buf); bch2_fatal_error(c); dump_stack(); } @@ -1186,6 +1186,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) unsigned long now = jiffies; unsigned i; + out->atomic++; + rcu_read_lock(); s = READ_ONCE(j->reservations); @@ -1270,6 +1272,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) } rcu_read_unlock(); + + --out->atomic; } void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) @@ -1286,6 +1290,8 @@ void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) u64 i; spin_lock(&j->lock); + out->atomic++; + fifo_for_each_entry_ptr(pin_list, &j->pin, i) { pr_buf(out, "%llu: count %u\n", i, atomic_read(&pin_list->count)); @@ -1305,5 +1311,7 @@ void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) pr_buf(out, "\t%px %ps\n", pin, pin->flush); } + + --out->atomic; spin_unlock(&j->lock); } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 302af332b632..bbec4d85b6bc 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -251,14 +251,15 @@ static int journal_validate_key(struct bch_fs *c, const char *where, invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), __btree_node_type(level, btree_id)); if (invalid) { - char buf[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); mustfix_fsck_err(c, "invalid %s in %s entry offset %zi/%u: %s\n%s", type, where, (u64 *) k - entry->_data, le16_to_cpu(entry->u64s), - invalid, buf); + invalid, buf.buf); + printbuf_exit(&buf); le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -995,6 +996,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, struct journal_replay *i, *t; struct bch_dev *ca; unsigned iter; + struct printbuf buf = PRINTBUF; size_t keys = 0, entries = 0; bool degraded = false; u64 seq, last_seq = 0; @@ -1053,7 +1055,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, if (!last_seq) { fsck_err(c, "journal read done, but no entries found after dropping non-flushes"); - return -1; + ret = -1; + goto err; } /* Drop blacklisted entries and entries older than last_seq: */ @@ -1085,7 +1088,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, while (seq < le64_to_cpu(i->j.seq)) { u64 missing_start, missing_end; - char buf1[200], buf2[200]; + struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; while (seq < le64_to_cpu(i->j.seq) && bch2_journal_seq_is_blacklisted(c, seq, false)) @@ -1101,14 +1104,13 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, seq++; if (i->list.prev != list) { - struct printbuf out = PBUF(buf1); struct journal_replay *p = list_prev_entry(i, list); - bch2_journal_ptrs_to_text(&out, c, p); - pr_buf(&out, " size %zu", vstruct_sectors(&p->j, c->block_bits)); + bch2_journal_ptrs_to_text(&buf1, c, p); + pr_buf(&buf1, " size %zu", vstruct_sectors(&p->j, c->block_bits)); } else - 
sprintf(buf1, "(none)"); - bch2_journal_ptrs_to_text(&PBUF(buf2), c, i); + pr_buf(&buf1, "(none)"); + bch2_journal_ptrs_to_text(&buf2, c, i); missing_end = seq - 1; fsck_err(c, "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" @@ -1116,7 +1118,10 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, " next at %s", missing_start, missing_end, last_seq, *blacklist_seq - 1, - buf1, buf2); + buf1.buf, buf2.buf); + + printbuf_exit(&buf1); + printbuf_exit(&buf2); } seq++; @@ -1130,14 +1135,13 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, .e.nr_required = 1, }; unsigned ptr; - char buf[80]; if (i->ignore) continue; ret = jset_validate_entries(c, &i->j, READ); if (ret) - goto fsck_err; + goto err; for (ptr = 0; ptr < i->nr_ptrs; ptr++) replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev; @@ -1149,15 +1153,17 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, * the devices - this is wrong: */ + printbuf_reset(&buf); + bch2_replicas_entry_to_text(&buf, &replicas.e); + if (!degraded && (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c, "superblock not marked as containing replicas %s", - (bch2_replicas_entry_to_text(&PBUF(buf), - &replicas.e), buf)))) { + buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) - return ret; + goto err; } for_each_jset_key(k, _n, entry, &i->j) @@ -1171,7 +1177,9 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list, if (*start_seq != *blacklist_seq) bch_info(c, "dropped unflushed entries %llu-%llu", *blacklist_seq, *start_seq - 1); +err: fsck_err: + printbuf_exit(&buf); return ret; } @@ -1481,7 +1489,7 @@ void bch2_journal_write(struct closure *cl) struct jset_entry *start, *end; struct jset *jset; struct bio *bio; - char *journal_debug_buf = NULL; + struct printbuf journal_debug_buf = PRINTBUF; bool validate_before_checksum = false; unsigned i, sectors, bytes, u64s, nr_rw_members = 0; int ret; @@ -1586,11 +1594,8 @@ retry_alloc: goto retry_alloc; } - if (ret) { - journal_debug_buf = kmalloc(4096, GFP_ATOMIC); - if (journal_debug_buf) - __bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j); - } + if (ret) + __bch2_journal_debug_to_text(&journal_debug_buf, j); /* * write is allocated, no longer need to account for it in @@ -1607,8 +1612,8 @@ retry_alloc: if (ret) { bch_err(c, "Unable to allocate journal write:\n%s", - journal_debug_buf); - kfree(journal_debug_buf); + journal_debug_buf.buf); + printbuf_exit(&journal_debug_buf); bch2_fatal_error(c); continue_at(cl, journal_write_done, c->io_complete_wq); return; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index c15b18831512..2d5382a83003 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -216,14 +216,11 @@ void bch2_journal_space_available(struct journal *j) if (!clean_ondisk && j->reservations.idx == j->reservations.unwritten_idx) { - char *buf = kmalloc(4096, GFP_ATOMIC); + struct printbuf buf = PRINTBUF; - bch_err(c, "journal stuck"); - if (buf) { - __bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); - pr_err("\n%s", buf); - kfree(buf); - } + __bch2_journal_debug_to_text(&buf, j); + bch_err(c, "journal stuck\n%s", buf.buf); + printbuf_exit(&buf); bch2_fatal_error(c); ret = cur_entry_journal_stuck; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index fe0a1dbac199..babf98894e87 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -257,35 +257,47 @@ void 
bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) { struct bch_fs_rebalance *r = &c->rebalance; struct rebalance_work w = rebalance_work(c); - char h1[21], h2[21]; - bch2_hprint(&PBUF(h1), w.dev_most_full_work << 9); - bch2_hprint(&PBUF(h2), w.dev_most_full_capacity << 9); - pr_buf(out, "fullest_dev (%i):\t%s/%s\n", - w.dev_most_full_idx, h1, h2); + out->tabstops[0] = 20; - bch2_hprint(&PBUF(h1), w.total_work << 9); - bch2_hprint(&PBUF(h2), c->capacity << 9); - pr_buf(out, "total work:\t\t%s/%s\n", h1, h2); + pr_buf(out, "fullest_dev (%i):", w.dev_most_full_idx); + pr_tab(out); - pr_buf(out, "rate:\t\t\t%u\n", r->pd.rate.rate); + bch2_hprint(out, w.dev_most_full_work << 9); + pr_buf(out, "/"); + bch2_hprint(out, w.dev_most_full_capacity << 9); + pr_newline(out); + + pr_buf(out, "total work:"); + pr_tab(out); + + bch2_hprint(out, w.total_work << 9); + pr_buf(out, "/"); + bch2_hprint(out, c->capacity << 9); + pr_newline(out); + + pr_buf(out, "rate:"); + pr_tab(out); + pr_buf(out, "%u", r->pd.rate.rate); + pr_newline(out); switch (r->state) { case REBALANCE_WAITING: - pr_buf(out, "waiting\n"); + pr_buf(out, "waiting"); break; case REBALANCE_THROTTLED: - bch2_hprint(&PBUF(h1), + pr_buf(out, "throttled for %lu sec or ", + (r->throttled_until_cputime - jiffies) / HZ); + bch2_hprint(out, (r->throttled_until_iotime - atomic64_read(&c->io_clock[WRITE].now)) << 9); - pr_buf(out, "throttled for %lu sec or %s io\n", - (r->throttled_until_cputime - jiffies) / HZ, - h1); + pr_buf(out, " io"); break; case REBALANCE_RUNNING: - pr_buf(out, "running\n"); + pr_buf(out, "running"); break; } + pr_newline(out); } void bch2_rebalance_stop(struct bch_fs *c) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index ae9ae1c7138c..6c4ffc5abdc5 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -760,6 +760,8 @@ static int verify_superblock_clean(struct bch_fs *c, { unsigned i; struct bch_sb_field_clean *clean = *cleanp; + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, @@ -772,7 +774,6 @@ static int verify_superblock_clean(struct bch_fs *c, } for (i = 0; i < BTREE_ID_NR; i++) { - char buf1[200], buf2[200]; struct bkey_i *k1, *k2; unsigned l1 = 0, l2 = 0; @@ -782,6 +783,19 @@ static int verify_superblock_clean(struct bch_fs *c, if (!k1 && !k2) continue; + printbuf_reset(&buf1); + printbuf_reset(&buf2); + + if (k1) + bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1)); + else + pr_buf(&buf1, "(none)"); + + if (k2) + bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2)); + else + pr_buf(&buf2, "(none)"); + mustfix_fsck_err_on(!k1 || !k2 || IS_ERR(k1) || IS_ERR(k2) || @@ -791,10 +805,12 @@ static int verify_superblock_clean(struct bch_fs *c, "superblock btree root %u doesn't match journal after clean shutdown\n" "sb: l=%u %s\n" "journal: l=%u %s\n", i, - l1, (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(k1)), buf1), - l2, (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(k2)), buf2)); + l1, buf1.buf, + l2, buf2.buf); } fsck_err: + printbuf_exit(&buf2); + printbuf_exit(&buf1); return ret; } diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 7cc2414893fc..e26642c01fd7 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -997,11 +997,12 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, if (dflags & ~flags) { if (print) { - char buf[100]; + struct printbuf buf = PRINTBUF; - bch2_replicas_entry_to_text(&PBUF(buf), e); + 
bch2_replicas_entry_to_text(&buf, e); bch_err(c, "insufficient devices online (%u) for replicas entry %s", - nr_online, buf); + nr_online, buf.buf); + printbuf_exit(&buf); } ret = false; break; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 47eeb48c8c60..c616ce5ed194 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -572,16 +572,10 @@ int bch2_read_super(const char *path, struct bch_opts *opts, { u64 offset = opt_get(*opts, sb); struct bch_sb_layout layout; - char *_err; - struct printbuf err; + struct printbuf err = PRINTBUF; __le64 *i; int ret; - _err = kmalloc(4096, GFP_KERNEL); - if (!_err) - return -ENOMEM; - err = _PBUF(_err, 4096); - pr_verbose_init(*opts, ""); memset(sb, 0, sizeof(*sb)); @@ -633,8 +627,8 @@ int bch2_read_super(const char *path, struct bch_opts *opts, goto err; printk(KERN_ERR "bcachefs (%s): error reading default superblock: %s", - path, _err); - err = _PBUF(_err, 4096); + path, err.buf); + printbuf_reset(&err); /* * Error reading primary superblock - read location of backup @@ -689,16 +683,16 @@ got_super: ret = bch2_sb_validate(sb, &err); if (ret) { printk(KERN_ERR "bcachefs (%s): error validating superblock: %s", - path, _err); + path, err.buf); goto err_no_print; } out: pr_verbose_init(*opts, "ret %i", ret); - kfree(_err); + printbuf_exit(&err); return ret; err: printk(KERN_ERR "bcachefs (%s): error reading superblock: %s", - path, _err); + path, err.buf); err_no_print: bch2_free_super(sb); goto out; @@ -768,6 +762,7 @@ int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; struct bch_dev *ca; + struct printbuf err = PRINTBUF; unsigned i, sb = 0, nr_wrote; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; @@ -795,18 +790,11 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_from_fs(c, ca); for_each_online_member(ca, c, i) { - struct printbuf buf = { NULL, NULL }; + printbuf_reset(&err); - ret = bch2_sb_validate(&ca->disk_sb, &buf); + ret = bch2_sb_validate(&ca->disk_sb, &err); if (ret) { - char *_buf = kmalloc(4096, GFP_NOFS); - if (_buf) { - buf = _PBUF(_buf, 4096); - bch2_sb_validate(&ca->disk_sb, &buf); - } - - bch2_fs_inconsistent(c, "sb invalid before write: %s", _buf); - kfree(_buf); + bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); percpu_ref_put(&ca->io_ref); goto out; } @@ -897,6 +885,7 @@ int bch2_write_super(struct bch_fs *c) out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); + printbuf_exit(&err); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index d9b69c4244d5..27716d6e962d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -870,12 +870,9 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; - char buf[512]; - struct printbuf p = PBUF(buf); + struct printbuf p = PRINTBUF; bool first = true; - strcpy(buf, "(null)"); - if (c->opts.read_only) { pr_buf(&p, "ro"); first = false; @@ -897,7 +894,11 @@ static void print_mount_opts(struct bch_fs *c) bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); } - bch_info(c, "mounted with opts: %s", buf); + if (!p.pos) + pr_buf(&p, "(null)"); + + bch_info(c, "mounted with opts: %s", p.buf); + printbuf_exit(&p); } int bch2_fs_start(struct bch_fs *c) @@ -1561,11 +1562,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) data = bch2_dev_has_data(c, ca); if (data) { - char data_has_str[100]; + struct printbuf data_has = PRINTBUF; - bch2_flags_to_text(&PBUF(data_has_str), - 
bch2_data_types, data); - bch_err(ca, "Remove failed, still has data (%s)", data_has_str); + bch2_flags_to_text(&data_has, bch2_data_types, data); + bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); + printbuf_exit(&data_has); ret = -EBUSY; goto err; } @@ -1614,16 +1615,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_sb_field_members *mi; struct bch_member dev_mi; unsigned dev_idx, nr_devices, u64s; - char *_errbuf; - struct printbuf errbuf; + struct printbuf errbuf = PRINTBUF; int ret; - _errbuf = kmalloc(4096, GFP_KERNEL); - if (!_errbuf) - return -ENOMEM; - - errbuf = _PBUF(_errbuf, 4096); - ret = bch2_read_super(path, &opts, &sb); if (ret) { bch_err(c, "device add error: error reading super: %i", ret); @@ -1741,7 +1735,7 @@ err: if (ca) bch2_dev_free(ca); bch2_free_super(&sb); - kfree(_errbuf); + printbuf_exit(&errbuf); return ret; err_late: up_write(&c->state_lock); @@ -1906,8 +1900,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_sb_field_members *mi; unsigned i, best_sb = 0; const char *err; - char *_errbuf = NULL; - struct printbuf errbuf; + struct printbuf errbuf = PRINTBUF; int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -1920,14 +1913,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, goto err; } - _errbuf = kmalloc(4096, GFP_KERNEL); - if (!_errbuf) { - ret = -ENOMEM; - goto err; - } - - errbuf = _PBUF(_errbuf, 4096); - sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); if (!sb) { ret = -ENOMEM; @@ -1991,7 +1976,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } out: kfree(sb); - kfree(_errbuf); + printbuf_exit(&errbuf); module_put(THIS_MODULE); pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); return c; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1a3068f658a1..ce32b9068518 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -46,8 +46,28 @@ struct sysfs_ops type ## _sysfs_ops = { \ } #define SHOW(fn) \ +static ssize_t fn ## _to_text(struct printbuf *, \ + struct kobject *, struct attribute *);\ + \ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ char *buf) \ +{ \ + struct printbuf out = PRINTBUF; \ + ssize_t ret = fn ## _to_text(&out, kobj, attr); \ + \ + if (!ret && out.allocation_failure) \ + ret = -ENOMEM; \ + \ + if (!ret) { \ + ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \ + memcpy(buf, out.buf, ret); \ + } \ + printbuf_exit(&out); \ + return ret; \ +} \ + \ +static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\ + struct attribute *attr) #define STORE(fn) \ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ @@ -64,22 +84,19 @@ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ #define sysfs_printf(file, fmt, ...) 
\ do { \ if (attr == &sysfs_ ## file) \ - return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\ + pr_buf(out, fmt "\n", __VA_ARGS__); \ } while (0) #define sysfs_print(file, var) \ do { \ if (attr == &sysfs_ ## file) \ - return snprint(buf, PAGE_SIZE, var); \ + snprint(out, var); \ } while (0) #define sysfs_hprint(file, val) \ do { \ - if (attr == &sysfs_ ## file) { \ - bch2_hprint(&out, val); \ - pr_buf(&out, "\n"); \ - return out.pos - buf; \ - } \ + if (attr == &sysfs_ ## file) \ + bch2_hprint(out, val); \ } while (0) #define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) @@ -348,7 +365,6 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) SHOW(bch2_fs) { struct bch_fs *c = container_of(kobj, struct bch_fs, kobj); - struct printbuf out = _PBUF(buf, PAGE_SIZE); sysfs_print(minor, c->minor); sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b); @@ -365,10 +381,8 @@ SHOW(bch2_fs) sysfs_printf(btree_gc_periodic, "%u", (int) c->btree_gc_periodic); - if (attr == &sysfs_gc_gens_pos) { - bch2_gc_gens_pos_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_gc_gens_pos) + bch2_gc_gens_pos_to_text(out, c); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); @@ -378,83 +392,54 @@ SHOW(bch2_fs) max(0LL, c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now)) << 9); - if (attr == &sysfs_rebalance_work) { - bch2_rebalance_work_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_rebalance_work) + bch2_rebalance_work_to_text(out, c); sysfs_print(promote_whole_extents, c->promote_whole_extents); /* Debugging: */ - if (attr == &sysfs_journal_debug) { - bch2_journal_debug_to_text(&out, &c->journal); - return out.pos - buf; - } + if (attr == &sysfs_journal_debug) + bch2_journal_debug_to_text(out, &c->journal); - if (attr == &sysfs_journal_pins) { - bch2_journal_pins_to_text(&out, &c->journal); - return out.pos - buf; - } + if (attr == &sysfs_journal_pins) + bch2_journal_pins_to_text(out, &c->journal); - if (attr == &sysfs_btree_updates) { - bch2_btree_updates_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_btree_updates) + bch2_btree_updates_to_text(out, c); - if (attr == &sysfs_dirty_btree_nodes) { - bch2_dirty_btree_nodes_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_dirty_btree_nodes) + bch2_dirty_btree_nodes_to_text(out, c); - if (attr == &sysfs_btree_cache) { - bch2_btree_cache_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_btree_cache) + bch2_btree_cache_to_text(out, c); - if (attr == &sysfs_btree_key_cache) { - bch2_btree_key_cache_to_text(&out, &c->btree_key_cache); - return out.pos - buf; - } + if (attr == &sysfs_btree_key_cache) + bch2_btree_key_cache_to_text(out, &c->btree_key_cache); - if (attr == &sysfs_btree_transactions) { - bch2_btree_trans_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_btree_transactions) + bch2_btree_trans_to_text(out, c); - if (attr == &sysfs_stripes_heap) { - bch2_stripes_heap_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_stripes_heap) + bch2_stripes_heap_to_text(out, c); - if (attr == &sysfs_open_buckets) { - bch2_open_buckets_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c); - if (attr == &sysfs_compression_stats) { - bch2_compression_stats_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_compression_stats) + bch2_compression_stats_to_text(out, c); - if (attr == &sysfs_new_stripes) { - 
bch2_new_stripes_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_new_stripes) + bch2_new_stripes_to_text(out, c); - if (attr == &sysfs_io_timers_read) { - bch2_io_timers_to_text(&out, &c->io_clock[READ]); - return out.pos - buf; - } - if (attr == &sysfs_io_timers_write) { - bch2_io_timers_to_text(&out, &c->io_clock[WRITE]); - return out.pos - buf; - } + if (attr == &sysfs_io_timers_read) + bch2_io_timers_to_text(out, &c->io_clock[READ]); - if (attr == &sysfs_data_jobs) { - data_progress_to_text(&out, c); - return out.pos - buf; - } + if (attr == &sysfs_io_timers_write) + bch2_io_timers_to_text(out, &c->io_clock[WRITE]); + + if (attr == &sysfs_data_jobs) + data_progress_to_text(out, c); return 0; } @@ -567,7 +552,7 @@ struct attribute *bch2_fs_files[] = { SHOW(bch2_fs_internal) { struct bch_fs *c = container_of(kobj, struct bch_fs, internal); - return bch2_fs_show(&c->kobj, attr, buf); + return bch2_fs_to_text(out, &c->kobj, attr); } STORE(bch2_fs_internal) @@ -617,16 +602,15 @@ struct attribute *bch2_fs_internal_files[] = { SHOW(bch2_fs_opts_dir) { - struct printbuf out = _PBUF(buf, PAGE_SIZE); struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt = container_of(attr, struct bch_option, attr); int id = opt - bch2_opt_table; u64 v = bch2_opt_get_by_id(&c->opts, id); - bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST); - pr_buf(&out, "\n"); + bch2_opt_to_text(out, c, opt, v, OPT_SHOW_FULL_LIST); + pr_char(out, '\n'); - return out.pos - buf; + return 0; } STORE(bch2_fs_opts_dir) @@ -690,13 +674,10 @@ int bch2_opts_create_sysfs_files(struct kobject *kobj) SHOW(bch2_fs_time_stats) { struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats); - struct printbuf out = _PBUF(buf, PAGE_SIZE); #define x(name) \ - if (attr == &sysfs_time_stat_##name) { \ - bch2_time_stats_to_text(&out, &c->times[BCH_TIME_##name]);\ - return out.pos - buf; \ - } + if (attr == &sysfs_time_stat_##name) \ + bch2_time_stats_to_text(out, &c->times[BCH_TIME_##name]); BCH_TIME_STATS() #undef x @@ -812,7 +793,6 @@ SHOW(bch2_dev) { struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj); struct bch_fs *c = ca->fs; - struct printbuf out = _PBUF(buf, PAGE_SIZE); sysfs_printf(uuid, "%pU\n", ca->uuid.b); @@ -825,58 +805,47 @@ SHOW(bch2_dev) if (attr == &sysfs_label) { if (ca->mi.group) { mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(&out, c->disk_sb.sb, + bch2_disk_path_to_text(out, c->disk_sb.sb, ca->mi.group - 1); mutex_unlock(&c->sb_lock); } - pr_buf(&out, "\n"); - return out.pos - buf; + pr_char(out, '\n'); } if (attr == &sysfs_has_data) { - bch2_flags_to_text(&out, bch2_data_types, + bch2_flags_to_text(out, bch2_data_types, bch2_dev_has_data(c, ca)); - pr_buf(&out, "\n"); - return out.pos - buf; + pr_char(out, '\n'); } if (attr == &sysfs_state_rw) { - bch2_string_opt_to_text(&out, bch2_member_states, + bch2_string_opt_to_text(out, bch2_member_states, ca->mi.state); - pr_buf(&out, "\n"); - return out.pos - buf; + pr_char(out, '\n'); } - if (attr == &sysfs_iodone) { - dev_iodone_to_text(&out, ca); - return out.pos - buf; - } + if (attr == &sysfs_iodone) + dev_iodone_to_text(out, ca); sysfs_print(io_latency_read, atomic64_read(&ca->cur_latency[READ])); sysfs_print(io_latency_write, atomic64_read(&ca->cur_latency[WRITE])); - if (attr == &sysfs_io_latency_stats_read) { - bch2_time_stats_to_text(&out, &ca->io_latency[READ]); - return out.pos - buf; - } - if (attr == &sysfs_io_latency_stats_write) { - bch2_time_stats_to_text(&out, &ca->io_latency[WRITE]); - 
return out.pos - buf; - } + if (attr == &sysfs_io_latency_stats_read) + bch2_time_stats_to_text(out, &ca->io_latency[READ]); + + if (attr == &sysfs_io_latency_stats_write) + bch2_time_stats_to_text(out, &ca->io_latency[WRITE]); sysfs_printf(congested, "%u%%", clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) * 100 / CONGESTED_MAX); - if (attr == &sysfs_reserve_stats) { - reserve_stats_to_text(&out, ca); - return out.pos - buf; - } - if (attr == &sysfs_alloc_debug) { - dev_alloc_debug_to_text(&out, ca); - return out.pos - buf; - } + if (attr == &sysfs_reserve_stats) + reserve_stats_to_text(out, ca); + + if (attr == &sysfs_alloc_debug) + dev_alloc_debug_to_text(out, ca); return 0; } diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 1f7f2533e544..978d92e0b5eb 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -871,7 +871,9 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, u64 nr, unsigned nr_threads) { struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; - char name_buf[20], nr_buf[20], per_sec_buf[20]; + char name_buf[20]; + struct printbuf nr_buf = PRINTBUF; + struct printbuf per_sec_buf = PRINTBUF; unsigned i; u64 time; @@ -932,13 +934,15 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, time = j.finish - j.start; scnprintf(name_buf, sizeof(name_buf), "%s:", testname); - bch2_hprint(&PBUF(nr_buf), nr); - bch2_hprint(&PBUF(per_sec_buf), div64_u64(nr * NSEC_PER_SEC, time)); + bch2_hprint(&nr_buf, nr); + bch2_hprint(&per_sec_buf, div64_u64(nr * NSEC_PER_SEC, time)); printk(KERN_INFO "%-12s %s with %u threads in %5llu sec, %5llu nsec per iter, %5s per sec\n", - name_buf, nr_buf, nr_threads, + name_buf, nr_buf.buf, nr_threads, div_u64(time, NSEC_PER_SEC), div_u64(time * nr_threads, nr), - per_sec_buf); + per_sec_buf.buf); + printbuf_exit(&per_sec_buf); + printbuf_exit(&nr_buf); return j.ret; } diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 2296658b9f0d..7a896ddc9a22 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -99,6 +99,38 @@ STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) STRTO_H(strtou64, u64) +static int bch2_printbuf_realloc(struct printbuf *out, unsigned extra) +{ + unsigned new_size = roundup_pow_of_two(out->size + extra); + char *buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_ATOMIC); + + if (!buf) { + out->allocation_failure = true; + return -ENOMEM; + } + + out->buf = buf; + out->size = new_size; + return 0; +} + +void bch2_pr_buf(struct printbuf *out, const char *fmt, ...) +{ + va_list args; + int len; + + do { + va_start(args, fmt); + len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args); + va_end(args); + } while (len + 1 >= printbuf_remaining(out) && + !bch2_printbuf_realloc(out, len + 1)); + + len = min_t(size_t, len, + printbuf_remaining(out) ? 
printbuf_remaining(out) - 1 : 0); + out->pos += len; +} + void bch2_hprint(struct printbuf *buf, s64 v) { int u, t = 0; @@ -151,9 +183,6 @@ void bch2_flags_to_text(struct printbuf *out, unsigned bit, nr = 0; bool first = true; - if (out->pos != out->end) - *out->pos = '\0'; - while (list[nr]) nr++; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 58427edcfaa4..7667944f9ae4 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -242,19 +242,39 @@ enum printbuf_units { }; struct printbuf { - char *pos; - char *end; - char *last_newline; - char *last_field; + char *buf; + unsigned size; + unsigned pos; + unsigned last_newline; + unsigned last_field; unsigned indent; - enum printbuf_units units; - unsigned tabstop; - unsigned tabstops[4]; + enum printbuf_units units:8; + u8 atomic; + bool allocation_failure:1; + u8 tabstop; + u8 tabstops[4]; }; +#define PRINTBUF ((struct printbuf) { NULL }) + +static inline void printbuf_exit(struct printbuf *buf) +{ + kfree(buf->buf); + buf->buf = ERR_PTR(-EINTR); /* poison value */ +} + +static inline void printbuf_reset(struct printbuf *buf) +{ + buf->pos = 0; + buf->last_newline = 0; + buf->last_field = 0; + buf->indent = 0; + buf->tabstop = 0; +} + static inline size_t printbuf_remaining(struct printbuf *buf) { - return buf->end - buf->pos; + return buf->size - buf->pos; } static inline size_t printbuf_linelen(struct printbuf *buf) @@ -262,29 +282,13 @@ static inline size_t printbuf_linelen(struct printbuf *buf) return buf->pos - buf->last_newline; } -#define _PBUF(_buf, _len) \ - ((struct printbuf) { \ - .pos = _buf, \ - .end = _buf + _len, \ - .last_newline = _buf, \ - .last_field = _buf, \ - }) +void bch2_pr_buf(struct printbuf *out, const char *fmt, ...); -#define PBUF(_buf) _PBUF(_buf, sizeof(_buf)) - - -#define pr_buf(_out, ...) \ -do { \ - (_out)->pos += scnprintf((_out)->pos, printbuf_remaining(_out), \ - __VA_ARGS__); \ -} while (0) +#define pr_buf(_out, ...) bch2_pr_buf(_out, __VA_ARGS__) static inline void pr_char(struct printbuf *out, char c) { - if (printbuf_remaining(out) > 1) { - *out->pos = c; - out->pos++; - } + bch2_pr_buf(out, "%c", c); } static inline void pr_indent_push(struct printbuf *buf, unsigned spaces) @@ -298,7 +302,7 @@ static inline void pr_indent_pop(struct printbuf *buf, unsigned spaces) { if (buf->last_newline + buf->indent == buf->pos) { buf->pos -= spaces; - buf->pos = '\0'; + buf->buf[buf->pos] = '\0'; } buf->indent -= spaces; } @@ -341,12 +345,12 @@ static inline void pr_tab_rjust(struct printbuf *buf) BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops)); if (shift > 0) { - memmove(buf->last_field + shift, - buf->last_field, + memmove(buf->buf + buf->last_field + shift, + buf->buf + buf->last_field, move); - memset(buf->last_field, ' ', shift); + memset(buf->buf + buf->last_field, ' ', shift); buf->pos += shift; - *buf->pos = 0; + buf->buf[buf->pos] = 0; } buf->last_field = buf->pos; @@ -460,8 +464,8 @@ static inline int bch2_strtoul_h(const char *cp, long *res) _r; \ }) -#define snprint(buf, size, var) \ - snprintf(buf, size, \ +#define snprint(out, var) \ + pr_buf(out, \ type_is(var, int) ? "%i\n" \ : type_is(var, unsigned) ? "%u\n" \ : type_is(var, long) ? 
"%li\n" \ @@ -605,10 +609,8 @@ do { \ sysfs_print(name##_rate_d_term, (var)->d_term); \ sysfs_print(name##_rate_p_term_inverse, (var)->p_term_inverse); \ \ - if (attr == &sysfs_##name##_rate_debug) { \ - bch2_pd_controller_debug_to_text(&out, var); \ - return out.pos - buf; \ - } \ + if (attr == &sysfs_##name##_rate_debug) \ + bch2_pd_controller_debug_to_text(out, var); \ } while (0) #define sysfs_pd_controller_store(name, var) \ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 9cce3953ee0c..f4e20e796ba0 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -426,9 +426,8 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, bch2_inode_opts_to_opts(bch2_inode_opts_get(&inode->ei_inode)); const struct bch_option *opt; int id, inode_opt_id; - char buf[512]; - struct printbuf out = PBUF(buf); - unsigned val_len; + struct printbuf out = PRINTBUF; + int ret; u64 v; id = bch2_opt_lookup(name); @@ -451,14 +450,19 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, v = bch2_opt_get_by_id(&opts, id); bch2_opt_to_text(&out, c, opt, v, 0); - val_len = out.pos - buf; + ret = out.pos; - if (buffer && val_len > size) - return -ERANGE; + if (out.allocation_failure) { + ret = -ENOMEM; + } else if (buffer) { + if (out.pos > size) + ret = -ERANGE; + else + memcpy(buffer, out.buf, out.pos); + } - if (buffer) - memcpy(buffer, buf, val_len); - return val_len; + printbuf_exit(&out); + return ret; } static int bch2_xattr_bcachefs_get(const struct xattr_handler *handler, -- cgit v1.2.3 From 4eea53de8a1882e75d3640dce06c8c2874a77b05 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 4 Mar 2022 21:57:11 -0500 Subject: bcachefs: Fix transaction path overflow in fiemap Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 2aaeee585157..310e317738b9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -935,7 +935,8 @@ retry: bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, SPOS(ei->v.i_ino, start, snapshot), 0); - while ((k = bch2_btree_iter_peek(&iter)).k && + while (!(ret = btree_trans_too_many_iters(&trans)) && + (k = bch2_btree_iter_peek(&iter)).k && !(ret = bkey_err(k)) && bkey_cmp(iter.pos, end) < 0) { enum btree_id data_btree = BTREE_ID_extents; -- cgit v1.2.3 From 5521b1dfa20262a9cb8d1214c095c9ca2a4cb127 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Mar 2022 12:01:16 -0500 Subject: bcachefs: Convert bch2_sb_to_text to master option list Options no longer have to be manually added to bch2_sb_to_text() - it now uses the master list of options in opts.h. Also, improve some of the formatting by converting it to tabstops. 
Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 87 +++++++++------------ fs/bcachefs/disk_groups.h | 4 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/opts.c | 45 ++++++++--- fs/bcachefs/opts.h | 62 +++++++-------- fs/bcachefs/super-io.c | 191 ++++++++++++++++++++++------------------------ fs/bcachefs/super.c | 2 +- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/xattr.c | 2 +- 9 files changed, 198 insertions(+), 199 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index e9ee37f1e07d..97eb21827cb3 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -445,7 +445,10 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *buf, u64 *v) return -EINVAL; } -void bch2_sb_target_to_text(struct printbuf *out, struct bch_sb *sb, u64 v) +void bch2_opt_target_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_sb *sb, + u64 v) { struct target t = target_decode(v); @@ -453,60 +456,46 @@ void bch2_sb_target_to_text(struct printbuf *out, struct bch_sb *sb, u64 v) case TARGET_NULL: pr_buf(out, "none"); break; - case TARGET_DEV: { - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); - struct bch_member *m = mi->members + t.dev; - - if (bch2_dev_exists(sb, mi, t.dev)) { - pr_buf(out, "Device "); - pr_uuid(out, m->uuid.b); - pr_buf(out, " (%u)", t.dev); + case TARGET_DEV: + if (c) { + struct bch_dev *ca; + + rcu_read_lock(); + ca = t.dev < c->sb.nr_devices + ? rcu_dereference(c->devs[t.dev]) + : NULL; + + if (ca && percpu_ref_tryget(&ca->io_ref)) { + pr_buf(out, "/dev/%pg", ca->disk_sb.bdev); + percpu_ref_put(&ca->io_ref); + } else if (ca) { + pr_buf(out, "offline device %u", t.dev); + } else { + pr_buf(out, "invalid device %u", t.dev); + } + + rcu_read_unlock(); } else { - pr_buf(out, "Bad device %u", t.dev); + struct bch_sb_field_members *mi = bch2_sb_get_members(sb); + struct bch_member *m = mi->members + t.dev; + + if (bch2_dev_exists(sb, mi, t.dev)) { + pr_buf(out, "Device "); + pr_uuid(out, m->uuid.b); + pr_buf(out, " (%u)", t.dev); + } else { + pr_buf(out, "Bad device %u", t.dev); + } } - break; - } case TARGET_GROUP: - bch2_disk_path_to_text(out, sb, t.group); - break; - default: - BUG(); - } -} - -void bch2_opt_target_to_text(struct printbuf *out, struct bch_fs *c, u64 v) -{ - struct target t = target_decode(v); - - switch (t.type) { - case TARGET_NULL: - pr_buf(out, "none"); - break; - case TARGET_DEV: { - struct bch_dev *ca; - - rcu_read_lock(); - ca = t.dev < c->sb.nr_devices - ? 
rcu_dereference(c->devs[t.dev]) - : NULL; - - if (ca && percpu_ref_tryget(&ca->io_ref)) { - pr_buf(out, "/dev/%pg", ca->disk_sb.bdev); - percpu_ref_put(&ca->io_ref); - } else if (ca) { - pr_buf(out, "offline device %u", t.dev); + if (c) { + mutex_lock(&c->sb_lock); + bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); + mutex_unlock(&c->sb_lock); } else { - pr_buf(out, "invalid device %u", t.dev); + bch2_disk_path_to_text(out, sb, t.group); } - - rcu_read_unlock(); - break; - } - case TARGET_GROUP: - mutex_lock(&c->sb_lock); - bch2_disk_path_to_text(out, c->disk_sb.sb, t.group); - mutex_unlock(&c->sb_lock); break; default: BUG(); diff --git a/fs/bcachefs/disk_groups.h b/fs/bcachefs/disk_groups.h index a274aacbdf92..de915480514b 100644 --- a/fs/bcachefs/disk_groups.h +++ b/fs/bcachefs/disk_groups.h @@ -77,10 +77,8 @@ int bch2_disk_path_find_or_create(struct bch_sb_handle *, const char *); void bch2_disk_path_to_text(struct printbuf *, struct bch_sb *, unsigned); -void bch2_sb_target_to_text(struct printbuf *, struct bch_sb *, u64); - int bch2_opt_target_parse(struct bch_fs *, const char *, u64 *); -void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, u64); +void bch2_opt_target_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); int bch2_sb_disk_groups_to_cpu(struct bch_fs *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 310e317738b9..4c68cee013e3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1691,7 +1691,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) continue; printbuf_reset(&buf); - bch2_opt_to_text(&buf, c, opt, v, + bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); seq_putc(seq, ','); seq_puts(seq, buf.buf); diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index 71bf26eb13d5..e78d3b75f6fb 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -96,6 +96,16 @@ const char * const bch2_d_types[BCH_DT_MAX] = { [DT_SUBVOL] = "subvol", }; +u64 BCH2_NO_SB_OPT(const struct bch_sb *sb) +{ + BUG(); +} + +void SET_BCH2_NO_SB_OPT(struct bch_sb *sb, u64 v) +{ + BUG(); +} + void bch2_opts_apply(struct bch_opts *dst, struct bch_opts src) { #define x(_name, ...) 
\ @@ -280,7 +290,8 @@ int bch2_opt_parse(struct bch_fs *c, const char *msg, return bch2_opt_validate(opt, msg, *res); } -void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, +void bch2_opt_to_text(struct printbuf *out, + struct bch_fs *c, struct bch_sb *sb, const struct bch_option *opt, u64 v, unsigned flags) { @@ -310,7 +321,7 @@ void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c, pr_buf(out, opt->choices[v]); break; case BCH_OPT_FN: - opt->to_text(out, c, v); + opt->to_text(out, c, sb, v); break; default: BUG(); @@ -431,6 +442,22 @@ out: return ret; } +u64 bch2_opt_from_sb(struct bch_sb *sb, enum bch_opt_id id) +{ + const struct bch_option *opt = bch2_opt_table + id; + u64 v; + + v = opt->get_sb(sb); + + if (opt->flags & OPT_SB_FIELD_ILOG2) + v = 1ULL << v; + + if (opt->flags & OPT_SB_FIELD_SECTORS) + v <<= 9; + + return v; +} + /* * Initial options from superblock - here we don't want any options undefined, * any options the superblock doesn't specify are set to 0: @@ -444,16 +471,10 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) const struct bch_option *opt = bch2_opt_table + id; u64 v; - if (opt->get_sb == NO_SB_OPT) + if (opt->get_sb == BCH2_NO_SB_OPT) continue; - v = opt->get_sb(sb); - - if (opt->flags & OPT_SB_FIELD_ILOG2) - v = 1ULL << v; - - if (opt->flags & OPT_SB_FIELD_SECTORS) - v <<= 9; + v = bch2_opt_from_sb(sb, id); ret = bch2_opt_validate(opt, "superblock option ", v); if (ret) @@ -467,7 +488,7 @@ int bch2_opts_from_sb(struct bch_opts *opts, struct bch_sb *sb) void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v) { - if (opt->set_sb == SET_NO_SB_OPT) + if (opt->set_sb == SET_BCH2_NO_SB_OPT) return; if (opt->flags & OPT_SB_FIELD_SECTORS) @@ -481,7 +502,7 @@ void __bch2_opt_set_sb(struct bch_sb *sb, const struct bch_option *opt, u64 v) void bch2_opt_set_sb(struct bch_fs *c, const struct bch_option *opt, u64 v) { - if (opt->set_sb == SET_NO_SB_OPT) + if (opt->set_sb == SET_BCH2_NO_SB_OPT) return; mutex_lock(&c->sb_lock); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index b03cac016f0b..fffe3e066864 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -42,7 +42,8 @@ static inline const char *bch2_d_type_str(unsigned d_type) */ /* dummy option, for options that aren't stored in the superblock */ -LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0); +u64 BCH2_NO_SB_OPT(const struct bch_sb *); +void SET_BCH2_NO_SB_OPT(struct bch_sb *, u64); /* When can be set: */ enum opt_flags { @@ -202,7 +203,7 @@ enum opt_type { x(btree_node_mem_ptr_optimization, u8, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - NO_SB_OPT, true, \ + BCH2_NO_SB_OPT, true, \ NULL, "Stash pointer to in memory btree node in btree ptr")\ x(gc_reserve_percent, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ @@ -229,7 +230,7 @@ enum opt_type { x(inline_data, u8, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - NO_SB_OPT, true, \ + BCH2_NO_SB_OPT, true, \ NULL, "Enable inline data extents") \ x(acl, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ @@ -254,22 +255,22 @@ enum opt_type { x(degraded, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Allow mounting in degraded mode") \ x(very_degraded, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Allow mounting in when data will be missing") \ x(discard, u8, \ OPT_FS|OPT_MOUNT|OPT_DEVICE, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Enable discard/TRIM support") \ x(verbose, 
u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Extra debugging information during mount/recovery")\ x(journal_flush_delay, u32, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ @@ -291,48 +292,48 @@ enum opt_type { x(fsck, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Run fsck on mount") \ x(fix_errors, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Fix errors during fsck without asking") \ x(ratelimit_errors, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, RATELIMIT_ERRORS_DEFAULT, \ + BCH2_NO_SB_OPT, RATELIMIT_ERRORS_DEFAULT, \ NULL, "Ratelimit error messages during fsck") \ x(nochanges, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Super read only mode - no writes at all will be issued,\n"\ "even if we have to replay the journal") \ x(norecovery, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don't replay the journal") \ x(rebuild_replicas, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Rebuild the superblock replicas section") \ x(keep_journal, u8, \ 0, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don't free journal entries/keys after startup")\ x(read_entire_journal, u8, \ 0, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Read all journal entries, not just dirty ones")\ x(read_journal_only, u8, \ 0, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Only read the journal, skip the rest of recovery")\ x(journal_transaction_names, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ @@ -342,64 +343,64 @@ enum opt_type { x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don't open device in exclusive mode") \ x(sb, u64, \ OPT_MOUNT, \ OPT_UINT(0, S64_MAX), \ - NO_SB_OPT, BCH_SB_SECTOR, \ + BCH2_NO_SB_OPT, BCH_SB_SECTOR, \ "offset", "Sector offset of superblock") \ x(read_only, u8, \ OPT_FS, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, NULL) \ x(nostart, u8, \ 0, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Don\'t start filesystem, only open devices") \ x(reconstruct_alloc, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Reconstruct alloc btree") \ x(version_upgrade, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Set superblock to latest version,\n" \ "allowing any new features to be used") \ x(buckets_nouse, u8, \ 0, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Allocate the buckets_nouse bitmap") \ x(project, u8, \ OPT_INODE, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, NULL) \ x(no_data_io, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, false, \ NULL, "Skip submit_bio() for data reads and writes, " \ "for performance testing purposes") \ x(fs_size, u64, \ OPT_DEVICE, \ OPT_UINT(0, S64_MAX), \ - NO_SB_OPT, 0, \ + BCH2_NO_SB_OPT, 0, \ "size", "Size of filesystem on device") \ x(bucket, u32, \ OPT_DEVICE, \ OPT_UINT(0, S64_MAX), \ - NO_SB_OPT, 0, \ + BCH2_NO_SB_OPT, 0, \ "size", "Size of filesystem on device") \ x(durability, u8, \ OPT_DEVICE, \ OPT_UINT(0, BCH_REPLICAS_MAX), \ - NO_SB_OPT, 1, \ + BCH2_NO_SB_OPT, 1, \ "n", "Data written to this device will be 
considered\n"\ "to have already been replicated n times") @@ -466,7 +467,7 @@ struct bch_option { }; struct { int (*parse)(struct bch_fs *, const char *, u64 *); - void (*to_text)(struct printbuf *, struct bch_fs *, u64); + void (*to_text)(struct printbuf *, struct bch_fs *, struct bch_sb *, u64); }; }; @@ -481,6 +482,7 @@ bool bch2_opt_defined_by_id(const struct bch_opts *, enum bch_opt_id); u64 bch2_opt_get_by_id(const struct bch_opts *, enum bch_opt_id); void bch2_opt_set_by_id(struct bch_opts *, enum bch_opt_id, u64); +u64 bch2_opt_from_sb(struct bch_sb *, enum bch_opt_id); int bch2_opts_from_sb(struct bch_opts *, struct bch_sb *); void __bch2_opt_set_sb(struct bch_sb *, const struct bch_option *, u64); void bch2_opt_set_sb(struct bch_fs *, const struct bch_option *, u64); @@ -492,7 +494,7 @@ int bch2_opt_parse(struct bch_fs *, const char *, const struct bch_option *, #define OPT_SHOW_FULL_LIST (1 << 0) #define OPT_SHOW_MOUNT_STYLE (1 << 1) -void bch2_opt_to_text(struct printbuf *, struct bch_fs *, +void bch2_opt_to_text(struct printbuf *, struct bch_fs *, struct bch_sb *, const struct bch_option *, u64, unsigned); int bch2_opt_check_may_set(struct bch_fs *, int, u64); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 03a8ae496668..bb61a288b7fd 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1047,45 +1047,56 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb, if (!bch2_member_exists(m)) continue; - pr_buf(out, "Device: %u", i); + pr_buf(out, "Device:"); + pr_tab(out); + pr_buf(out, "%u", i); pr_newline(out); pr_indent_push(out, 2); - pr_buf(out, "UUID: "); + pr_buf(out, "UUID:"); + pr_tab(out); pr_uuid(out, m->uuid.b); pr_newline(out); - pr_buf(out, "Size: "); + pr_buf(out, "Size:"); + pr_tab(out); pr_units(out, device_size, device_size << 9); pr_newline(out); - pr_buf(out, "Bucket size: "); + pr_buf(out, "Bucket size:"); + pr_tab(out); pr_units(out, bucket_size, bucket_size << 9); pr_newline(out); - pr_buf(out, "First bucket: %u", - le16_to_cpu(m->first_bucket)); + pr_buf(out, "First bucket:"); + pr_tab(out); + pr_buf(out, "%u", le16_to_cpu(m->first_bucket)); pr_newline(out); - pr_buf(out, "Buckets: %llu", - le64_to_cpu(m->nbuckets)); + pr_buf(out, "Buckets:"); + pr_tab(out); + pr_buf(out, "%llu", le64_to_cpu(m->nbuckets)); pr_newline(out); - pr_buf(out, "Last mount: "); + pr_buf(out, "Last mount:"); + pr_tab(out); if (m->last_mount) pr_time(out, le64_to_cpu(m->last_mount)); else pr_buf(out, "(never)"); pr_newline(out); - pr_buf(out, "State: %s", + pr_buf(out, "State:"); + pr_tab(out); + pr_buf(out, "%s", BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR ? 
bch2_member_states[BCH_MEMBER_STATE(m)] : "unknown"); pr_newline(out); - pr_buf(out, "Group: "); + pr_buf(out, "Group:"); + pr_tab(out); if (BCH_MEMBER_GROUP(m)) { unsigned idx = BCH_MEMBER_GROUP(m) - 1; @@ -1099,7 +1110,8 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb, } pr_newline(out); - pr_buf(out, "Data allowed: "); + pr_buf(out, "Data allowed:"); + pr_tab(out); if (BCH_MEMBER_DATA_ALLOWED(m)) bch2_flags_to_text(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(m)); @@ -1107,15 +1119,17 @@ static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb, pr_buf(out, "(none)"); pr_newline(out); - pr_buf(out, "Has data: "); + pr_buf(out, "Has data:"); + pr_tab(out); if (data_have) bch2_flags_to_text(out, bch2_data_types, data_have); else pr_buf(out, "(none)"); pr_newline(out); - pr_buf(out, "Discard: %llu", - BCH_MEMBER_DISCARD(m)); + pr_buf(out, "Discard:"); + pr_tab(out); + pr_buf(out, "%llu", BCH_MEMBER_DISCARD(m)); pr_newline(out); pr_indent_pop(out, 2); @@ -1452,6 +1466,9 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR ? bch2_sb_field_ops[type] : NULL; + if (!out->tabstops[0]) + out->tabstops[0] = 32; + if (ops) pr_buf(out, "%s", bch2_sb_fields[type]); else @@ -1500,6 +1517,9 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, u64 fields_have = 0; unsigned nr_devices = 0; + if (!out->tabstops[0]) + out->tabstops[0] = 32; + mi = bch2_sb_get_members(sb); if (mi) { struct bch_member *m; @@ -1510,137 +1530,106 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, nr_devices += bch2_member_exists(m); } - pr_buf(out, "External UUID: "); + pr_buf(out, "External UUID:"); + pr_tab(out); pr_uuid(out, sb->user_uuid.b); pr_newline(out); - pr_buf(out, "Internal UUID: "); + pr_buf(out, "Internal UUID:"); + pr_tab(out); pr_uuid(out, sb->uuid.b); pr_newline(out); - pr_buf(out, "Device index: %u", sb->dev_idx); + pr_buf(out, "Device index:"); + pr_tab(out); + pr_buf(out, "%u", sb->dev_idx); pr_newline(out); - pr_buf(out, "Label: "); + pr_buf(out, "Label:"); + pr_tab(out); pr_buf(out, "%.*s", (int) sizeof(sb->label), sb->label); pr_newline(out); - pr_buf(out, "Version: %u", le16_to_cpu(sb->version)); + pr_buf(out, "Version:"); + pr_tab(out); + pr_buf(out, "%u", le16_to_cpu(sb->version)); pr_newline(out); - pr_buf(out, "Oldest version on disk: %u", le16_to_cpu(sb->version_min)); + pr_buf(out, "Oldest version on disk:"); + pr_tab(out); + pr_buf(out, "%u", le16_to_cpu(sb->version_min)); pr_newline(out); - pr_buf(out, "Created: "); + pr_buf(out, "Created:"); + pr_tab(out); if (sb->time_base_lo) pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC)); else pr_buf(out, "(not set)"); pr_newline(out); - pr_buf(out, "Squence number: %llu", le64_to_cpu(sb->seq)); + pr_buf(out, "Sequence number:"); + pr_tab(out); + pr_buf(out, "%llu", le64_to_cpu(sb->seq)); pr_newline(out); - pr_buf(out, "Block_size: "); - pr_units(out, le16_to_cpu(sb->block_size), - (u32) le16_to_cpu(sb->block_size) << 9); + pr_buf(out, "Superblock size:"); + pr_tab(out); + pr_buf(out, "%zu", vstruct_bytes(sb)); pr_newline(out); - pr_buf(out, "Btree node size: "); - pr_units(out, BCH_SB_BTREE_NODE_SIZE(sb), - BCH_SB_BTREE_NODE_SIZE(sb) << 9); + pr_buf(out, "Clean:"); + pr_tab(out); + pr_buf(out, "%llu", BCH_SB_CLEAN(sb)); pr_newline(out); - pr_buf(out, "Error action: %s", - BCH_SB_ERROR_ACTION(sb) < BCH_ON_ERROR_NR - ? 
bch2_error_actions[BCH_SB_ERROR_ACTION(sb)] - : "unknown"); + pr_buf(out, "Devices:"); + pr_tab(out); + pr_buf(out, "%u", nr_devices); pr_newline(out); - pr_buf(out, "Clean: %llu", BCH_SB_CLEAN(sb)); + pr_buf(out, "Sections:"); + vstruct_for_each(sb, f) + fields_have |= 1 << le32_to_cpu(f->type); + pr_tab(out); + bch2_flags_to_text(out, bch2_sb_fields, fields_have); pr_newline(out); - pr_buf(out, "Features: "); + pr_buf(out, "Features:"); + pr_tab(out); bch2_flags_to_text(out, bch2_sb_features, le64_to_cpu(sb->features[0])); pr_newline(out); - pr_buf(out, "Compat features: "); + pr_buf(out, "Compat features:"); + pr_tab(out); bch2_flags_to_text(out, bch2_sb_compat, le64_to_cpu(sb->compat[0])); pr_newline(out); - pr_buf(out, "Metadata replicas: %llu", BCH_SB_META_REPLICAS_WANT(sb)); pr_newline(out); - - pr_buf(out, "Data replicas: %llu", BCH_SB_DATA_REPLICAS_WANT(sb)); - pr_newline(out); - - pr_buf(out, "Metadata checksum type: %s (%llu)", - BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_OPT_NR - ? bch2_csum_opts[BCH_SB_META_CSUM_TYPE(sb)] - : "unknown", - BCH_SB_META_CSUM_TYPE(sb)); - pr_newline(out); - - pr_buf(out, "Data checksum type: %s (%llu)", - BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_OPT_NR - ? bch2_csum_opts[BCH_SB_DATA_CSUM_TYPE(sb)] - : "unknown", - BCH_SB_DATA_CSUM_TYPE(sb)); - pr_newline(out); - - pr_buf(out, "Compression type: %s (%llu)", - BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_OPT_NR - ? bch2_compression_opts[BCH_SB_COMPRESSION_TYPE(sb)] - : "unknown", - BCH_SB_COMPRESSION_TYPE(sb)); - pr_newline(out); - - pr_buf(out, "Foreground write target: "); - bch2_sb_target_to_text(out, sb, BCH_SB_FOREGROUND_TARGET(sb)); - pr_newline(out); - - pr_buf(out, "Background write target: "); - bch2_sb_target_to_text(out, sb, BCH_SB_BACKGROUND_TARGET(sb)); - pr_newline(out); - - pr_buf(out, "Promote target: "); - bch2_sb_target_to_text(out, sb, BCH_SB_PROMOTE_TARGET(sb)); - pr_newline(out); - - pr_buf(out, "Metadata target: "); - bch2_sb_target_to_text(out, sb, BCH_SB_METADATA_TARGET(sb)); - pr_newline(out); - - pr_buf(out, "String hash type: %s (%llu)", - BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR - ? 
bch2_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)] - : "unknown", - BCH_SB_STR_HASH_TYPE(sb)); - pr_newline(out); - - pr_buf(out, "32 bit inodes: %llu", BCH_SB_INODE_32BIT(sb)); - pr_newline(out); - - pr_buf(out, "GC reserve percentage: %llu%%", BCH_SB_GC_RESERVE(sb)); + pr_buf(out, "Options:"); pr_newline(out); + pr_indent_push(out, 2); + { + enum bch_opt_id id; - pr_buf(out, "Root reserve percentage: %llu%%", BCH_SB_ROOT_RESERVE(sb)); - pr_newline(out); + for (id = 0; id < bch2_opts_nr; id++) { + const struct bch_option *opt = bch2_opt_table + id; - pr_buf(out, "Devices: %u live, %u total", - nr_devices, sb->nr_devices); - pr_newline(out); + if (opt->get_sb != BCH2_NO_SB_OPT) { + u64 v = bch2_opt_from_sb(sb, id); - pr_buf(out, "Sections: "); - vstruct_for_each(sb, f) - fields_have |= 1 << le32_to_cpu(f->type); - bch2_flags_to_text(out, bch2_sb_fields, fields_have); - pr_newline(out); + pr_buf(out, "%s:", opt->attr.name); + pr_tab(out); + bch2_opt_to_text(out, NULL, sb, opt, v, + OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST); + pr_newline(out); + } + } + } - pr_buf(out, "Superblock size: %zu", vstruct_bytes(sb)); - pr_newline(out); + pr_indent_pop(out, 2); if (print_layout) { pr_newline(out); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0bc78c50150a..56b01624d5fb 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -891,7 +891,7 @@ static void print_mount_opts(struct bch_fs *c) if (!first) pr_buf(&p, ","); first = false; - bch2_opt_to_text(&p, c, opt, v, OPT_SHOW_MOUNT_STYLE); + bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); } if (!p.pos) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 3018250d421b..49e38859bff8 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -597,7 +597,7 @@ SHOW(bch2_fs_opts_dir) int id = opt - bch2_opt_table; u64 v = bch2_opt_get_by_id(&c->opts, id); - bch2_opt_to_text(out, c, opt, v, OPT_SHOW_FULL_LIST); + bch2_opt_to_text(out, c, c->disk_sb.sb, opt, v, OPT_SHOW_FULL_LIST); pr_char(out, '\n'); return 0; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index f4e20e796ba0..08b33ab8489f 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -448,7 +448,7 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, return -ENODATA; v = bch2_opt_get_by_id(&opts, id); - bch2_opt_to_text(&out, c, opt, v, 0); + bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0); ret = out.pos; -- cgit v1.2.3 From 85d8cf161f98993f544c0b2c614873caf7b9c14f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Mar 2022 12:31:52 -0500 Subject: bcachefs: bch2_btree_iter_peek_upto() In BTREE_ITER_FILTER_SNAPHOTS mode, we skip over keys in unrelated snapshots. When we hit the end of an inode, if the next inode(s) are in a different subvolume, we could potentially have to skip past many keys before finding a key we can return to the caller, so they can terminate the iteration. This adds a peek_upto() variant to solve this problem, to be used when we know the range we're searching within. 
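(Aside: a minimal usage sketch, not part of the patch — the wrapper function is made up, modeled on the call sites converted in the diff below. Passing an end position bounds the iteration inside the btree code, so the loop body no longer needs its own "did we walk past this inode?" check:)

	static int count_xattrs(struct btree_trans *trans, u64 inum, u32 snapshot)
	{
		struct btree_iter iter;
		struct bkey_s_c k;
		int nr = 0, ret = 0;

		/* iteration stops once iter.pos passes POS(inum, U64_MAX) */
		for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX), 0, k, ret)
			if (k.k->type == KEY_TYPE_xattr)
				nr++;
		bch2_trans_iter_exit(trans, &iter);

		return ret ?: nr;
	}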
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 36 ++++++++++++++++++++++++++---------- fs/bcachefs/btree_iter.h | 30 ++++++++++++++++++++++++++++-- fs/bcachefs/btree_update_leaf.c | 5 +++-- fs/bcachefs/dirent.c | 17 ++++++----------- fs/bcachefs/fs.c | 5 ++--- fs/bcachefs/inode.c | 4 ++-- fs/bcachefs/str_hash.h | 21 +++++++-------------- fs/bcachefs/xattr.c | 10 +++------- 8 files changed, 77 insertions(+), 51 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index b18e4fcc46e5..317c8066f3fc 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2346,11 +2346,12 @@ out: * bch2_btree_iter_peek: returns first key greater than or equal to iterator's * current position */ -struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) { struct btree_trans *trans = iter->trans; struct bpos search_key = btree_iter_search_key(iter); struct bkey_s_c k; + struct bpos iter_pos; int ret; if (iter->update_path) { @@ -2366,6 +2367,24 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) if (!k.k || bkey_err(k)) goto out; + /* + * iter->pos should be mononotically increasing, and always be + * equal to the key we just returned - except extents can + * straddle iter->pos: + */ + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) + iter_pos = k.k->p; + else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) + iter_pos = bkey_start_pos(k.k); + else + iter_pos = iter->pos; + + if (bkey_cmp(iter_pos, end) > 0) { + bch2_btree_iter_set_pos(iter, end); + k = bkey_s_c_null; + goto out; + } + if (iter->update_path && bkey_cmp(iter->update_path->pos, k.k->p)) { bch2_path_put(trans, iter->update_path, @@ -2419,14 +2438,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) break; } - /* - * iter->pos should be mononotically increasing, and always be equal to - * the key we just returned - except extents can straddle iter->pos: - */ - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) - iter->pos = k.k->p; - else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) - iter->pos = bkey_start_pos(k.k); + iter->pos = iter_pos; iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, iter->flags & BTREE_ITER_INTENT); @@ -2658,9 +2670,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (iter->flags & BTREE_ITER_INTENT) { struct btree_iter iter2; + struct bpos end = iter->pos; + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + end.offset = U64_MAX; bch2_trans_copy_iter(&iter2, iter); - k = bch2_btree_iter_peek(&iter2); + k = bch2_btree_iter_peek_upto(&iter2, end); if (k.k && !bkey_err(k)) { iter->k = iter2.k; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 1e3172a2885a..27b3b82f7df3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -245,9 +245,14 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *); struct btree *bch2_btree_iter_next_node(struct btree_iter *); -struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); +static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) +{ + return bch2_btree_iter_peek_upto(iter, SPOS_MAX); +} + struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); struct bkey_s_c bch2_btree_iter_prev(struct 
btree_iter *); @@ -342,13 +347,26 @@ static inline int bkey_err(struct bkey_s_c k) } static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, - unsigned flags) + unsigned flags) { return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : bch2_btree_iter_peek(iter); } +static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, + struct bpos end, + unsigned flags) +{ + if (!(flags & BTREE_ITER_SLOTS)) + return bch2_btree_iter_peek_upto(iter, end); + + if (bkey_cmp(iter->pos, end) > 0) + return bkey_s_c_null; + + return bch2_btree_iter_peek_slot(iter); +} + static inline int btree_trans_too_many_iters(struct btree_trans *trans) { return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2 @@ -385,6 +403,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) +#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ + _start, _end, _flags, _k, _ret) \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\ + !((_ret) = bkey_err(_k)) && (_k).k; \ + bch2_btree_iter_advance(&(_iter))) + #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \ for (; \ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\ diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 9f1ff5f8635d..c9cddba0f999 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -1286,7 +1286,7 @@ int bch2_trans_update_extent(struct btree_trans *trans, BTREE_ITER_INTENT| BTREE_ITER_WITH_UPDATES| BTREE_ITER_NOT_EXTENTS); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -1405,7 +1405,8 @@ int bch2_trans_update_extent(struct btree_trans *trans, goto out; } next: - k = bch2_btree_iter_next(&iter); + bch2_btree_iter_advance(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index a43a24409d37..760e4f74715f 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -470,16 +470,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) if (ret) return ret; - for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents, - SPOS(dir.inum, 0, snapshot), 0, k, ret) { - if (k.k->p.inode > dir.inum) - break; - + for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, + SPOS(dir.inum, 0, snapshot), + POS(dir.inum, U64_MAX), 0, k, ret) if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; } - } bch2_trans_iter_exit(trans, &iter); return ret; @@ -503,11 +500,9 @@ retry: if (ret) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents, - SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) { - if (k.k->p.inode > inum.inum) - break; - + for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents, + SPOS(inum.inum, ctx->pos, snapshot), + POS(inum.inum, U64_MAX), 0, k, ret) { if (k.k->type != KEY_TYPE_dirent) continue; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4c68cee013e3..afaee020e7e3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -936,9 +936,8 @@ retry: SPOS(ei->v.i_ino, start, snapshot), 0); while (!(ret = btree_trans_too_many_iters(&trans)) && - (k = bch2_btree_iter_peek(&iter)).k && - !(ret = 
bkey_err(k)) && - bkey_cmp(iter.pos, end) < 0) { + (k = bch2_btree_iter_peek_upto(&iter, end)).k && + !(ret = bkey_err(k))) { enum btree_id data_btree = BTREE_ID_extents; if (!bkey_extent_is_data(k.k) && diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index ee14ba5ee73d..3735397ee9c5 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -586,12 +586,12 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, bch2_btree_iter_set_snapshot(&iter, snapshot); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); ret = bkey_err(k); if (ret) goto err; - if (!k.k || iter.pos.inode != inum.inum) + if (!k.k) break; bkey_init(&delete.k); diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 57d636740d2f..591bbb9f8beb 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -163,12 +163,10 @@ bch2_hash_lookup(struct btree_trans *trans, if (ret) return ret; - for_each_btree_key_norestart(trans, *iter, desc.btree_id, + for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), + POS(inum.inum, U64_MAX), BTREE_ITER_SLOTS|flags, k, ret) { - if (iter->pos.inode != inum.inum) - break; - if (is_visible_key(desc, inum, k)) { if (!desc.cmp_key(k, key)) return 0; @@ -199,15 +197,12 @@ bch2_hash_hole(struct btree_trans *trans, if (ret) return ret; - for_each_btree_key_norestart(trans, *iter, desc.btree_id, + for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter->pos.inode != inum.inum) - break; - + POS(inum.inum, U64_MAX), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) if (!is_visible_key(desc, inum, k)) return 0; - } bch2_trans_iter_exit(trans, iter); return ret ?: -ENOSPC; @@ -260,14 +255,12 @@ int bch2_hash_set(struct btree_trans *trans, if (ret) return ret; - for_each_btree_key_norestart(trans, iter, desc.btree_id, + for_each_btree_key_upto_norestart(trans, iter, desc.btree_id, SPOS(inum.inum, desc.hash_bkey(info, bkey_i_to_s_c(insert)), snapshot), + POS(inum.inum, U64_MAX), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter.pos.inode != inum.inum) - break; - if (is_visible_key(desc, inum, k)) { if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) goto found; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 08b33ab8489f..ecce10342126 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -311,13 +311,9 @@ retry: if (ret) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs, - SPOS(inum, offset, snapshot), 0, k, ret) { - BUG_ON(k.k->p.inode < inum); - - if (k.k->p.inode > inum) - break; - + for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs, + SPOS(inum, offset, snapshot), + POS(inum, U64_MAX), 0, k, ret) { if (k.k->type != KEY_TYPE_xattr) continue; -- cgit v1.2.3 From 91d961badfd123b6759488bc4aa7a4d014b739f1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 29 Mar 2022 15:48:45 -0400 Subject: bcachefs: darrays Inspired by CCAN darray - simple, stupid resizable (dynamic) arrays. 
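For illustration only, not part of the patch: a minimal sketch of the darray API in a hypothetical caller (collect_ids() and u32_list are made up for the example; pr_info() assumes kernel context):

typedef DARRAY(u32) u32_list;

static int collect_ids(void)
{
        u32_list ids;
        u32 id, *i;
        int ret = 0;

        darray_init(&ids);

        for (id = 0; id < 10; id++) {
                /* darray_push() grows the array via krealloc_array() */
                ret = darray_push(&ids, id);
                if (ret) /* -ENOMEM */
                        goto out;
        }

        /* the iteration cursor is a pointer into ->data */
        darray_for_each(ids, i)
                pr_info("id %u\n", *i);
out:
        darray_exit(&ids); /* kfree()s ->data and resets to empty */
        return ret;
}

Compared to the open-coded krealloc() loops this series replaces, the growth policy (roundup_pow_of_two() via krealloc_array()) and the free/reset logic live in one place.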
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/darray.h | 77 +++++++++++++++++++++ fs/bcachefs/fs.c | 2 +- fs/bcachefs/fs.h | 4 +- fs/bcachefs/fsck.c | 153 ++++++++++++++++-------------------------- fs/bcachefs/move.c | 8 +-- fs/bcachefs/subvolume.c | 41 ++++------- fs/bcachefs/subvolume.h | 38 ++++------- fs/bcachefs/subvolume_types.h | 8 +-- 9 files changed, 170 insertions(+), 163 deletions(-) create mode 100644 fs/bcachefs/darray.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 6cda77ad4342..01e9ed5dfc61 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -665,7 +665,7 @@ struct bch_fs { struct mutex snapshot_table_lock; struct work_struct snapshot_delete_work; struct work_struct snapshot_wait_for_pagecache_and_delete_work; - struct snapshot_id_list snapshots_unlinked; + snapshot_id_list snapshots_unlinked; struct mutex snapshots_unlinked_lock; /* BTREE CACHE */ diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h new file mode 100644 index 000000000000..519ab9b96e67 --- /dev/null +++ b/fs/bcachefs/darray.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_DARRAY_H +#define _BCACHEFS_DARRAY_H + +/* + * Dynamic arrays: + * + * Inspired by CCAN's darray + */ + +#include "util.h" +#include + +#define DARRAY(type) \ +struct { \ + size_t nr, size; \ + type *data; \ +} + +typedef DARRAY(void) darray_void; + +static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more) +{ + if (d->nr + more > d->size) { + size_t new_size = roundup_pow_of_two(d->nr + more); + void *data = krealloc_array(d->data, new_size, t_size, GFP_KERNEL); + + if (!data) + return -ENOMEM; + + d->data = data; + d->size = new_size; + } + + return 0; +} + +#define darray_make_room(_d, _more) \ + __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more)) + +#define darray_top(_d) ((_d).data[(_d).nr]) + +#define darray_push(_d, _item) \ +({ \ + int _ret = darray_make_room((_d), 1); \ + \ + if (!_ret) \ + (_d)->data[(_d)->nr++] = (_item); \ + _ret; \ +}) + +#define darray_insert_item(_d, _pos, _item) \ +({ \ + size_t pos = (_pos); \ + int _ret = darray_make_room((_d), 1); \ + \ + if (!_ret) \ + array_insert_item((_d)->data, (_d)->nr, pos, (_item)); \ + _ret; \ +}) + +#define darray_for_each(_d, _i) \ + for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++) + +#define darray_init(_d) \ +do { \ + (_d)->data = NULL; \ + (_d)->nr = (_d)->size = 0; \ +} while (0) + +#define darray_exit(_d) \ +do { \ + kfree((_d)->data); \ + darray_init(_d); \ +} while (0) + +#endif /* _BCACHEFS_DARRAY_H */ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index afaee020e7e3..d8cd32b5d765 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1478,7 +1478,7 @@ static void bch2_evict_inode(struct inode *vinode) } void bch2_evict_subvolume_inodes(struct bch_fs *c, - struct snapshot_id_list *s) + snapshot_id_list *s) { struct super_block *sb = c->vfs_sb; struct inode *inode; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index a67ab1ad2a31..73b96d0b5d83 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -190,7 +190,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *, struct iattr *); int __bch2_unlink(struct inode *, struct dentry *, bool); -void bch2_evict_subvolume_inodes(struct bch_fs *, struct snapshot_id_list *); +void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *); void bch2_vfs_exit(void); int bch2_vfs_init(void); @@ -198,7 +198,7 @@ int bch2_vfs_init(void); #else 
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c, - struct snapshot_id_list *s) {} + snapshot_id_list *s) {} static inline void bch2_vfs_exit(void) {} static inline int bch2_vfs_init(void) { return 0; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 8783b950055e..10754b13ec15 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "bkey_buf.h" #include "btree_update.h" +#include "darray.h" #include "dirent.h" #include "error.h" #include "fs-common.h" @@ -471,11 +472,11 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, str pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; if (bkey_cmp(s->pos, pos)) - s->nr = 0; + s->ids.nr = 0; s->pos = pos; /* Might get called multiple times due to lock restarts */ - if (s->nr && s->d[s->nr - 1] == pos.snapshot) + if (s->ids.nr && s->ids.data[s->ids.nr - 1] == pos.snapshot) return 0; return snapshots_seen_add(c, s, pos.snapshot); @@ -498,7 +499,7 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see ancestor = snapshot_t(c, ancestor)->equiv; /* @ancestor should be the snapshot most recently added to @seen */ - BUG_ON(!seen->nr || seen->d[seen->nr - 1] != ancestor); + BUG_ON(!seen->ids.nr || seen->ids.data[seen->ids.nr - 1] != ancestor); BUG_ON(seen->pos.snapshot != ancestor); if (id == ancestor) @@ -507,11 +508,11 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see if (!bch2_snapshot_is_ancestor(c, id, ancestor)) return false; - for (i = seen->nr - 2; - i >= 0 && seen->d[i] >= id; + for (i = seen->ids.nr - 2; + i >= 0 && seen->ids.data[i] >= id; --i) - if (bch2_snapshot_is_ancestor(c, id, seen->d[i]) && - bch2_snapshot_is_ancestor(c, seen->d[i], ancestor)) + if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]) && + bch2_snapshot_is_ancestor(c, seen->ids.data[i], ancestor)) return false; return true; @@ -537,26 +538,25 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s, } #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ - for (_i = (_w)->d; _i < (_w)->d + (_w)->nr && (_i)->snapshot <= (_snapshot); _i++)\ + for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && (_i)->snapshot <= (_snapshot); _i++)\ if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) +struct inode_walker_entry { + struct bch_inode_unpacked inode; + u32 snapshot; + u64 count; +}; + struct inode_walker { bool first_this_inode; u64 cur_inum; - size_t nr; - size_t size; - struct inode_walker_entry { - struct bch_inode_unpacked inode; - u32 snapshot; - u64 count; - } *d; + DARRAY(struct inode_walker_entry) inodes; }; static void inode_walker_exit(struct inode_walker *w) { - kfree(w->d); - w->d = NULL; + darray_exit(&w->inodes); } static struct inode_walker inode_walker_init(void) @@ -564,43 +564,17 @@ static struct inode_walker inode_walker_init(void) return (struct inode_walker) { 0, }; } -static int inode_walker_realloc(struct bch_fs *c, struct inode_walker *w) -{ - if (w->nr == w->size) { - size_t new_size = max_t(size_t, 8UL, w->size * 2); - void *d = krealloc(w->d, new_size * sizeof(w->d[0]), - GFP_KERNEL); - if (!d) { - bch_err(c, "fsck: error allocating memory for inode_walker, size %zu", - new_size); - return -ENOMEM; - } - - w->d = d; - w->size = new_size; - } - - return 0; -} - static int add_inode(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c inode) { struct bch_inode_unpacked u; - int ret; - - ret = inode_walker_realloc(c, w); - if (ret) - return 
ret; BUG_ON(bch2_inode_unpack(inode, &u)); - w->d[w->nr++] = (struct inode_walker_entry) { + return darray_push(&w->inodes, ((struct inode_walker_entry) { .inode = u, .snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv, - }; - - return 0; + })); } static int __walk_inode(struct btree_trans *trans, @@ -619,7 +593,7 @@ static int __walk_inode(struct btree_trans *trans, goto lookup_snapshot; } - w->nr = 0; + w->inodes.nr = 0; for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, pos.inode), BTREE_ITER_ALL_SNAPSHOTS, k, ret) { @@ -637,26 +611,25 @@ static int __walk_inode(struct btree_trans *trans, w->cur_inum = pos.inode; w->first_this_inode = true; lookup_snapshot: - for (i = 0; i < w->nr; i++) - if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->d[i].snapshot)) + for (i = 0; i < w->inodes.nr; i++) + if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->inodes.data[i].snapshot)) goto found; return INT_MAX; found: - BUG_ON(pos.snapshot > w->d[i].snapshot); + BUG_ON(pos.snapshot > w->inodes.data[i].snapshot); - if (pos.snapshot != w->d[i].snapshot) { + if (pos.snapshot != w->inodes.data[i].snapshot) { ancestor_pos = i; - while (i && w->d[i - 1].snapshot > pos.snapshot) + while (i && w->inodes.data[i - 1].snapshot > pos.snapshot) --i; - ret = inode_walker_realloc(c, w); + ret = darray_insert_item(&w->inodes, i, w->inodes.data[ancestor_pos]); if (ret) return ret; - array_insert_item(w->d, w->nr, i, w->d[ancestor_pos]); - w->d[i].snapshot = pos.snapshot; - w->d[i].count = 0; + w->inodes.data[i].snapshot = pos.snapshot; + w->inodes.data[i].count = 0; } return i; @@ -672,7 +645,7 @@ static int __get_visible_inodes(struct btree_trans *trans, struct bkey_s_c k; int ret; - w->nr = 0; + w->inodes.nr = 0; for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), BTREE_ITER_ALL_SNAPSHOTS, k, ret) { @@ -1133,7 +1106,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) int ret = 0, ret2 = 0; s64 count2; - for (i = w->d; i < w->d + w->nr; i++) { + darray_for_each(w->inodes, i) { if (i->inode.bi_sectors == i->count) continue; @@ -1232,7 +1205,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, goto out; } - i = inode->d + ret; + i = inode->inodes.data + ret; ret = 0; if (fsck_err_on(!S_ISREG(i->inode.bi_mode) && @@ -1333,7 +1306,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) int ret = 0, ret2 = 0; s64 count2; - for (i = w->d; i < w->d + w->nr; i++) { + darray_for_each(w->inodes, i) { if (i->inode.bi_nlink == i->count) continue; @@ -1537,7 +1510,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto out; } - i = dir->d + ret; + i = dir->inodes.data + ret; ret = 0; if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), c, @@ -1550,7 +1523,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, } if (dir->first_this_inode) - *hash_info = bch2_hash_info_init(c, &dir->d[0].inode); + *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); @@ -1618,7 +1591,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (fsck_err_on(!target->nr, c, + if (fsck_err_on(!target->inodes.nr, c, "dirent points to missing inode:\n%s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), @@ -1628,7 +1601,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto err; } - for (i = target->d; i < target->d + target->nr; i++) { + 
darray_for_each(target->inodes, i) { ret = check_dirent_target(trans, iter, d, &i->inode, i->snapshot); if (ret) @@ -1726,7 +1699,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, ret = 0; if (inode->first_this_inode) - *hash_info = bch2_hash_info_init(c, &inode->d[0].inode); + *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); fsck_err: @@ -1836,21 +1809,18 @@ static int check_root(struct bch_fs *c) check_root_trans(&trans)); } -struct pathbuf { - size_t nr; - size_t size; - - struct pathbuf_entry { - u64 inum; - u32 snapshot; - } *entries; +struct pathbuf_entry { + u64 inum; + u32 snapshot; }; -static bool path_is_dup(struct pathbuf *p, u64 inum, u32 snapshot) +typedef DARRAY(struct pathbuf_entry) pathbuf; + +static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) { struct pathbuf_entry *i; - for (i = p->entries; i < p->entries + p->nr; i++) + darray_for_each(*p, i) if (i->inum == inum && i->snapshot == snapshot) return true; @@ -1858,29 +1828,18 @@ static bool path_is_dup(struct pathbuf *p, u64 inum, u32 snapshot) return false; } -static int path_down(struct bch_fs *c, struct pathbuf *p, +static int path_down(struct bch_fs *c, pathbuf *p, u64 inum, u32 snapshot) { - if (p->nr == p->size) { - size_t new_size = max_t(size_t, 256UL, p->size * 2); - void *n = krealloc(p->entries, - new_size * sizeof(p->entries[0]), - GFP_KERNEL); - if (!n) { - bch_err(c, "fsck: error allocating memory for pathbuf, size %zu", - new_size); - return -ENOMEM; - } - - p->entries = n; - p->size = new_size; - }; - - p->entries[p->nr++] = (struct pathbuf_entry) { + int ret = darray_push(p, ((struct pathbuf_entry) { .inum = inum, .snapshot = snapshot, - }; - return 0; + })); + + if (ret) + bch_err(c, "fsck: error allocating memory for pathbuf, size %zu", + p->size); + return ret; } /* @@ -1889,7 +1848,7 @@ static int path_down(struct bch_fs *c, struct pathbuf *p, * XXX: we should also be verifying that inodes are in the right subvolumes */ static int check_path(struct btree_trans *trans, - struct pathbuf *p, + pathbuf *p, struct bch_inode_unpacked *inode, u32 snapshot) { @@ -1963,7 +1922,7 @@ static int check_path(struct btree_trans *trans, /* XXX print path */ bch_err(c, "directory structure loop"); - for (i = p->entries; i < p->entries + p->nr; i++) + darray_for_each(*p, i) pr_err("%llu:%u", i->inum, i->snapshot); pr_err("%llu:%u", inode->bi_inum, snapshot); @@ -2000,7 +1959,7 @@ static int check_directory_structure(struct bch_fs *c) struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked u; - struct pathbuf path = { 0, 0, NULL }; + pathbuf path = { 0, }; int ret; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); @@ -2030,7 +1989,7 @@ static int check_directory_structure(struct bch_fs *c) BUG_ON(ret == -EINTR); - kfree(path.entries); + darray_exit(&path); bch2_trans_exit(&trans); return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 2eb192da8e1d..b916ee35ee37 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -91,10 +91,10 @@ next: if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) { struct bkey_i *update; - size_t i; + u32 *i; - for (i = 0; i < s.nr; i++) - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, s.d[i])) + darray_for_each(s.ids, i) + if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i)) goto next; update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); @@ -124,7 +124,7 @@ next: } } bch2_trans_iter_exit(trans, &iter); - 
kfree(s.d); + darray_exit(&s.ids); return ret; } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 69603327d93d..2c5f7e7793a7 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -544,36 +544,21 @@ err: return ret; } -static int snapshot_id_add(struct snapshot_id_list *s, u32 id) +static int snapshot_id_add(snapshot_id_list *s, u32 id) { BUG_ON(snapshot_list_has_id(s, id)); - if (s->nr == s->size) { - size_t new_size = max(8U, s->size * 2); - void *n = krealloc(s->d, - new_size * sizeof(s->d[0]), - GFP_KERNEL); - if (!n) { - pr_err("error allocating snapshot ID list"); - return -ENOMEM; - } - - s->d = n; - s->size = new_size; - }; - - s->d[s->nr++] = id; - return 0; + return darray_push(s, id); } static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, - struct snapshot_id_list *deleted, + snapshot_id_list *deleted, enum btree_id btree_id) { struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - struct snapshot_id_list equiv_seen = { 0 }; + snapshot_id_list equiv_seen = { 0 }; struct bpos last_pos = POS_MIN; int ret = 0; @@ -620,7 +605,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, } bch2_trans_iter_exit(trans, &iter); - kfree(equiv_seen.d); + darray_exit(&equiv_seen); return ret; } @@ -632,7 +617,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_snapshot snap; - struct snapshot_id_list deleted = { 0 }; + snapshot_id_list deleted = { 0 }; u32 i, id, children[2]; int ret = 0; @@ -712,15 +697,15 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) for (i = 0; i < deleted.nr; i++) { ret = __bch2_trans_do(&trans, NULL, NULL, 0, - bch2_snapshot_node_delete(&trans, deleted.d[i])); + bch2_snapshot_node_delete(&trans, deleted.data[i])); if (ret) { bch_err(c, "error deleting snapshot %u: %i", - deleted.d[i], ret); + deleted.data[i], ret); goto err; } } err: - kfree(deleted.d); + darray_exit(&deleted); bch2_trans_exit(&trans); percpu_ref_put(&c->writes); } @@ -875,14 +860,14 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_wait_for_pagecache_and_delete_work); - struct snapshot_id_list s; + snapshot_id_list s; u32 *id; int ret = 0; while (!ret) { mutex_lock(&c->snapshots_unlinked_lock); s = c->snapshots_unlinked; - memset(&c->snapshots_unlinked, 0, sizeof(c->snapshots_unlinked)); + darray_init(&c->snapshots_unlinked); mutex_unlock(&c->snapshots_unlinked_lock); if (!s.nr) @@ -890,7 +875,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) bch2_evict_subvolume_inodes(c, &s); - for (id = s.d; id < s.d + s.nr; id++) { + for (id = s.data; id < s.data + s.nr; id++) { ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_subvolume_delete(&trans, *id)); if (ret) { @@ -899,7 +884,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) } } - kfree(s.d); + darray_exit(&s); } percpu_ref_put(&c->writes); diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 4abe53df2788..b3d5ae49101d 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_SUBVOLUME_H #define _BCACHEFS_SUBVOLUME_H +#include "darray.h" #include "subvolume_types.h" void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -58,15 +59,13 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances 
struct snapshots_seen { struct bpos pos; - size_t nr; - size_t size; - u32 *d; + DARRAY(u32) ids; }; static inline void snapshots_seen_exit(struct snapshots_seen *s) { - kfree(s->d); - s->d = NULL; + kfree(s->ids.data); + s->ids.data = NULL; } static inline void snapshots_seen_init(struct snapshots_seen *s) @@ -76,30 +75,19 @@ static inline void snapshots_seen_init(struct snapshots_seen *s) static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) { - if (s->nr == s->size) { - size_t new_size = max(s->size, (size_t) 128) * 2; - u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL); - - if (!d) { - bch_err(c, "error reallocating snapshots_seen table (new size %zu)", - new_size); - return -ENOMEM; - } - - s->size = new_size; - s->d = d; - } - - s->d[s->nr++] = id; - return 0; + int ret = darray_push(&s->ids, id); + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); + return ret; } -static inline bool snapshot_list_has_id(struct snapshot_id_list *s, u32 id) +static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) { - unsigned i; + u32 *i; - for (i = 0; i < s->nr; i++) - if (id == s->d[i]) + darray_for_each(*s, i) + if (*i == id) return true; return false; } diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h index 9410b9587591..f7562b5d51df 100644 --- a/fs/bcachefs/subvolume_types.h +++ b/fs/bcachefs/subvolume_types.h @@ -2,10 +2,8 @@ #ifndef _BCACHEFS_SUBVOLUME_TYPES_H #define _BCACHEFS_SUBVOLUME_TYPES_H -struct snapshot_id_list { - u32 nr; - u32 size; - u32 *d; -}; +#include "darray.h" + +typedef DARRAY(u32) snapshot_id_list; #endif /* _BCACHEFS_SUBVOLUME_TYPES_H */ -- cgit v1.2.3 From facc81479cab081cbcb962bfbe5d61f25230d013 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 May 2022 17:20:41 -0400 Subject: bcachefs: Delete bch_writepage Per Dave Chinner and the xfs folks, .writepage is no longer needed, and it's better not to define it if .writepages is the intended path. 
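For illustration only, not part of the patch: with ->writepage gone, writeback is driven solely through ->writepages, so an ops table ends up shaped like this (example_* names are hypothetical; the real table in the diff below has more entries):

static const struct address_space_operations example_aops = {
        .read_folio     = example_read_folio,
        /* no .writepage: data writeback goes only through ->writepages */
        .writepages     = example_writepages,
};

The practical effect, per the rationale above, is that nothing can force out a single dirty page through ->writepage anymore; dirty pages wait for regular writeback via ->writepages.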
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 14 -------------- fs/bcachefs/fs-io.h | 1 - fs/bcachefs/fs.c | 1 - 3 files changed, 16 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 256b3dd0d4aa..ad51483ad764 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1469,20 +1469,6 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc return ret; } -int bch2_writepage(struct page *page, struct writeback_control *wbc) -{ - struct bch_fs *c = page->mapping->host->i_sb->s_fs_info; - struct bch_writepage_state w = - bch_writepage_state_init(c, to_bch_ei(page->mapping->host)); - int ret; - - ret = __bch2_writepage(page_folio(page), wbc, &w); - if (w.io) - bch2_writepage_do_io(&w); - - return ret; -} - /* buffered writes: */ int bch2_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index 64b16b44e25a..af905331542d 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -15,7 +15,6 @@ int __must_check bch2_write_inode_size(struct bch_fs *, struct bch_inode_info *, loff_t, unsigned); -int bch2_writepage(struct page *, struct writeback_control *); int bch2_read_folio(struct file *, struct folio *); int bch2_writepages(struct address_space *, struct writeback_control *); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index d8cd32b5d765..b2bc28d0cf05 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1113,7 +1113,6 @@ static const struct inode_operations bch_special_inode_operations = { }; static const struct address_space_operations bch_address_space_operations = { - .writepage = bch2_writepage, .read_folio = bch2_read_folio, .writepages = bch2_writepages, .readahead = bch2_readahead, -- cgit v1.2.3 From e68914ca849fa51167e2136ad9f6b43c22956d3c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 13 Jul 2022 05:25:29 -0400 Subject: bcachefs: Rename __bch2_trans_do() -> commit_do() Better/more descriptive naming, and prep for adding nested_lockrestart_do() and nested_commit_do(). 
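For illustration only, not part of the patch: the calling convention after the rename, mirroring the converted call sites in the diff below:

ret = commit_do(&trans, NULL, NULL,     /* no disk reservation, no journal seq */
                BTREE_INSERT_NOFAIL,
                bch2_btree_iter_traverse(&iter) ?:
                bch2_btree_delete_at(&trans, &iter, 0));

Semantics are unchanged by the rename: the update expression runs first, a zero result falls through to bch2_trans_commit() via the ?: operator, and lockrestart_do() retries the whole expression on transaction restart (-EINTR).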
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 14 +++++++------- fs/bcachefs/btree_gc.c | 10 +++++----- fs/bcachefs/btree_update.h | 5 ++--- fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/buckets.c | 2 +- fs/bcachefs/fs.c | 6 +++--- fs/bcachefs/fsck.c | 20 ++++++++++---------- fs/bcachefs/lru.c | 2 +- fs/bcachefs/subvolume.c | 6 +++--- fs/bcachefs/tests.c | 22 +++++++++++----------- 10 files changed, 44 insertions(+), 45 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index a511ab9e4e7c..f515e679a90c 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -787,7 +787,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_PREFETCH); while (1) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_alloc_key(&trans, &iter, @@ -808,7 +808,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_PREFETCH); while (1) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_discard_freespace_key(&trans, &iter)); @@ -825,7 +825,7 @@ int bch2_check_alloc_info(struct bch_fs *c) bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_PREFETCH); while (1) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_discard_freespace_key(&trans, &iter)); @@ -930,7 +930,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_alloc_to_lru_ref(&trans, &iter)); @@ -1060,7 +1060,7 @@ static void bch2_do_discards_work(struct work_struct *work) continue; } - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOFAIL, bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done)); @@ -1198,7 +1198,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); while (nr_to_invalidate-- >= 0) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_USE_RESERVE| BTREE_INSERT_NOFAIL, invalidate_one_bucket(&trans, ca, &bucket, @@ -1254,7 +1254,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) if (iter.pos.offset >= ca->mi.nbuckets) break; - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, bucket_freespace_init(&trans, &iter)); if (ret) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 8be1c9f2664d..ebb1ad4b8abe 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -799,7 +799,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, atomic64_set(&c->key_version, k->k->version.lo); } - ret = __bch2_trans_do(trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, bch2_mark_key(trans, old, *k, flags)); fsck_err: err: @@ -1435,7 +1435,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) 
>= 0) break; - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW, bch2_alloc_write_key(&trans, &iter, metadata_only)); @@ -1589,7 +1589,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) else *bkey_refcount(new) = cpu_to_le64(r->refcount); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_reflink, new)); kfree(new); @@ -1702,7 +1702,7 @@ inconsistent: for (i = 0; i < new->v.nr_blocks; i++) stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i)); kfree(new); } @@ -2009,7 +2009,7 @@ int bch2_gc_gens(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter)); if (ret) { diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 28f958577006..e9127dbf7e24 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -118,7 +118,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, _ret; \ }) -#define __bch2_trans_do(_trans, _disk_res, _journal_seq, _flags, _do) \ +#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) @@ -128,8 +128,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, int _ret; \ \ bch2_trans_init(&trans, (_c), 0, 0); \ - _ret = __bch2_trans_do(&trans, _disk_res, _journal_seq, _flags, \ - _do); \ + _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \ bch2_trans_exit(&trans); \ \ _ret; \ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index eeaea292bd80..ee95a79dc13e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -599,7 +599,7 @@ static void btree_update_nodes_written(struct btree_update *as) * which may require allocations as well. 
*/ bch2_trans_init(&trans, c, 0, 512); - ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq, + ret = commit_do(&trans, &as->disk_res, &journal_seq, BTREE_INSERT_NOFAIL| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM| diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index eab01cc09337..99c9d5b14d48 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1853,7 +1853,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - return __bch2_trans_do(trans, NULL, NULL, 0, + return commit_do(trans, NULL, NULL, 0, __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b2bc28d0cf05..08268fe1074f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -443,7 +443,7 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_link_trans(&trans, inode_inum(dir), &dir_u, inode_inum(inode), &inode_u, @@ -492,7 +492,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, inode_inum(dir), &dir_u, @@ -614,7 +614,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, goto err; } - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_rename_trans(&trans, inode_inum(src_dir), &src_dir_u, inode_inum(dst_dir), &dst_dir_u, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f1abec95a740..bdf0183d5d21 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -220,7 +220,7 @@ static int write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 snapshot) { - int ret = __bch2_trans_do(trans, NULL, NULL, + int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, __write_inode(trans, inode, snapshot)); @@ -434,7 +434,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { - int ret = __bch2_trans_do(trans, NULL, NULL, + int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, __reattach_inode(trans, inode, inode_snapshot)); @@ -940,7 +940,7 @@ static int check_inodes(struct bch_fs *c, bool full) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_inode(&trans, &iter, &prev, full)); @@ -1002,7 +1002,7 @@ static int check_subvols(struct bch_fs *c) BTREE_ITER_PREFETCH); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_subvol(&trans, &iter)); @@ -1306,7 +1306,7 @@ static int check_extents(struct bch_fs *c) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_extent(&trans, &iter, &w, &s)); @@ -1687,7 +1687,7 @@ static int check_dirents(struct bch_fs *c) BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_dirent(&trans, &iter, &hash_info, @@ -1774,7 +1774,7 @@ static int check_xattrs(struct bch_fs *c) 
BTREE_ITER_ALL_SNAPSHOTS); do { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, check_xattr(&trans, &iter, &hash_info, @@ -1814,7 +1814,7 @@ static int check_root_trans(struct btree_trans *trans) root_subvol.v.flags = 0; root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.inode = cpu_to_le64(inum); - ret = __bch2_trans_do(trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i)); @@ -1977,7 +1977,7 @@ static int check_path(struct btree_trans *trans, if (!fsck_err(c, "directory structure loop")) return 0; - ret = __bch2_trans_do(trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, remove_backpointer(trans, inode)); @@ -2366,7 +2366,7 @@ static int fix_reflink_p(struct bch_fs *c) BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { if (k.k->type == KEY_TYPE_reflink_p) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, fix_reflink_p_key(&trans, &iter)); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 5a09b55006ff..94ecb3a39760 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -204,7 +204,7 @@ int bch2_check_lrus(struct bch_fs *c) for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, bch2_check_lru_key(&trans, &iter)); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 60b60de83f3e..d74dc9843028 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -605,7 +605,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, bch2_btree_key_cache_flush(trans, btree_id, iter.pos)) continue; - ret = __bch2_trans_do(trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(trans, &iter, @@ -664,7 +664,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) if (ret) continue; - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_snapshot_node_set_deleted(&trans, iter.pos.offset)); if (ret) { bch_err(c, "error deleting snapshot %llu: %i", iter.pos.offset, ret); @@ -713,7 +713,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work) } for (i = 0; i < deleted.nr; i++) { - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_snapshot_node_delete(&trans, deleted.data[i])); if (ret) { bch_err(c, "error deleting snapshot %u: %i", diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index fa3712a1478c..bfcb133ff483 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -42,7 +42,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { @@ -51,7 +51,7 @@ static int test_delete(struct bch_fs *c, u64 nr) } pr_info("deleting once"); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { @@ -60,7 +60,7 @@ static int 
test_delete(struct bch_fs *c, u64 nr) } pr_info("deleting twice"); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { @@ -88,7 +88,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { @@ -99,7 +99,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_trans_unlock(&trans); bch2_journal_flush_all_pins(&c->journal); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { @@ -552,7 +552,7 @@ static int rand_insert(struct bch_fs *c, u64 nr) k.k.p.offset = test_rand(); k.k.p.snapshot = U32_MAX; - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i)); if (ret) { bch_err(c, "error in rand_insert: %i", ret); @@ -581,7 +581,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) k[j].k.p.snapshot = U32_MAX; } - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[0].k_i) ?: __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[1].k_i) ?: __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[2].k_i) ?: @@ -668,7 +668,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr) for (i = 0; i < nr; i++) { rand = test_rand(); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, rand_mixed_trans(&trans, &iter, &cookie, i, rand)); if (ret) { bch_err(c, "update error in rand_mixed: %i", ret); @@ -714,7 +714,7 @@ static int rand_delete(struct bch_fs *c, u64 nr) for (i = 0; i < nr; i++) { struct bpos pos = SPOS(0, test_rand(), U32_MAX); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, __do_delete(&trans, pos)); if (ret) { bch_err(c, "error in rand_delete: %i", ret); @@ -743,7 +743,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { insert.k.p = iter.pos; - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &insert.k_i, 0)); if (ret) { @@ -794,7 +794,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bkey_reassemble(&u.k_i, k); - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = commit_do(&trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &u.k_i, 0)); if (ret) { -- cgit v1.2.3 From d4bf5eecd78a90d019b933929a14c91d6d41af62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 18 Jul 2022 19:42:58 -0400 Subject: bcachefs: Use bch2_err_str() in error messages Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 4 +-- fs/bcachefs/btree_gc.c | 35 ++++++++++++------------ fs/bcachefs/checksum.c | 31 ++++++++++++--------- fs/bcachefs/ec.c | 3 ++- fs/bcachefs/fs.c | 8 +++--- fs/bcachefs/fsck.c | 61 ++++++++++++++++++++++-------------------- fs/bcachefs/journal_reclaim.c | 9 ++++--- fs/bcachefs/migrate.c | 4 ++- fs/bcachefs/move.c | 3 ++- fs/bcachefs/movinggc.c | 11 +++++--- fs/bcachefs/quota.c | 3 ++- fs/bcachefs/rebalance.c | 9 ++++--- fs/bcachefs/recovery.c | 5 ++-- 
fs/bcachefs/subvolume.c | 23 ++++++++-------- fs/bcachefs/super.c | 21 ++++++++------- fs/bcachefs/tests.c | 42 ++++++++++++++--------------- 16 files changed, 149 insertions(+), 123 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 9ba1fdba4138..eb44a8bc04fe 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -464,7 +464,7 @@ int bch2_alloc_read(struct bch_fs *c) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error reading alloc info: %i", ret); + bch_err(c, "error reading alloc info: %s", bch2_err_str(ret)); return ret; } @@ -1211,7 +1211,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca) bch2_trans_exit(&trans); if (ret < 0) { - bch_err(ca, "error initializing free space: %i", ret); + bch_err(ca, "error initializing free space: %s", bch2_err_str(ret)); return ret; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7a7639e9ee3f..e7098e910a73 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -402,8 +402,8 @@ again: } if (ret) { - bch_err(c, "%s: error %i getting btree node", - __func__, ret); + bch_err(c, "%s: error getting btree node: %s", + __func__, bch2_err_str(ret)); break; } @@ -471,8 +471,8 @@ again: ret = PTR_ERR_OR_ZERO(cur); if (ret) { - bch_err(c, "%s: error %i getting btree node", - __func__, ret); + bch_err(c, "%s: error getting btree node: %s", + __func__, bch2_err_str(ret)); goto err; } @@ -804,7 +804,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, fsck_err: err: if (ret) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); return ret; } @@ -910,7 +910,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false, &k, true); if (ret) { - bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret); + bch_err(c, "%s: error from bch2_gc_mark_key: %s", + __func__, bch2_err_str(ret)); goto fsck_err; } @@ -970,8 +971,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b continue; } } else if (ret) { - bch_err(c, "%s: error %i getting btree node", - __func__, ret); + bch_err(c, "%s: error getting btree node: %s", + __func__, bch2_err_str(ret)); break; } @@ -1038,7 +1039,7 @@ fsck_err: six_unlock_read(&b->c.lock); if (ret < 0) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); printbuf_exit(&buf); return ret; } @@ -1068,7 +1069,7 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) : bch2_gc_btree(&trans, ids[i], initial, metadata_only); if (ret < 0) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; @@ -1266,7 +1267,7 @@ fsck_err: if (ca) percpu_ref_put(&ca->ref); if (ret) - bch_err(c, "%s: ret %i", __func__, ret); + bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret)); percpu_up_write(&c->mark_lock); printbuf_exit(&buf); @@ -1433,7 +1434,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) bch2_alloc_write_key(&trans, &iter, k, metadata_only)); if (ret < 0) { - bch_err(c, "error writing alloc info: %i", ret); + bch_err(c, "error writing alloc info: %s", bch2_err_str(ret)); percpu_ref_put(&ca->ref); break; } @@ -1497,7 +1498,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool 
metadata_only) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error reading alloc info at gc start: %i", ret); + bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret)); return ret; } @@ -1968,7 +1969,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_INSERT_NOFAIL, gc_btree_gens_key(&trans, &iter, k)); if (ret) { - bch_err(c, "error recalculating oldest_gen: %i", ret); + bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret)); goto err; } } @@ -1981,7 +1982,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter, k)); if (ret) { - bch_err(c, "error writing oldest_gen: %i", ret); + bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret)); goto err; } @@ -2053,7 +2054,7 @@ static int bch2_gc_thread(void *arg) ret = bch2_gc_gens(c); #endif if (ret < 0) - bch_err(c, "btree gc failed: %i", ret); + bch_err(c, "btree gc failed: %s", bch2_err_str(ret)); debug_check_no_locks_held(); } @@ -2083,7 +2084,7 @@ int bch2_gc_thread_start(struct bch_fs *c) p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name); if (IS_ERR(p)) { - bch_err(c, "error creating gc thread: %li", PTR_ERR(p)); + bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p))); return PTR_ERR(p); } diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 7c2af6754aea..b5850a761b91 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "checksum.h" +#include "errcode.h" #include "super.h" #include "super-io.h" @@ -527,7 +528,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c, ret = bch2_request_key(c->disk_sb.sb, &user_key); if (ret) { - bch_err(c, "error requesting encryption key: %i", ret); + bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret)); goto err; } @@ -552,20 +553,24 @@ err: static int bch2_alloc_ciphers(struct bch_fs *c) { + int ret; + if (!c->chacha20) c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); - if (IS_ERR(c->chacha20)) { - bch_err(c, "error requesting chacha20 module: %li", - PTR_ERR(c->chacha20)); - return PTR_ERR(c->chacha20); + ret = PTR_ERR_OR_ZERO(c->chacha20); + + if (ret) { + bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); + return ret; } if (!c->poly1305) c->poly1305 = crypto_alloc_shash("poly1305", 0, 0); - if (IS_ERR(c->poly1305)) { - bch_err(c, "error requesting poly1305 module: %li", - PTR_ERR(c->poly1305)); - return PTR_ERR(c->poly1305); + ret = PTR_ERR_OR_ZERO(c->poly1305); + + if (ret) { + bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); + return ret; } return 0; @@ -626,7 +631,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) if (keyed) { ret = bch2_request_key(c->disk_sb.sb, &user_key); if (ret) { - bch_err(c, "error requesting encryption key: %i", ret); + bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret)); goto err; } @@ -678,9 +683,9 @@ int bch2_fs_encryption_init(struct bch_fs *c) pr_verbose_init(c->opts, ""); c->sha256 = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(c->sha256)) { - bch_err(c, "error requesting sha256 module"); - ret = PTR_ERR(c->sha256); + ret = PTR_ERR_OR_ZERO(c->sha256); + if (ret) { + bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret)); goto out; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 80e1689765e6..947f2f2b1c09 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -949,7 +949,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) 
for_each_keylist_key(&s->keys, k) { ret = ec_stripe_update_extents(c, &s->new_stripe, &k->k); if (ret) { - bch_err(c, "error creating stripe: error %i updating pointers", ret); + bch_err(c, "error creating stripe: error updating pointers: %s", + bch2_err_str(ret)); break; } } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 08268fe1074f..876552a2a83b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -8,6 +8,7 @@ #include "buckets.h" #include "chardev.h" #include "dirent.h" +#include "errcode.h" #include "extents.h" #include "fs.h" #include "fs-common.h" @@ -1871,10 +1872,9 @@ got_sb: sb->s_shrink.seeks = 0; vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); - if (IS_ERR(vinode)) { - bch_err(c, "error mounting: error getting root inode %i", - (int) PTR_ERR(vinode)); - ret = PTR_ERR(vinode); + ret = PTR_ERR_OR_ZERO(vinode); + if (ret) { + bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret)); goto err_put_super; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index e601a1ee0ee1..021affcc82d4 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -137,8 +137,8 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, ret = bch2_inode_unpack(k, inode); err: if (ret && ret != -EINTR) - bch_err(trans->c, "error %i fetching inode %llu", - ret, inode_nr); + bch_err(trans->c, "error fetching inode %llu: %s", + inode_nr, bch2_err_str(ret)); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -165,8 +165,8 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, *snapshot = iter.pos.snapshot; err: if (ret && ret != -EINTR) - bch_err(trans->c, "error %i fetching inode %llu:%u", - ret, inode_nr, *snapshot); + bch_err(trans->c, "error fetching inode %llu:%u: %s", + inode_nr, *snapshot, bch2_err_str(ret)); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -225,7 +225,8 @@ static int write_inode(struct btree_trans *trans, BTREE_INSERT_LAZY_RW, __write_inode(trans, inode, snapshot)); if (ret) - bch_err(trans->c, "error in fsck: error %i updating inode", ret); + bch_err(trans->c, "error in fsck: error updating inode: %s", + bch2_err_str(ret)); return ret; } @@ -314,7 +315,7 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_exit(trans, &iter); err: if (ret && ret != -EINTR) - bch_err(c, "error %i from __remove_dirent()", ret); + bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret)); return ret; } @@ -350,7 +351,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, } if (ret && ret != -EINTR) - bch_err(c, "error looking up lost+found: %i", ret); + bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret)); if (ret) return ret; @@ -373,7 +374,7 @@ create_lostfound: 0, 0, S_IFDIR|0700, 0, NULL, NULL, (subvol_inum) { }, 0); if (ret && ret != -EINTR) - bch_err(c, "error creating lost+found: %i", ret); + bch_err(c, "error creating lost+found: %s", bch2_err_str(ret)); return ret; } @@ -437,8 +438,8 @@ static int reattach_inode(struct btree_trans *trans, BTREE_INSERT_NOFAIL, __reattach_inode(trans, inode, inode_snapshot)); if (ret) { - bch_err(trans->c, "error %i reattaching inode %llu", - ret, inode->bi_inum); + bch_err(trans->c, "error reattaching inode %llu: %s", + inode->bi_inum, bch2_err_str(ret)); return ret; } @@ -910,7 +911,8 @@ static int check_inode(struct btree_trans *trans, ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot); if (ret) - bch_err(c, "error in fsck: error %i while deleting inode", ret); + bch_err(c, "error in fsck: error 
while deleting inode: %s", + bch2_err_str(ret)); return ret; } @@ -933,7 +935,8 @@ static int check_inode(struct btree_trans *trans, POS(u.bi_inum, U64_MAX), 0, NULL); if (ret) { - bch_err(c, "error in fsck: error %i truncating inode", ret); + bch_err(c, "error in fsck: error truncating inode: %s", + bch2_err_str(ret)); return ret; } @@ -958,8 +961,8 @@ static int check_inode(struct btree_trans *trans, sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); if (sectors < 0) { - bch_err(c, "error in fsck: error %i recounting inode sectors", - (int) sectors); + bch_err(c, "error in fsck: error recounting inode sectors: %s", + bch2_err_str(sectors)); return sectors; } @@ -978,13 +981,13 @@ static int check_inode(struct btree_trans *trans, if (do_update) { ret = __write_inode(trans, &u, iter->pos.snapshot); if (ret) - bch_err(c, "error in fsck: error %i " - "updating inode", ret); + bch_err(c, "error in fsck: error updating inode: %s", + bch2_err_str(ret)); } err: fsck_err: if (ret) - bch_err(c, "error %i from check_inode()", ret); + bch_err(c, "error from check_inode(): %s", bch2_err_str(ret)); return ret; } @@ -1010,7 +1013,7 @@ static int check_inodes(struct bch_fs *c, bool full) bch2_trans_exit(&trans); snapshots_seen_exit(&s); if (ret) - bch_err(c, "error %i from check_inodes()", ret); + bch_err(c, "error from check_inodes(): %s", bch2_err_str(ret)); return ret; } @@ -1145,7 +1148,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) } fsck_err: if (ret) - bch_err(c, "error %i from check_i_sectors()", ret); + bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret)); return ret ?: ret2; } @@ -1327,7 +1330,7 @@ static int check_extents(struct bch_fs *c) snapshots_seen_exit(&s); if (ret) - bch_err(c, "error %i from check_extents()", ret); + bch_err(c, "error from check_extents(): %s", bch2_err_str(ret)); return ret; } @@ -1366,7 +1369,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) } fsck_err: if (ret) - bch_err(c, "error %i from check_subdir_count()", ret); + bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret)); return ret ?: ret2; } @@ -1485,7 +1488,7 @@ fsck_err: printbuf_exit(&buf); if (ret && ret != -EINTR) - bch_err(c, "error %i from check_target()", ret); + bch_err(c, "error from check_target(): %s", bch2_err_str(ret)); return ret; } @@ -1658,7 +1661,7 @@ fsck_err: printbuf_exit(&buf); if (ret && ret != -EINTR) - bch_err(c, "error %i from check_dirent()", ret); + bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret)); return ret; } @@ -1697,7 +1700,7 @@ static int check_dirents(struct bch_fs *c) inode_walker_exit(&target); if (ret) - bch_err(c, "error %i from check_dirents()", ret); + bch_err(c, "error from check_dirents(): %s", bch2_err_str(ret)); return ret; } @@ -1733,7 +1736,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); fsck_err: if (ret && ret != -EINTR) - bch_err(c, "error %i from check_xattr()", ret); + bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret)); return ret; } @@ -1765,7 +1768,7 @@ static int check_xattrs(struct bch_fs *c) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error %i from check_xattrs()", ret); + bch_err(c, "error from check_xattrs(): %s", bch2_err_str(ret)); return ret; } @@ -1797,7 +1800,7 @@ static int check_root_trans(struct btree_trans *trans) BTREE_INSERT_LAZY_RW, __bch2_btree_insert(trans, BTREE_ID_subvolumes, 
&root_subvol.k_i)); if (ret) { - bch_err(c, "error writing root subvol: %i", ret); + bch_err(c, "error writing root subvol: %s", bch2_err_str(ret)); goto err; } @@ -1816,7 +1819,7 @@ static int check_root_trans(struct btree_trans *trans) ret = __write_inode(trans, &root_inode, snapshot); if (ret) - bch_err(c, "error writing root inode: %i", ret); + bch_err(c, "error writing root inode: %s", bch2_err_str(ret)); } err: fsck_err: @@ -1969,7 +1972,7 @@ static int check_path(struct btree_trans *trans, } fsck_err: if (ret) - bch_err(c, "%s: err %i", __func__, ret); + bch_err(c, "%s: err %s", __func__, bch2_err_str(ret)); return ret; } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 873cc14e2ae9..00d9e3a8e526 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" +#include "errcode.h" #include "error.h" #include "journal.h" #include "journal_io.h" @@ -741,15 +742,17 @@ int bch2_journal_reclaim_start(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct task_struct *p; + int ret; if (j->reclaim_thread) return 0; p = kthread_create(bch2_journal_reclaim_thread, j, "bch-reclaim/%s", c->name); - if (IS_ERR(p)) { - bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p)); - return PTR_ERR(p); + ret = PTR_ERR_OR_ZERO(p); + if (ret) { + bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret)); + return ret; } get_task_struct(p); diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index be89628702f7..baeca0e2a302 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -8,6 +8,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" +#include "errcode.h" #include "extents.h" #include "io.h" #include "journal.h" @@ -151,7 +152,8 @@ retry: } if (ret) { - bch_err(c, "Error updating btree node key: %i", ret); + bch_err(c, "Error updating btree node key: %s", + bch2_err_str(ret)); break; } next: diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 8b44d95c32ce..7fba0f70c409 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -8,6 +8,7 @@ #include "btree_update_interior.h" #include "disk_groups.h" #include "ec.h" +#include "errcode.h" #include "inode.h" #include "io.h" #include "journal_reclaim.h" @@ -564,7 +565,7 @@ next: bch2_trans_exit(&trans); if (ret) - bch_err(c, "error %i in bch2_move_btree", ret); + bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret)); bch2_btree_interior_updates_flush(c); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 49fb405c1430..438ea22ad5bd 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -13,6 +13,7 @@ #include "buckets.h" #include "clock.h" #include "disk_groups.h" +#include "errcode.h" #include "error.h" #include "extents.h" #include "eytzinger.h" @@ -319,7 +320,7 @@ static int bch2_copygc(struct bch_fs *c) false, copygc_pred, NULL); if (ret < 0) - bch_err(c, "error %i from bch2_move_data() in copygc", ret); + bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); if (ret) return ret; @@ -427,6 +428,7 @@ void bch2_copygc_stop(struct bch_fs *c) int bch2_copygc_start(struct bch_fs *c) { struct task_struct *t; + int ret; if (c->copygc_thread) return 0; @@ -438,9 +440,10 @@ int bch2_copygc_start(struct bch_fs *c) return -ENOMEM; t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); - if (IS_ERR(t)) { - bch_err(c, "error creating copygc thread: %li", 
PTR_ERR(t)); - return PTR_ERR(t); + ret = PTR_ERR_OR_ZERO(t); + if (ret) { + bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret)); + return ret; } get_task_struct(t); diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 42c831da70be..454c76e03be9 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "btree_update.h" +#include "errcode.h" #include "inode.h" #include "quota.h" #include "subvolume.h" @@ -488,7 +489,7 @@ int bch2_fs_quota_read(struct bch_fs *c) POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, bch2_fs_quota_read_inode(&trans, &iter, k)); if (ret) - bch_err(c, "err in quota_read: %i", ret); + bch_err(c, "err in quota_read: %s", bch2_err_str(ret)); bch2_trans_exit(&trans); return ret; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 1de8183ea295..6b9ccc1b3fe3 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -6,6 +6,7 @@ #include "buckets.h" #include "clock.h" #include "disk_groups.h" +#include "errcode.h" #include "extents.h" #include "io.h" #include "move.h" @@ -332,6 +333,7 @@ void bch2_rebalance_stop(struct bch_fs *c) int bch2_rebalance_start(struct bch_fs *c) { struct task_struct *p; + int ret; if (c->rebalance.thread) return 0; @@ -340,9 +342,10 @@ int bch2_rebalance_start(struct bch_fs *c) return 0; p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); - if (IS_ERR(p)) { - bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p)); - return PTR_ERR(p); + ret = PTR_ERR_OR_ZERO(p); + if (ret) { + bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret)); + return ret; } get_task_struct(p); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 7fb470e2e7f3..bb04b6f053cc 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -10,6 +10,7 @@ #include "buckets.h" #include "dirent.h" #include "ec.h" +#include "errcode.h" #include "error.h" #include "fs-common.h" #include "fsck.h" @@ -1419,9 +1420,9 @@ out: } if (ret) - bch_err(c, "Error in recovery: %s (%i)", err, ret); + bch_err(c, "Error in recovery: %s (%s)", err, bch2_err_str(ret)); else - bch_verbose(c, "ret %i", ret); + bch_verbose(c, "ret %s", bch2_err_str(ret)); return ret; err: fsck_err: diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 76be8735c700..0469b90064eb 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "btree_key_cache.h" #include "btree_update.h" +#include "errcode.h" #include "error.h" #include "fs.h" #include "subvolume.h" @@ -315,8 +316,8 @@ static int check_subvol(struct btree_trans *trans, if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); if (ret && ret != -EINTR) - bch_err(trans->c, "error deleting subvolume %llu: %i", - iter->pos.offset, ret); + bch_err(trans->c, "error deleting subvolume %llu: %s", + iter->pos.offset, bch2_err_str(ret)); if (ret) return ret; } @@ -365,7 +366,7 @@ int bch2_fs_snapshots_start(struct bch_fs *c) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error starting snapshots: %i", ret); + bch_err(c, "error starting snapshots: %s", bch2_err_str(ret)); return ret; } @@ -647,7 +648,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!test_bit(BCH_FS_STARTED, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { - bch_err(c, "error deleleting dead snapshots: error going rw: %i", ret); + bch_err(c, "error deleting dead snapshots:
error going rw: %s", bch2_err_str(ret)); return ret; } } @@ -663,7 +664,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) NULL, NULL, 0, bch2_delete_redundant_snapshot(&trans, &iter, k)); if (ret) { - bch_err(c, "error deleting redundant snapshots: %i", ret); + bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret)); goto err; } @@ -671,7 +672,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) POS_MIN, 0, k, bch2_snapshot_set_equiv(&trans, k)); if (ret) { - bch_err(c, "error in bch2_snapshots_set_equiv: %i", ret); + bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret)); goto err; } @@ -690,7 +691,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) bch2_trans_iter_exit(&trans, &iter); if (ret) { - bch_err(c, "error walking snapshots: %i", ret); + bch_err(c, "error walking snapshots: %s", bch2_err_str(ret)); goto err; } @@ -710,7 +711,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) darray_exit(&equiv_seen); if (ret) { - bch_err(c, "error deleting snapshot keys: %i", ret); + bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret)); goto err; } } @@ -719,8 +720,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ret = commit_do(&trans, NULL, NULL, 0, bch2_snapshot_node_delete(&trans, deleted.data[i])); if (ret) { - bch_err(c, "error deleting snapshot %u: %i", - deleted.data[i], ret); + bch_err(c, "error deleting snapshot %u: %s", + deleted.data[i], bch2_err_str(ret)); goto err; } } @@ -912,7 +913,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work) ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_subvolume_delete(&trans, *id)); if (ret) { - bch_err(c, "error %i deleting subvolume %u", ret, *id); + bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret)); break; } } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b926fb1b14a9..87742962d6c2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -25,6 +25,7 @@ #include "debug.h" #include "disk_groups.h" #include "ec.h" +#include "errcode.h" #include "error.h" #include "fs.h" #include "fs-io.h" @@ -1430,7 +1431,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) bch2_btree_delete_range(c, BTREE_ID_alloc, start, end, BTREE_TRIGGER_NORUN, NULL); if (ret) - bch_err(c, "error %i removing dev alloc info", ret); + bch_err(c, "error removing dev alloc info: %s", bch2_err_str(ret)); return ret; } @@ -1458,7 +1459,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_dev_data_drop(c, ca->dev_idx, flags); if (ret) { - bch_err(ca, "Remove failed: error %i dropping data", ret); + bch_err(ca, "Remove failed: error dropping data: %s", bch2_err_str(ret)); goto err; } @@ -1470,7 +1471,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); if (ret) { - bch_err(ca, "Remove failed: error %i flushing journal", ret); + bch_err(ca, "Remove failed: error flushing journal: %s", bch2_err_str(ret)); goto err; } @@ -1482,7 +1483,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_replicas_gc2(c); if (ret) { - bch_err(ca, "Remove failed: error %i from replicas gc", ret); + bch_err(ca, "Remove failed: error from replicas gc: %s", bch2_err_str(ret)); goto err; } @@ -1546,7 +1547,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ret = bch2_read_super(path, &opts, &sb); if (ret) { - bch_err(c, "device add error: error reading super: %i", ret); + bch_err(c, "device add 
error: error reading super: %s", bch2_err_str(ret)); goto err; } @@ -1639,13 +1640,13 @@ have_slot: ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err(c, "device add error: error marking new superblock: %i", ret); + bch_err(c, "device add error: error marking new superblock: %s", bch2_err_str(ret)); goto err_late; } ret = bch2_fs_freespace_init(c); if (ret) { - bch_err(c, "device add error: error initializing free space: %i", ret); + bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret)); goto err_late; } @@ -1707,8 +1708,8 @@ int bch2_dev_online(struct bch_fs *c, const char *path) ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err(c, "error bringing %s online: error %i from bch2_trans_mark_dev_sb", - path, ret); + bch_err(c, "error bringing %s online: error from bch2_trans_mark_dev_sb: %s", + path, bch2_err_str(ret)); goto err; } @@ -1777,7 +1778,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) ret = bch2_dev_buckets_resize(c, ca, nbuckets); if (ret) { - bch_err(ca, "Resize error: %i", ret); + bch_err(ca, "Resize error: %s", bch2_err_str(ret)); goto err; } diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index bfcb133ff483..bf0a33c0233d 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -46,7 +46,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { - bch_err(c, "update error in test_delete: %i", ret); + bch_err(c, "update error in test_delete: %s", bch2_err_str(ret)); goto err; } @@ -55,7 +55,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { - bch_err(c, "delete error (first) in test_delete: %i", ret); + bch_err(c, "delete error (first) in test_delete: %s", bch2_err_str(ret)); goto err; } @@ -64,7 +64,7 @@ static int test_delete(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { - bch_err(c, "delete error (second) in test_delete: %i", ret); + bch_err(c, "delete error (second) in test_delete: %s", bch2_err_str(ret)); goto err; } err: @@ -92,7 +92,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &k.k_i, 0)); if (ret) { - bch_err(c, "update error in test_delete_written: %i", ret); + bch_err(c, "update error in test_delete_written: %s", bch2_err_str(ret)); goto err; } @@ -103,7 +103,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_btree_delete_at(&trans, &iter, 0)); if (ret) { - bch_err(c, "delete error in test_delete_written: %i", ret); + bch_err(c, "delete error in test_delete_written: %s", bch2_err_str(ret)); goto err; } err: @@ -136,7 +136,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate: %i", ret); + bch_err(c, "insert error in test_iterate: %s", bch2_err_str(ret)); goto err; } } @@ -192,7 +192,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate_extents: %i", ret); + bch_err(c, "insert error in test_iterate_extents: %s", bch2_err_str(ret)); goto err; } } @@ -247,7 +247,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, 
BTREE_ID_xattrs, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate_slots: %i", ret); + bch_err(c, "insert error in test_iterate_slots: %s", bch2_err_str(ret)); goto err; } } @@ -313,7 +313,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) { - bch_err(c, "insert error in test_iterate_slots_extents: %i", ret); + bch_err(c, "insert error in test_iterate_slots_extents: %s", bch2_err_str(ret)); goto err; } } @@ -419,7 +419,7 @@ static int insert_test_extent(struct bch_fs *c, ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, NULL, 0); if (ret) - bch_err(c, "insert error in insert_test_extent: %i", ret); + bch_err(c, "insert error in insert_test_extent: %s", bch2_err_str(ret)); return ret; } @@ -518,7 +518,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr) ret = test_snapshot_filter(c, snapids[0], snapids[1]); if (ret) { - bch_err(c, "err %i from test_snapshot_filter", ret); + bch_err(c, "err from test_snapshot_filter: %s", bch2_err_str(ret)); return ret; } @@ -555,7 +555,7 @@ static int rand_insert(struct bch_fs *c, u64 nr) ret = commit_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i)); if (ret) { - bch_err(c, "error in rand_insert: %i", ret); + bch_err(c, "error in rand_insert: %s", bch2_err_str(ret)); break; } } @@ -591,7 +591,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i) ?: __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i)); if (ret) { - bch_err(c, "error in rand_insert_multi: %i", ret); + bch_err(c, "error in rand_insert_multi: %s", bch2_err_str(ret)); break; } } @@ -618,7 +618,7 @@ static int rand_lookup(struct bch_fs *c, u64 nr) k = bch2_btree_iter_peek(&iter); ret = bkey_err(k); if (ret) { - bch_err(c, "error in rand_lookup: %i", ret); + bch_err(c, "error in rand_lookup: %s", bch2_err_str(ret)); break; } } @@ -641,7 +641,7 @@ static int rand_mixed_trans(struct btree_trans *trans, k = bch2_btree_iter_peek(iter); ret = bkey_err(k); if (ret && ret != -EINTR) - bch_err(trans->c, "lookup error in rand_mixed: %i", ret); + bch_err(trans->c, "lookup error in rand_mixed: %s", bch2_err_str(ret)); if (ret) return ret; @@ -671,7 +671,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr) ret = commit_do(&trans, NULL, NULL, 0, rand_mixed_trans(&trans, &iter, &cookie, i, rand)); if (ret) { - bch_err(c, "update error in rand_mixed: %i", ret); + bch_err(c, "update error in rand_mixed: %s", bch2_err_str(ret)); break; } } @@ -717,7 +717,7 @@ static int rand_delete(struct bch_fs *c, u64 nr) ret = commit_do(&trans, NULL, NULL, 0, __do_delete(&trans, pos)); if (ret) { - bch_err(c, "error in rand_delete: %i", ret); + bch_err(c, "error in rand_delete: %s", bch2_err_str(ret)); break; } } @@ -747,7 +747,7 @@ static int seq_insert(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &insert.k_i, 0)); if (ret) { - bch_err(c, "error in seq_insert: %i", ret); + bch_err(c, "error in seq_insert: %s", bch2_err_str(ret)); break; } @@ -798,7 +798,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bch2_btree_iter_traverse(&iter) ?: bch2_trans_update(&trans, &iter, &u.k_i, 0)); if (ret) { - bch_err(c, "error in seq_overwrite: %i", ret); + bch_err(c, "error in seq_overwrite: %s", bch2_err_str(ret)); break; } } @@ -816,7 +816,7 @@ static int seq_delete(struct bch_fs *c, u64 nr) SPOS(0, 0, U32_MAX), SPOS_MAX, 0, NULL); if (ret) - bch_err(c, 
"error in seq_delete: %i", ret); + bch_err(c, "error in seq_delete: %s", bch2_err_str(ret)); return ret; } @@ -853,7 +853,7 @@ static int btree_perf_test_thread(void *data) ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads)); if (ret) { - bch_err(j->c, "%ps: error %i", j->fn, ret); + bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret)); j->ret = ret; } -- cgit v1.2.3 From 549d173c1bd9b58c2ad41217522462e012a6545f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 17 Jul 2022 23:06:38 -0400 Subject: bcachefs: EINTR -> BCH_ERR_transaction_restart Now that we have error codes, with subtypes, we can switch to our own error code for transaction restarts - and even better, a distinct error code for each transaction restart reason: clearer code and better debugging. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 4 +- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/alloc_foreground.c | 17 +++--- fs/bcachefs/btree_cache.c | 54 +++++++++++-------- fs/bcachefs/btree_iter.c | 105 ++++++++++++++++++------------------ fs/bcachefs/btree_iter.h | 54 ++++++++++++------- fs/bcachefs/btree_key_cache.c | 41 +++++++------- fs/bcachefs/btree_locking.h | 38 ++++++------- fs/bcachefs/btree_types.h | 2 +- fs/bcachefs/btree_update.h | 1 - fs/bcachefs/btree_update_interior.c | 25 +++++---- fs/bcachefs/btree_update_leaf.c | 69 ++++++++++-------------- fs/bcachefs/data_update.c | 4 +- fs/bcachefs/dirent.c | 4 +- fs/bcachefs/ec.c | 10 ++-- fs/bcachefs/errcode.h | 25 ++++++++- fs/bcachefs/fs-io.c | 22 ++++---- fs/bcachefs/fs.c | 10 ++-- fs/bcachefs/fsck.c | 39 +++++++------- fs/bcachefs/inode.c | 4 +- fs/bcachefs/io.c | 25 +++++---- fs/bcachefs/journal_seq_blacklist.c | 2 +- fs/bcachefs/migrate.c | 6 +-- fs/bcachefs/move.c | 10 ++-- fs/bcachefs/reflink.c | 5 +- fs/bcachefs/subvolume.c | 2 +- fs/bcachefs/tests.c | 2 +- fs/bcachefs/xattr.c | 2 +- 28 files changed, 314 insertions(+), 270 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 00cd40a8d7fa..7edebeed779e 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -234,7 +234,7 @@ retry: &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (ret) { - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (ret != -ENOENT) acl = ERR_PTR(ret); @@ -334,7 +334,7 @@ retry: btree_err: bch2_trans_iter_exit(&trans, &inode_iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index eb44a8bc04fe..15c3c9a2da7b 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -995,7 +995,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, GFP_KERNEL); *discard_pos_done = iter.pos; - ret = bch2_trans_relock(trans) ? 
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 00cd40a8d7fa..7edebeed779e 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -234,7 +234,7 @@ retry: &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (ret) { - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (ret != -ENOENT) acl = ERR_PTR(ret); @@ -334,7 +334,7 @@ retry: btree_err: bch2_trans_iter_exit(&trans, &inode_iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index eb44a8bc04fe..15c3c9a2da7b 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -995,7 +995,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, GFP_KERNEL); *discard_pos_done = iter.pos; - ret = bch2_trans_relock(trans) ? 0 : -EINTR; + ret = bch2_trans_relock(trans); if (ret) goto out; } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index a9f893361c73..99fbf1d2dee5 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -470,8 +470,9 @@ again: for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); alloc_cursor < k.k->p.offset; alloc_cursor++) { - if (btree_trans_too_many_iters(trans)) { - ob = ERR_PTR(-EINTR); + ret = btree_trans_too_many_iters(trans); + if (ret) { + ob = ERR_PTR(ret); break; } @@ -488,7 +489,8 @@ again: break; } } - if (ob) + + if (ob || ret) break; } bch2_trans_iter_exit(trans, &iter); @@ -738,7 +740,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, ret = PTR_ERR_OR_ZERO(ob); if (ret) { - if (ret == -EINTR || cl) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) break; continue; } @@ -925,7 +927,7 @@ static int open_bucket_add_buckets(struct btree_trans *trans, target, erasure_code, nr_replicas, nr_effective, have_cache, flags, _cl); - if (ret == -EINTR || + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || bch2_err_matches(ret, BCH_ERR_freelist_empty) || bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) return ret; @@ -949,7 +951,7 @@ retry_blocking: nr_replicas, nr_effective, have_cache, reserve, flags, cl); if (ret && - ret != -EINTR && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && !cl && _cl) { cl = _cl; @@ -1191,7 +1193,8 @@ retry: nr_replicas, &nr_effective, &have_cache, reserve, ob_flags, NULL); - if (!ret || ret == -EINTR) + if (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto alloc_done; ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 1f80f08a69b2..4032c27fcc9c 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -7,6 +7,7 @@ #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" +#include "errcode.h" #include "error.h" #include "trace.h" @@ -692,8 +693,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, if (trans && !bch2_btree_node_relock(trans, path, level + 1)) { trace_trans_restart_relock_parent_for_fill(trans->fn, _THIS_IP_, btree_id, &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); } b = bch2_btree_node_mem_alloc(c, level != 0); @@ -702,8 +702,8 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, trans->memory_allocation_failure = true; trace_trans_restart_memory_allocation_failure(trans->fn, _THIS_IP_, btree_id, &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail)); } if (IS_ERR(b)) @@ -740,18 +740,19 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, if (!sync) return NULL; - if (trans && - (!bch2_trans_relock(trans) || - !bch2_btree_path_relock_intent(trans, path))) { - BUG_ON(!trans->restarted); - return ERR_PTR(-EINTR); + if (trans) { + int ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock_intent(trans, path); + if (ret) { + BUG_ON(!trans->restarted); + return ERR_PTR(ret); + } } if (!six_relock_type(&b->c.lock, lock_type, seq)) { trace_trans_restart_relock_after_fill(trans->fn, _THIS_IP_, btree_id, &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); +
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill)); } return b; @@ -762,7 +763,9 @@ static int lock_node_check_fn(struct six_lock *lock, void *p) struct btree *b = container_of(lock, struct btree, c.lock); const struct bkey_i *k = p; - return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1; + if (b->hash_val != btree_ptr_hash_val(k)) + return BCH_ERR_lock_fail_node_reused; + return 0; } static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) @@ -821,6 +824,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; + int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); @@ -885,11 +889,14 @@ lock_node: if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); - if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type, - lock_node_check_fn, (void *) k, trace_ip)) { - if (!trans->restarted) + ret = btree_node_lock(trans, path, b, k->k.p, level, lock_type, + lock_node_check_fn, (void *) k, trace_ip); + if (unlikely(ret)) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) goto retry; - return ERR_PTR(-EINTR); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ERR_PTR(ret); + BUG(); } if (unlikely(b->hash_val != btree_ptr_hash_val(k) || @@ -903,8 +910,7 @@ lock_node: trace_ip, path->btree_id, &path->pos); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); } } @@ -920,11 +926,13 @@ lock_node: * should_be_locked is not set on this path yet, so we need to * relock it specifically: */ - if (trans && - (!bch2_trans_relock(trans) || - !bch2_btree_path_relock_intent(trans, path))) { - BUG_ON(!trans->restarted); - return ERR_PTR(-EINTR); + if (trans) { + int ret = bch2_trans_relock(trans) ?: + bch2_btree_path_relock_intent(trans, path); + if (ret) { + BUG_ON(!trans->restarted); + return ERR_PTR(ret); + } } if (!six_relock_type(&b->c.lock, lock_type, seq)) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 45ecd196bceb..db247c96298f 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -34,7 +34,7 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans) if (need_resched() || race_fault()) { bch2_trans_unlock(trans); schedule(); - return bch2_trans_relock(trans) ? 
0 : -EINTR; + return bch2_trans_relock(trans); } else { return 0; } @@ -285,13 +285,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b, } /* Slowpath: */ -bool __bch2_btree_node_lock(struct btree_trans *trans, - struct btree_path *path, - struct btree *b, - struct bpos pos, unsigned level, - enum six_lock_type type, - six_lock_should_sleep_fn should_sleep_fn, void *p, - unsigned long ip) +int __bch2_btree_node_lock(struct btree_trans *trans, + struct btree_path *path, + struct btree *b, + struct bpos pos, unsigned level, + enum six_lock_type type, + six_lock_should_sleep_fn should_sleep_fn, void *p, + unsigned long ip) { struct btree_path *linked; unsigned reason; @@ -369,8 +369,7 @@ deadlock: path->btree_id, path->cached, &pos); - btree_trans_restart(trans); - return false; + return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } /* Btree iterator locking: */ @@ -408,8 +407,8 @@ static inline void bch2_btree_path_verify_locks(struct btree_path *path) {} /* * Only for btree_cache.c - only relocks intent locks */ -bool bch2_btree_path_relock_intent(struct btree_trans *trans, - struct btree_path *path) +int bch2_btree_path_relock_intent(struct btree_trans *trans, + struct btree_path *path) { unsigned l; @@ -421,16 +420,15 @@ bool bch2_btree_path_relock_intent(struct btree_trans *trans, btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_, path->btree_id, &path->pos); - btree_trans_restart(trans); - return false; + return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent); } } - return true; + return 0; } noinline __flatten -static bool __bch2_btree_path_relock(struct btree_trans *trans, +static int __bch2_btree_path_relock(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { bool ret = btree_path_get_locks(trans, path, false); @@ -438,16 +436,17 @@ static bool __bch2_btree_path_relock(struct btree_trans *trans, if (!ret) { trace_trans_restart_relock_path(trans->fn, trace_ip, path->btree_id, &path->pos); - btree_trans_restart(trans); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path); } - return ret; + + return 0; } -static inline bool bch2_btree_path_relock(struct btree_trans *trans, +static inline int bch2_btree_path_relock(struct btree_trans *trans, struct btree_path *path, unsigned long trace_ip) { return btree_node_locked(path, path->level) - ? true + ? 0 : __bch2_btree_path_relock(trans, path, trace_ip); } @@ -532,22 +531,22 @@ void bch2_trans_downgrade(struct btree_trans *trans) /* Btree transaction locking: */ -bool bch2_trans_relock(struct btree_trans *trans) +int bch2_trans_relock(struct btree_trans *trans) { struct btree_path *path; if (unlikely(trans->restarted)) - return false; + return -BCH_ERR_transaction_restart_relock; trans_for_each_path(trans, path) if (path->should_be_locked && - !bch2_btree_path_relock(trans, path, _RET_IP_)) { + bch2_btree_path_relock(trans, path, _RET_IP_)) { trace_trans_restart_relock(trans->fn, _RET_IP_, path->btree_id, &path->pos); BUG_ON(!trans->restarted); - return false; + return -BCH_ERR_transaction_restart_relock; } - return true; + return 0; } void bch2_trans_unlock(struct btree_trans *trans) @@ -1187,7 +1186,9 @@ static int lock_root_check_fn(struct six_lock *lock, void *p) struct btree *b = container_of(lock, struct btree, c.lock); struct btree **rootp = p; - return b == *rootp ? 
0 : -1; + if (b != *rootp) + return BCH_ERR_lock_fail_root_changed; + return 0; } static inline int btree_path_lock_root(struct btree_trans *trans, @@ -1199,6 +1200,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, struct btree *b, **rootp = &c->btree_roots[path->btree_id].b; enum six_lock_type lock_type; unsigned i; + int ret; EBUG_ON(path->nodes_locked); @@ -1220,13 +1222,16 @@ static inline int btree_path_lock_root(struct btree_trans *trans, } lock_type = __btree_lock_want(path, path->level); - if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX, - path->level, lock_type, - lock_root_check_fn, rootp, - trace_ip))) { - if (trans->restarted) - return -EINTR; - continue; + ret = btree_node_lock(trans, path, b, SPOS_MAX, + path->level, lock_type, + lock_root_check_fn, rootp, + trace_ip); + if (unlikely(ret)) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) + continue; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + BUG(); } if (likely(b == READ_ONCE(*rootp) && @@ -1431,12 +1436,12 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) int i, ret = 0; if (trans->in_traverse_all) - return -EINTR; + return -BCH_ERR_transaction_restart_in_traverse_all; trans->in_traverse_all = true; retry_all: prev = NULL; - trans->restarted = false; + trans->restarted = 0; trans_for_each_path(trans, path) path->should_be_locked = false; @@ -1480,7 +1485,8 @@ retry_all: */ if (path->uptodate) { ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_); - if (ret == -EINTR || ret == -ENOMEM) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + ret == -ENOMEM) goto retry_all; if (ret) goto err; @@ -1587,19 +1593,17 @@ static int btree_path_traverse_one(struct btree_trans *trans, unsigned long trace_ip) { unsigned depth_want = path->level; - int ret = 0; + int ret = trans->restarted; - if (unlikely(trans->restarted)) { - ret = -EINTR; + if (unlikely(ret)) goto out; - } /* * Ensure we obey path->should_be_locked: if it's set, we can't unlock * and re-traverse the path without a transaction restart: */ if (path->should_be_locked) { - ret = bch2_btree_path_relock(trans, path, trace_ip) ? 
0 : -EINTR; + ret = bch2_btree_path_relock(trans, path, trace_ip); goto out; } @@ -1648,7 +1652,7 @@ static int btree_path_traverse_one(struct btree_trans *trans, path->uptodate = BTREE_ITER_UPTODATE; out: - BUG_ON((ret == -EINTR) != !!trans->restarted); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); bch2_btree_path_verify(trans, path); return ret; } @@ -2135,8 +2139,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_, path->btree_id, &path->pos); - btree_trans_restart(trans); - ret = -EINTR; + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock); goto err; } @@ -2517,8 +2520,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e BUG_ON(!iter->path->nodes_locked); out: if (iter->update_path) { - if (unlikely(!bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_))) { - k = bkey_s_c_err(-EINTR); + ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); } else { BUG_ON(!(iter->update_path->nodes_locked & 1)); iter->update_path->should_be_locked = true; @@ -3169,8 +3173,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (old_bytes) { trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes); - btree_trans_restart(trans); - return ERR_PTR(-EINTR); + return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); } } @@ -3184,9 +3187,9 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) * bch2_trans_begin() - reset a transaction after an interrupted attempt * @trans: transaction to reset * - * While iterating over nodes or updating nodes a attempt to lock a btree - * node may return EINTR when the trylock fails. When this occurs - * bch2_trans_begin() should be called and the transaction retried. + * While iterating over nodes or updating nodes an attempt to lock a btree node + * may return BCH_ERR_transaction_restart when the trylock fails. When this + * occurs bch2_trans_begin() should be called and the transaction retried. */ u32 bch2_trans_begin(struct btree_trans *trans) {
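/*
 * A sketch of the retry pattern the comment above describes - essentially
 * what the lockrestart_do() macro in the btree_iter.h hunk that follows
 * expands to.  do_update() is a stand-in for any transactional operation,
 * not a real function:
 *
 *	static int update_something(struct btree_trans *trans)
 *	{
 *		int ret;
 *
 *		do {
 *			bch2_trans_begin(trans);
 *			ret = do_update(trans);
 *		} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
 *
 *		return ret;
 *	}
 */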
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 1952a7683610..79339a6abcd7 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -197,27 +197,36 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *, struct btree *, struct btree_node_iter *, struct bkey_packed *, unsigned, unsigned); -bool bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); +int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *); void bch2_path_put(struct btree_trans *, struct btree_path *, bool); -bool bch2_trans_relock(struct btree_trans *); +int bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); -static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count) +static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count) { - return restart_count != trans->restart_count ? -EINTR : 0; + return restart_count != trans->restart_count; } void bch2_trans_verify_not_restarted(struct btree_trans *, u32); __always_inline -static inline int btree_trans_restart(struct btree_trans *trans) +static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int err) { - trans->restarted = true; + BUG_ON(err <= 0); + BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); + + trans->restarted = err; trans->restart_count++; - bch2_trans_unlock(trans); - return -EINTR; + return -err; +} + +__always_inline +static inline int btree_trans_restart(struct btree_trans *trans, int err) +{ + btree_trans_restart_nounlock(trans, err); + bch2_trans_unlock(trans); + return -err; } bool bch2_btree_node_upgrade(struct btree_trans *, @@ -338,7 +347,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter struct btree *b; while (b = bch2_btree_iter_peek_node(iter), - PTR_ERR_OR_ZERO(b) == -EINTR) + bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart)) bch2_trans_begin(trans); return b; @@ -387,7 +396,7 @@ static inline int btree_trans_too_many_iters(struct btree_trans *trans) { if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) { trace_trans_restart_too_many_iters(trans->fn, _THIS_IP_); - return btree_trans_restart(trans); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); } return 0; @@ -401,7 +410,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, while (btree_trans_too_many_iters(trans) || (k = bch2_btree_iter_peek_type(iter, flags), - bkey_err(k) == -EINTR)) + bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart))) bch2_trans_begin(trans); return k; @@ -414,7 +423,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, do { \ bch2_trans_begin(_trans); \ _ret = (_do); \ - } while (_ret == -EINTR); \ + } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \ \ _ret; \ }) @@ -425,7 +434,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, * These are like lockrestart_do() and commit_do(), with two differences: * * - We don't call bch2_trans_begin() unless we had a transaction restart - * - We return -EINTR if we succeeded after a transaction restart + * - We return -BCH_ERR_transaction_restart_nested if we succeeded after a + * transaction restart */ #define nested_lockrestart_do(_trans, _do) \ ({ \ @@ -434,13 +444,16 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, \ _restart_count = _orig_restart_count = (_trans)->restart_count; \ \ - while ((_ret = (_do)) == -EINTR) \ + while (bch2_err_matches(_ret = (_do), BCH_ERR_transaction_restart))\ _restart_count = bch2_trans_begin(_trans); \ \ if (!_ret) \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ \ - _ret ?: trans_was_restarted(_trans, _orig_restart_count); \ + if (!_ret && trans_was_restarted(_trans, _orig_restart_count)) \ + _ret = -BCH_ERR_transaction_restart_nested; \ + \ + _ret; \ }) #define for_each_btree_key2(_trans, _iter, _btree_id, \ @@ -451,7 +464,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ \ - do { \ + while (1) { \ bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ if (!(_k).k) { \ _ret = 0; \ break; \ } \ \ _ret = bkey_err(_k) ?: (_do); \ - if (!_ret) \ - bch2_btree_iter_advance(&(_iter)); \ - } while (_ret == 0 || _ret == -EINTR); \ + if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + continue; \ + if (_ret) \ + break; \ + bch2_btree_iter_advance(&(_iter)); \ + } \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ _ret; \ })
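/*
 * Why nested_lockrestart_do() above reports
 * -BCH_ERR_transaction_restart_nested instead of plain success: a helper
 * that restarts the transaction while running inside a caller's
 * transaction has invalidated the caller's iterators, so the restart has
 * to propagate and rerun the outer retry loop.  A sketch, with an
 * illustrative helper() standing in for real code:
 *
 *	static int helper(struct btree_trans *trans)
 *	{
 *		u32 restart_count = trans->restart_count;
 *		int ret = 0;
 *
 *		... btree work that may restart the transaction ...
 *
 *		if (!ret && trans_was_restarted(trans, restart_count))
 *			ret = -BCH_ERR_transaction_restart_nested;
 *		return ret;
 *	}
 */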
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index e5a29240bbcc..549abe607b53 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -5,6 +5,7 @@ #include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update.h" +#include "errcode.h" #include "error.h" #include "journal.h" #include "journal_reclaim.h" @@ -292,7 +293,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, if (!bch2_btree_node_relock(trans, ck_path, 0)) { trace_trans_restart_relock_key_cache_fill(trans->fn, _THIS_IP_, ck_path->btree_id, &ck_path->pos); - ret = btree_trans_restart(trans); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); goto err; } @@ -347,8 +348,10 @@ static int bkey_cached_check_fn(struct six_lock *lock, void *p) struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock); const struct btree_path *path = p; - return ck->key.btree_id == path->btree_id && - !bpos_cmp(ck->key.pos, path->pos) ? 0 : -1; + if (ck->key.btree_id != path->btree_id || + bpos_cmp(ck->key.pos, path->pos)) + return BCH_ERR_lock_fail_node_reused; + return 0; } __flatten @@ -387,14 +390,15 @@ retry: } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); - if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0, - lock_want, - bkey_cached_check_fn, path, _THIS_IP_)) { - if (!trans->restarted) + ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0, + lock_want, + bkey_cached_check_fn, path, _THIS_IP_); + if (ret) { + if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused)) goto retry; - - ret = -EINTR; - goto err; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto err; + BUG(); } if (ck->key.btree_id != path->btree_id || @@ -413,7 +417,7 @@ fill: if (!path->locks_want && !__bch2_btree_path_upgrade(trans, path, 1)) { trace_transaction_restart_ip(trans->fn, _THIS_IP_); - ret = btree_trans_restart(trans); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); goto err; } @@ -430,7 +434,7 @@ fill: return ret; err: - if (ret != -EINTR) { + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { btree_node_unlock(trans, path, 0); path->l[0].b = BTREE_ITER_NO_NODE_ERROR; } @@ -497,13 +501,14 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, ?
JOURNAL_WATERMARK_reserved : 0)| commit_flags); - if (ret) { - bch2_fs_fatal_err_on(ret != -EINTR && - ret != -EAGAIN && - !bch2_journal_error(j), c, - "error flushing key cache: %i", ret); + + bch2_fs_fatal_err_on(ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && + !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && + !bch2_journal_error(j), c, + "error flushing key cache: %s", bch2_err_str(ret)); + if (ret) goto out; - } bch2_journal_pin_drop(j, &ck->journal); bch2_journal_preres_put(j, &ck->res); diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index b8708466c4e3..33a69e27c39e 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -152,7 +152,7 @@ static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type) } } -static inline bool btree_node_lock_type(struct btree_trans *trans, +static inline int btree_node_lock_type(struct btree_trans *trans, struct btree_path *path, struct btree *b, struct bpos pos, unsigned level, @@ -161,10 +161,10 @@ static inline bool btree_node_lock_type(struct btree_trans *trans, { struct bch_fs *c = trans->c; u64 start_time; - bool ret; + int ret; if (six_trylock_type(&b->c.lock, type)) - return true; + return 0; start_time = local_clock(); @@ -174,13 +174,14 @@ static inline bool btree_node_lock_type(struct btree_trans *trans, trans->locking_level = level; trans->locking_lock_type = type; trans->locking = b; - ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0; + ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p); trans->locking = NULL; if (ret) - bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); + return ret; - return ret; + bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time); + return 0; } /* @@ -203,33 +204,34 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans, return false; } -bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, - struct btree *, struct bpos, unsigned, - enum six_lock_type, - six_lock_should_sleep_fn, void *, - unsigned long); +int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *, + struct btree *, struct bpos, unsigned, + enum six_lock_type, + six_lock_should_sleep_fn, void *, + unsigned long); -static inline bool btree_node_lock(struct btree_trans *trans, +static inline int btree_node_lock(struct btree_trans *trans, struct btree_path *path, struct btree *b, struct bpos pos, unsigned level, enum six_lock_type type, six_lock_should_sleep_fn should_sleep_fn, void *p, unsigned long ip) { + int ret = 0; + EBUG_ON(level >= BTREE_MAX_DEPTH); EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx))); if (likely(six_trylock_type(&b->c.lock, type)) || - btree_node_lock_increment(trans, b, level, type) || - __bch2_btree_node_lock(trans, path, b, pos, level, type, - should_sleep_fn, p, ip)) { + btree_node_lock_increment(trans, b, level, type) || + !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type, + should_sleep_fn, p, ip))) { #ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS path->l[b->c.level].lock_taken_time = ktime_get_ns(); #endif - return true; - } else { - return false; } + + return ret; } bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 0650a3558182..bc1571fc2f1f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -405,11 +405,11 @@ struct btree_trans { u8 nr_updates; bool used_mempool:1; bool 
in_traverse_all:1; - bool restarted:1; bool paths_sorted:1; bool memory_allocation_failure:1; bool journal_transaction_names:1; bool journal_replay_not_finished:1; + enum bch_errcode restarted:16; u32 restart_count; unsigned long last_restarted_ip; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 9b5a8b18b01b..89941fb8caa0 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -90,7 +90,6 @@ int bch2_trans_log_msg(struct btree_trans *, const char *); * This is main entry point for btree updates. * * Return values: - * -EINTR: locking changed, this function should be called again. * -EROFS: filesystem read only * -EIO: journal or btree node IO error */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 9f9ab85ec6b8..cf02e814c579 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -996,7 +996,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) { trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_, path->btree_id, &path->pos); - ret = btree_trans_restart(trans); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); return ERR_PTR(ret); } @@ -1005,9 +1005,10 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, else if (!down_read_trylock(&c->gc_lock)) { bch2_trans_unlock(trans); down_read(&c->gc_lock); - if (!bch2_trans_relock(trans)) { + ret = bch2_trans_relock(trans); + if (ret) { up_read(&c->gc_lock); - return ERR_PTR(-EINTR); + return ERR_PTR(ret); } } @@ -1053,7 +1054,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, journal_flags); if (ret) { trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_); - btree_trans_restart(trans); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); goto err; } @@ -1090,10 +1091,9 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, goto err; } - if (!bch2_trans_relock(trans)) { - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) goto err; - } return as; err: @@ -2030,10 +2030,8 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite int ret = 0; if (!btree_node_intent_locked(path, b->c.level) && - !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) { - btree_trans_restart(trans); - return -EINTR; - } + !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) + return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); closure_init_stack(&cl); @@ -2046,8 +2044,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite if (ret) { bch2_trans_unlock(trans); closure_sync(&cl); - if (!bch2_trans_relock(trans)) - return -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + return ret; } new_hash = bch2_btree_node_mem_alloc(c, false); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index c6fe24f424de..541826df50d9 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -10,6 +10,7 @@ #include "btree_locking.h" #include "buckets.h" #include "debug.h" +#include "errcode.h" #include "error.h" #include "extent_update.h" #include "journal.h" @@ -282,9 +283,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s, if (ret) return ret; - if (!bch2_trans_relock(trans)) { + ret = bch2_trans_relock(trans); + if (ret) { trace_trans_restart_journal_preres_get(trans->fn, 
trace_ip); - return -EINTR; + return ret; } return 0; @@ -376,12 +378,7 @@ btree_key_can_insert_cached(struct btree_trans *trans, trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_, path->btree_id, &path->pos, old_u64s, new_u64s); - /* - * Not using btree_trans_restart() because we can't unlock here, we have - * write locks held: - */ - trans->restarted = true; - return -EINTR; + return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced); } /* Triggers: */ @@ -573,8 +570,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, if (race_fault()) { trace_trans_restart_fault_inject(trans->fn, trace_ip); - trans->restarted = true; - return -EINTR; + return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); } /* @@ -812,6 +808,7 @@ static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct static inline int trans_lock_write(struct btree_trans *trans) { struct btree_insert_entry *i; + int ret; trans_for_each_update(trans, i) { if (same_leaf_as_prev(trans, i)) @@ -821,10 +818,11 @@ static inline int trans_lock_write(struct btree_trans *trans) if (have_conflicting_read_lock(trans, i->path)) goto fail; - btree_node_lock_type(trans, i->path, + ret = btree_node_lock_type(trans, i->path, insert_l(i)->b, i->path->pos, i->level, SIX_LOCK_write, NULL, NULL); + BUG_ON(ret); } bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b); @@ -840,7 +838,7 @@ fail: } trace_trans_restart_would_deadlock_write(trans->fn); - return btree_trans_restart(trans); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write); } static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans) @@ -971,10 +969,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, switch (ret) { case BTREE_INSERT_BTREE_NODE_FULL: ret = bch2_btree_split_leaf(trans, i->path, trans->flags); - if (!ret) - return 0; - - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) trace_trans_restart_btree_node_split(trans->fn, trace_ip, i->btree_id, &i->path->pos); break; @@ -985,19 +980,16 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) break; - if (bch2_trans_relock(trans)) - return 0; - - trace_trans_restart_mark_replicas(trans->fn, trace_ip); - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + trace_trans_restart_mark_replicas(trans->fn, trace_ip); break; case BTREE_INSERT_NEED_JOURNAL_RES: bch2_trans_unlock(trans); if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && !(trans->flags & JOURNAL_WATERMARK_reserved)) { - trans->restarted = true; - ret = -EAGAIN; + ret = -BCH_ERR_journal_reclaim_would_deadlock; break; } @@ -1005,11 +997,9 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) break; - if (bch2_trans_relock(trans)) - return 0; - - trace_trans_restart_journal_res_get(trans->fn, trace_ip); - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + trace_trans_restart_journal_res_get(trans->fn, trace_ip); break; case BTREE_INSERT_NEED_JOURNAL_RECLAIM: bch2_trans_unlock(trans); @@ -1021,18 +1011,16 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret < 0) break; - if (bch2_trans_relock(trans)) - return 0; - - trace_trans_restart_journal_reclaim(trans->fn, trace_ip); - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) + trace_trans_restart_journal_reclaim(trans->fn, trace_ip); break; default: BUG_ON(ret >= 0); break; } - BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted); + 
BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); BUG_ON(ret == -ENOSPC && !(trans->flags & BTREE_INSERT_NOWAIT) && (trans->flags & BTREE_INSERT_NOFAIL)); @@ -1052,13 +1040,11 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) bch2_trans_unlock(trans); - ret = bch2_fs_read_write_early(c); + ret = bch2_fs_read_write_early(c) ?: + bch2_trans_relock(trans); if (ret) return ret; - if (!bch2_trans_relock(trans)) - return -EINTR; - percpu_ref_get(&c->writes); return 0; } @@ -1132,7 +1118,7 @@ int __bch2_trans_commit(struct btree_trans *trans) if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) { trace_trans_restart_upgrade(trans->fn, _RET_IP_, i->btree_id, &i->path->pos); - ret = btree_trans_restart(trans); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade); goto out; } @@ -1654,8 +1640,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { trace_trans_restart_key_cache_raced(trans->fn, _RET_IP_); - btree_trans_restart(trans); - return -EINTR; + return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced); } iter->key_cache_path->should_be_locked = true; @@ -1783,7 +1768,7 @@ retry: break; } - if (ret == -EINTR) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; goto retry; } diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 6726bd6b9b07..c0d6a48d3c72 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -236,7 +236,7 @@ int bch2_data_update_index_update(struct bch_write_op *op) bch2_ob_add_backpointer(c, ec_ob, &insert->k); } err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; if (ret) break; @@ -264,7 +264,7 @@ out: bch2_trans_exit(&trans); bch2_bkey_buf_exit(&_insert, c); bch2_bkey_buf_exit(&_new, c); - BUG_ON(ret == -EINTR); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); return ret; } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 0cbb765cde54..4d942d224a08 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -471,7 +471,7 @@ retry: ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info, name, inum, 0); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (!ret) bch2_trans_iter_exit(&trans, &iter); @@ -556,7 +556,7 @@ retry: } bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 947f2f2b1c09..f33acf1af110 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -572,18 +572,14 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans, struct btree_iter *iter) { size_t idx = iter->pos.offset; - int ret = 0; if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_NOWAIT|__GFP_NOWARN)) - return ret; + return 0; bch2_trans_unlock(trans); - ret = -EINTR; - if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL)) - return ret; - - return -ENOMEM; + return __ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL) ?: + bch2_trans_relock(trans); } static ssize_t stripe_idx_to_delete(struct bch_fs *c) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 69cc7cdd1c06..7972b018d2d0 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -7,7 +7,30 @@ x(0, freelist_empty) \ x(freelist_empty, no_buckets_found) \ x(0, insufficient_devices) \ - x(0, need_snapshot_cleanup) + x(0, 
need_snapshot_cleanup) \ + x(0, transaction_restart) \ + x(transaction_restart, transaction_restart_fault_inject) \ + x(transaction_restart, transaction_restart_relock) \ + x(transaction_restart, transaction_restart_relock_path) \ + x(transaction_restart, transaction_restart_relock_path_intent) \ + x(transaction_restart, transaction_restart_relock_after_fill) \ + x(transaction_restart, transaction_restart_too_many_iters) \ + x(transaction_restart, transaction_restart_lock_node_reused) \ + x(transaction_restart, transaction_restart_fill_relock) \ + x(transaction_restart, transaction_restart_fill_mem_alloc_fail)\ + x(transaction_restart, transaction_restart_mem_realloced) \ + x(transaction_restart, transaction_restart_in_traverse_all) \ + x(transaction_restart, transaction_restart_would_deadlock) \ + x(transaction_restart, transaction_restart_would_deadlock_write)\ + x(transaction_restart, transaction_restart_upgrade) \ + x(transaction_restart, transaction_restart_key_cache_fill) \ + x(transaction_restart, transaction_restart_key_cache_raced) \ + x(transaction_restart, transaction_restart_key_cache_realloced)\ + x(transaction_restart, transaction_restart_journal_preres_get) \ + x(transaction_restart, transaction_restart_nested) \ + x(0, lock_fail_node_reused) \ + x(0, lock_fail_root_changed) \ + x(0, journal_reclaim_would_deadlock) enum bch_errcode { BCH_ERR_START = 2048,
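/*
 * The errcode list above is an "x-macro": a single x(class, name) table
 * expanded several ways by redefining x().  A condensed sketch of the
 * pattern - the name-table expansion is an assumption about how
 * bch2_err_str() could be backed, not a copy of the real errcode.c:
 *
 *	#define ERRCODES()					\
 *		x(0,			transaction_restart)	\
 *		x(transaction_restart,	transaction_restart_relock)
 *
 *	enum errcode {
 *		ERR_START = 2048,
 *	#define x(class, err)	ERR_##err,
 *		ERRCODES()
 *	#undef x
 *		ERR_MAX
 *	};
 *
 *	static const char * const err_str[] = {
 *	#define x(class, err)	[ERR_##err - ERR_START] = #err,
 *		ERRCODES()
 *	#undef x
 *	};
 */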
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index c0dda29dabb4..9f1ecb8d7b3b 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -408,7 +408,7 @@ retry: offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -1018,10 +1018,9 @@ retry: * read_extent -> io_time_reset may cause a transaction restart * without returning an error, we need to check for that here: */ - if (!bch2_trans_relock(trans)) { - ret = -EINTR; + ret = bch2_trans_relock(trans); + if (ret) break; - } bch2_btree_iter_set_pos(&iter, POS(inum.inum, rbio->bio.bi_iter.bi_sector)); @@ -1074,7 +1073,7 @@ retry: err: bch2_trans_iter_exit(trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (ret) { @@ -2035,7 +2034,7 @@ retry: offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (err == -EINTR) + if (bch2_err_matches(err, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -2427,7 +2426,7 @@ retry: start = iter.pos; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -2817,7 +2816,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, bch2_trans_copy_iter(&dst, &src); bch2_trans_copy_iter(&del, &src); - while (ret == 0 || ret == -EINTR) { + while (ret == 0 || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; @@ -3019,7 +3019,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, bkey_err: bch2_quota_reservation_put(c, inode, &quota_res); bch2_disk_reservation_put(c, &disk_res); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; } @@ -3301,7 +3301,7 @@ retry: } bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -3416,7 +3416,7 @@ retry: } bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 876552a2a83b..af4941862187 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -154,7 +154,7 @@ retry: bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); @@ -324,7 +324,7 @@ retry: bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); err_before_quota: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; goto err_trans; } @@ -755,7 +755,7 @@ retry: btree_err: bch2_trans_iter_exit(&trans, &inode_iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err_trans; @@ -987,7 +987,7 @@ retry: start = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (!ret && have_extent) @@ -1337,7 +1337,7 @@ found: memcpy(name, d.v->d_name, name_len); name[name_len] = '\0'; err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter1); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 021affcc82d4..29d731a12436 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -136,7 +136,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, ret = bch2_inode_unpack(k, inode); err: - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(trans->c, "error fetching inode %llu: %s", inode_nr, bch2_err_str(ret)); bch2_trans_iter_exit(trans, &iter); @@ -164,7 +164,7 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, if (!ret) *snapshot = iter.pos.snapshot; err: - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(trans->c, "error fetching inode %llu:%u: %s", inode_nr, *snapshot, bch2_err_str(ret)); bch2_trans_iter_exit(trans, &iter); @@ -287,7 +287,7 @@ retry: BTREE_INSERT_NOFAIL); err: bch2_trans_iter_exit(trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; return ret; @@ -314,7 +314,7 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos) BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); bch2_trans_iter_exit(trans, &iter); err: - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret)); return ret; } @@ -350,7 +350,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, goto create_lostfound; } - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret)); if (ret) return ret; @@ -373,7 +373,7 @@ create_lostfound: lostfound, &lostfound_str, 0, 0, S_IFDIR|0700, 0, NULL, NULL, (subvol_inum) { }, 0); - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error creating lost+found: %s", bch2_err_str(ret)); return ret; } @@ -843,10 +843,10 @@ bad_hash: ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); if (ret) { - bch_err(c, "hash_redo_key err %i", ret); + bch_err(c, "hash_redo_key err %s", bch2_err_str(ret)); return ret; } - ret = -EINTR; + ret =
-BCH_ERR_transaction_restart_nested; fsck_err: goto out; } @@ -1144,7 +1144,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) ret = write_inode(trans, &i->inode, i->snapshot); if (ret) break; - ret2 = -EINTR; + ret2 = -BCH_ERR_transaction_restart_nested; } fsck_err: if (ret) @@ -1191,7 +1191,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, * it shouldn't be but we need to fix the new i_sectors check * code and delete the old bch2_count_inode_sectors() first */ - return -EINTR; + return -BCH_ERR_transaction_restart_nested; } #if 0 if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { @@ -1202,7 +1202,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_val_to_text(&PBUF(buf2), c, k); if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { - ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR; + ret = fix_overlapping_extent(trans, k, prev.k->k.p) + ?: -BCH_ERR_transaction_restart_nested; goto out; } } @@ -1287,8 +1288,8 @@ err: fsck_err: printbuf_exit(&buf); - if (ret && ret != -EINTR) - bch_err(c, "error %i from check_extent()", ret); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err(c, "error from check_extent(): %s", bch2_err_str(ret)); return ret; } @@ -1364,7 +1365,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) ret = write_inode(trans, &i->inode, i->snapshot); if (ret) break; - ret2 = -EINTR; + ret2 = -BCH_ERR_transaction_restart_nested; } } fsck_err: @@ -1487,7 +1488,7 @@ err: fsck_err: printbuf_exit(&buf); - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error from check_target(): %s", bch2_err_str(ret)); return ret; } @@ -1530,7 +1531,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (!iter->path->should_be_locked) { /* hack: see check_extent() */ - return -EINTR; + return -BCH_ERR_transaction_restart_nested; } ret = __walk_inode(trans, dir, equiv); @@ -1660,7 +1661,7 @@ err: fsck_err: printbuf_exit(&buf); - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret)); return ret; } @@ -1735,7 +1736,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); fsck_err: - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret)); return ret; } @@ -2016,8 +2017,6 @@ static int check_directory_structure(struct bch_fs *c) } bch2_trans_iter_exit(&trans, &iter); - BUG_ON(ret == -EINTR); - darray_exit(&path); bch2_trans_exit(&trans); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 5de66d62028b..fc0f98074dab 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -619,7 +619,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); err: - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } @@ -690,7 +690,7 @@ retry: BTREE_INSERT_NOFAIL); err: bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index f137a8e90f07..dfa708c0a7fc 100644 --- a/fs/bcachefs/io.c +++ 
b/fs/bcachefs/io.c @@ -390,7 +390,7 @@ err: } /* - * Returns -EINTR if we had to drop locks: + * Returns -BCH_ERR_transaction_restart if we had to drop locks: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, @@ -403,7 +403,8 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, int ret = 0, ret2 = 0; u32 snapshot; - while (!ret || ret == -EINTR) { + while (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; @@ -462,7 +463,10 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - return ret == -EINTR ? 0 : ret; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + ret = 0; + + return ret; } static int bch2_write_index_default(struct bch_write_op *op) @@ -493,7 +497,7 @@ static int bch2_write_index_default(struct bch_write_op *op) ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &sk.k->k.p.snapshot); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; @@ -508,7 +512,7 @@ op->flags & BCH_WRITE_CHECK_ENOSPC); bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; @@ -663,7 +667,7 @@ static void __bch2_write_index(struct bch_write_op *op) ? bch2_write_index_default(op) : bch2_data_update_index_update(op); - BUG_ON(ret == -EINTR); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); BUG_ON(keylist_sectors(keys) && !ret); op->written += sectors_start - keylist_sectors(keys); @@ -2429,10 +2433,9 @@ retry: * read_extent -> io_time_reset may cause a transaction restart * without returning an error, we need to check for that here: */ - if (!bch2_trans_relock(&trans)) { - ret = -EINTR; + ret = bch2_trans_relock(&trans); + if (ret) break; - } bch2_btree_iter_set_pos(&iter, POS(inum.inum, bvec_iter.bi_sector)); @@ -2486,7 +2489,9 @@ retry: err: bch2_trans_iter_exit(&trans, &iter); - if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + ret == READ_RETRY || + ret == READ_RETRY_AVOID) goto retry; bch2_trans_exit(&trans); diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index d9b4042a2e4a..5c555b3703c0 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -272,7 +272,7 @@ retry: !test_bit(BCH_FS_STOPPING, &c->flags)) b = bch2_btree_iter_next_node(&iter); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter); diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index baeca0e2a302..8b258d966d04 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -146,7 +146,7 @@ retry: } ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false); - if (ret == -EINTR) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; continue; } @@ -159,7 +159,7 @@ retry: next: bch2_btree_iter_next_node(&iter); } - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter); @@ -174,7 +174,7 @@ err: bch2_trans_exit(&trans); bch2_bkey_buf_exit(&k, c); - BUG_ON(ret == -EINTR); + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); return
ret; } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7fba0f70c409..ea9ce6d436a2 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -387,7 +387,7 @@ static int __bch2_move_data(struct moving_context *ctxt, break; ret = bkey_err(k); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; @@ -409,7 +409,7 @@ static int __bch2_move_data(struct moving_context *ctxt, ret = lookup_inode(&trans, SPOS(0, k.k->p.inode, k.k->p.snapshot), &inode); - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (!ret) @@ -432,7 +432,7 @@ static int __bch2_move_data(struct moving_context *ctxt, ret2 = bch2_move_extent(&trans, ctxt, io_opts, btree_id, k, data_opts); if (ret2) { - if (ret2 == -EINTR) + if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; if (ret2 == -ENOMEM) { @@ -546,14 +546,14 @@ retry: goto next; ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret; - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; next: bch2_btree_iter_next_node(&iter); } - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_iter_exit(&trans, &iter); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 2038e3502d8c..d5c14bb2992d 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -299,7 +299,8 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start, BTREE_ITER_INTENT); - while ((ret == 0 || ret == -EINTR) && + while ((ret == 0 || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) && bkey_cmp(dst_iter.pos, dst_end) < 0) { struct disk_reservation disk_res = { 0 }; @@ -409,7 +410,7 @@ s64 bch2_remap_range(struct bch_fs *c, } bch2_trans_iter_exit(&trans, &inode_iter); - } while (ret2 == -EINTR); + } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&new_src, c); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 0469b90064eb..b5b0f5e39f97 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -315,7 +315,7 @@ static int check_subvol(struct btree_trans *trans, if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(trans->c, "error deleting subvolume %llu: %s", iter->pos.offset, bch2_err_str(ret)); if (ret) diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index bf0a33c0233d..c6cac5c79c12 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -640,7 +640,7 @@ static int rand_mixed_trans(struct btree_trans *trans, k = bch2_btree_iter_peek(iter); ret = bkey_err(k); - if (ret && ret != -EINTR) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(trans->c, "lookup error in rand_mixed: %s", bch2_err_str(ret)); if (ret) return ret; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 5df61b6b4a3c..37793b3357d3 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -344,7 +344,7 @@ retry: offset = iter.pos.offset; bch2_trans_iter_exit(&trans, &iter); err: - if (ret == -EINTR) + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; bch2_trans_exit(&trans); -- cgit v1.2.3 From 5c1ef830f6786059f85bebe7501b63dffed0b633 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Sep 2022 15:43:50 -0400 Subject: bcachefs: Errcodes can now subtype 
standard error codes The next patch is going to be adding private error codes for all the places we return -ENOSPC. Additionally, this patch updates return paths at all module boundaries to call bch2_err_class(), to return the standard error code. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.c | 13 ++++++- fs/bcachefs/errcode.h | 97 +++++++++++++++++++++++++++----------------------- fs/bcachefs/fs-io.c | 49 +++++++++++++++---------- fs/bcachefs/fs-ioctl.c | 50 +++++++++++++++++--------- fs/bcachefs/fs.c | 23 ++++++------ fs/bcachefs/sysfs.c | 15 ++++++-- fs/bcachefs/xattr.c | 16 ++++++--- 7 files changed, 164 insertions(+), 99 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c index 9da8a5973af0..cc9ce0be356e 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c @@ -15,7 +15,7 @@ static const char * const bch2_errcode_strs[] = { #define BCH_ERR_0 0 static unsigned bch2_errcode_parents[] = { -#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = BCH_ERR_##class, +#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class, BCH_ERRCODES() #undef x }; @@ -49,3 +49,14 @@ bool __bch2_err_matches(int err, int class) return err == class; } + +int __bch2_err_class(int err) +{ + err = -err; + BUG_ON((unsigned) err >= BCH_ERR_MAX); + + while (err >= BCH_ERR_START && bch2_errcode_parents[err - BCH_ERR_START]) + err = bch2_errcode_parents[err - BCH_ERR_START]; + + return -err; +} diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 15a1be2fcc84..2088cc5a4f3c 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -3,51 +3,51 @@ #define _BCACHEFS_ERRCODE_H #define BCH_ERRCODES() \ - x(0, open_buckets_empty) \ - x(0, freelist_empty) \ - x(freelist_empty, no_buckets_found) \ - x(0, insufficient_devices) \ - x(0, transaction_restart) \ - x(transaction_restart, transaction_restart_fault_inject) \ - x(transaction_restart, transaction_restart_relock) \ - x(transaction_restart, transaction_restart_relock_path) \ - x(transaction_restart, transaction_restart_relock_path_intent) \ - x(transaction_restart, transaction_restart_relock_after_fill) \ - x(transaction_restart, transaction_restart_too_many_iters) \ - x(transaction_restart, transaction_restart_lock_node_reused) \ - x(transaction_restart, transaction_restart_fill_relock) \ - x(transaction_restart, transaction_restart_fill_mem_alloc_fail)\ - x(transaction_restart, transaction_restart_mem_realloced) \ - x(transaction_restart, transaction_restart_in_traverse_all) \ - x(transaction_restart, transaction_restart_would_deadlock) \ - x(transaction_restart, transaction_restart_would_deadlock_write)\ - x(transaction_restart, transaction_restart_upgrade) \ - x(transaction_restart, transaction_restart_key_cache_upgrade) \ - x(transaction_restart, transaction_restart_key_cache_fill) \ - x(transaction_restart, transaction_restart_key_cache_raced) \ - x(transaction_restart, transaction_restart_key_cache_realloced)\ - x(transaction_restart, transaction_restart_journal_preres_get) \ - x(transaction_restart, transaction_restart_nested) \ - x(0, no_btree_node) \ - x(no_btree_node, no_btree_node_relock) \ - x(no_btree_node, no_btree_node_upgrade) \ - x(no_btree_node, no_btree_node_drop) \ - x(no_btree_node, no_btree_node_lock_root) \ - x(no_btree_node, no_btree_node_up) \ - x(no_btree_node, no_btree_node_down) \ - x(no_btree_node, no_btree_node_init) \ - x(no_btree_node, no_btree_node_cached) \ - x(0, lock_fail_node_reused) \ - x(0, lock_fail_root_changed) \ - x(0, 
journal_reclaim_would_deadlock) \ - x(0, fsck) \ - x(fsck, fsck_fix) \ - x(fsck, fsck_ignore) \ - x(fsck, fsck_errors_not_fixed) \ - x(fsck, fsck_repair_unimplemented) \ - x(fsck, fsck_repair_impossible) \ - x(0, need_snapshot_cleanup) \ - x(0, need_topology_repair) + x(0, open_buckets_empty) \ + x(0, freelist_empty) \ + x(BCH_ERR_freelist_empty, no_buckets_found) \ + x(0, insufficient_devices) \ + x(0, transaction_restart) \ + x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \ + x(BCH_ERR_transaction_restart, transaction_restart_relock) \ + x(BCH_ERR_transaction_restart, transaction_restart_relock_path) \ + x(BCH_ERR_transaction_restart, transaction_restart_relock_path_intent) \ + x(BCH_ERR_transaction_restart, transaction_restart_relock_after_fill) \ + x(BCH_ERR_transaction_restart, transaction_restart_too_many_iters) \ + x(BCH_ERR_transaction_restart, transaction_restart_lock_node_reused) \ + x(BCH_ERR_transaction_restart, transaction_restart_fill_relock) \ + x(BCH_ERR_transaction_restart, transaction_restart_fill_mem_alloc_fail)\ + x(BCH_ERR_transaction_restart, transaction_restart_mem_realloced) \ + x(BCH_ERR_transaction_restart, transaction_restart_in_traverse_all) \ + x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock) \ + x(BCH_ERR_transaction_restart, transaction_restart_would_deadlock_write)\ + x(BCH_ERR_transaction_restart, transaction_restart_upgrade) \ + x(BCH_ERR_transaction_restart, transaction_restart_key_cache_upgrade) \ + x(BCH_ERR_transaction_restart, transaction_restart_key_cache_fill) \ + x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \ + x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\ + x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \ + x(BCH_ERR_transaction_restart, transaction_restart_nested) \ + x(0, no_btree_node) \ + x(BCH_ERR_no_btree_node, no_btree_node_relock) \ + x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \ + x(BCH_ERR_no_btree_node, no_btree_node_drop) \ + x(BCH_ERR_no_btree_node, no_btree_node_lock_root) \ + x(BCH_ERR_no_btree_node, no_btree_node_up) \ + x(BCH_ERR_no_btree_node, no_btree_node_down) \ + x(BCH_ERR_no_btree_node, no_btree_node_init) \ + x(BCH_ERR_no_btree_node, no_btree_node_cached) \ + x(0, lock_fail_node_reused) \ + x(0, lock_fail_root_changed) \ + x(0, journal_reclaim_would_deadlock) \ + x(0, fsck) \ + x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_ignore) \ + x(BCH_ERR_fsck, fsck_errors_not_fixed) \ + x(BCH_ERR_fsck, fsck_repair_unimplemented) \ + x(BCH_ERR_fsck, fsck_repair_impossible) \ + x(0, need_snapshot_cleanup) \ + x(0, need_topology_repair) enum bch_errcode { BCH_ERR_START = 2048, @@ -71,4 +71,11 @@ static inline bool _bch2_err_matches(int err, int class) _bch2_err_matches(_err, _class); \ }) +int __bch2_err_class(int); + +static inline long bch2_err_class(long err) +{ + return err < 0 ? 
__bch2_err_class(err) : err; +} + #endif /* _BCACHFES_ERRCODE_H */ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 9f1ecb8d7b3b..c83e1de9a39a 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1186,7 +1186,7 @@ int bch2_read_folio(struct file *file, struct folio *folio) ret = bch2_read_single_page(page, page->mapping); folio_unlock(folio); - return ret; + return bch2_err_class(ret); } /* writepages: */ @@ -1465,7 +1465,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc if (w.io) bch2_writepage_do_io(&w); blk_finish_plug(&plug); - return ret; + return bch2_err_class(ret); } /* buffered writes: */ @@ -1550,7 +1550,7 @@ err_unlock: bch2_pagecache_add_put(&inode->ei_pagecache_lock); kfree(res); *fsdata = NULL; - return ret; + return bch2_err_class(ret); } int bch2_write_end(struct file *file, struct address_space *mapping, @@ -1975,7 +1975,7 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) iocb->ki_pos, iocb->ki_pos + count - 1); if (ret < 0) - return ret; + goto out; } file_accessed(file); @@ -1991,8 +1991,8 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) ret = generic_file_read_iter(iocb, iter); bch2_pagecache_add_put(&inode->ei_pagecache_lock); } - - return ret; +out: + return bch2_err_class(ret); } /* O_DIRECT writes */ @@ -2224,6 +2224,9 @@ err: /* inode->i_dio_count is our ref on inode and thus bch_fs */ inode_dio_end(&inode->v); + if (ret < 0) + ret = bch2_err_class(ret); + if (!sync) { req->ki_complete(req, ret); ret = -EIOCBQUEUED; @@ -2332,8 +2335,10 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) struct bch_inode_info *inode = file_bch_inode(file); ssize_t ret; - if (iocb->ki_flags & IOCB_DIRECT) - return bch2_direct_write(iocb, from); + if (iocb->ki_flags & IOCB_DIRECT) { + ret = bch2_direct_write(iocb, from); + goto out; + } inode_lock(&inode->v); @@ -2357,8 +2362,8 @@ unlock: if (ret > 0) ret = generic_write_sync(iocb, ret); - - return ret; +out: + return bch2_err_class(ret); } /* fsync: */ @@ -2392,7 +2397,7 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret2 = sync_inode_metadata(&inode->v, 1); ret3 = bch2_flush_inode(c, inode_inum(inode)); - return ret ?: ret2 ?: ret3; + return bch2_err_class(ret ?: ret2 ?: ret3); } /* truncate: */ @@ -2698,7 +2703,7 @@ int bch2_truncate(struct mnt_idmap *idmap, ret = bch2_setattr_nonsize(idmap, inode, iattr); err: bch2_pagecache_block_put(&inode->ei_pagecache_lock); - return ret; + return bch2_err_class(ret); } /* fallocate: */ @@ -3128,7 +3133,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, inode_unlock(&inode->v); percpu_ref_put(&c->writes); - return ret; + return bch2_err_class(ret); } loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, @@ -3206,7 +3211,7 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, err: bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); - return ret; + return bch2_err_class(ret); } /* fseek: */ @@ -3431,18 +3436,26 @@ err: loff_t bch2_llseek(struct file *file, loff_t offset, int whence) { + loff_t ret; + switch (whence) { case SEEK_SET: case SEEK_CUR: case SEEK_END: - return generic_file_llseek(file, offset, whence); + ret = generic_file_llseek(file, offset, whence); + break; case SEEK_DATA: - return bch2_seek_data(file, offset); + ret = bch2_seek_data(file, offset); + break; case SEEK_HOLE: - return bch2_seek_hole(file, offset); + ret = bch2_seek_hole(file, offset); + break; + default: + ret = -EINVAL; 
+ break; } - return -EINVAL; + return bch2_err_class(ret); } void bch2_fs_fsio_exit(struct bch_fs *c) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index de94895ace9f..3df2f5f3d1ea 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -455,51 +455,67 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; + long ret; switch (cmd) { case FS_IOC_GETFLAGS: - return bch2_ioc_getflags(inode, (int __user *) arg); + ret = bch2_ioc_getflags(inode, (int __user *) arg); + break; case FS_IOC_SETFLAGS: - return bch2_ioc_setflags(c, file, inode, (int __user *) arg); + ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); + break; case FS_IOC_FSGETXATTR: - return bch2_ioc_fsgetxattr(inode, (void __user *) arg); + ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); + break; + case FS_IOC_FSSETXATTR: - return bch2_ioc_fssetxattr(c, file, inode, - (void __user *) arg); + ret = bch2_ioc_fssetxattr(c, file, inode, + (void __user *) arg); + break; case BCHFS_IOC_REINHERIT_ATTRS: - return bch2_ioc_reinherit_attrs(c, file, inode, - (void __user *) arg); + ret = bch2_ioc_reinherit_attrs(c, file, inode, + (void __user *) arg); + break; case FS_IOC_GETVERSION: - return -ENOTTY; + ret = -ENOTTY; + break; + case FS_IOC_SETVERSION: - return -ENOTTY; + ret = -ENOTTY; + break; case FS_IOC_GOINGDOWN: - return bch2_ioc_goingdown(c, (u32 __user *) arg); + ret = bch2_ioc_goingdown(c, (u32 __user *) arg); + break; case BCH_IOCTL_SUBVOLUME_CREATE: { struct bch_ioctl_subvolume i; - if (copy_from_user(&i, (void __user *) arg, sizeof(i))) - return -EFAULT; - return bch2_ioctl_subvolume_create(c, file, i); + ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) + ? -EFAULT + : bch2_ioctl_subvolume_create(c, file, i); + break; } case BCH_IOCTL_SUBVOLUME_DESTROY: { struct bch_ioctl_subvolume i; - if (copy_from_user(&i, (void __user *) arg, sizeof(i))) - return -EFAULT; - return bch2_ioctl_subvolume_destroy(c, file, i); + ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) + ? 
-EFAULT + : bch2_ioctl_subvolume_destroy(c, file, i); + break; } default: - return bch2_fs_ioctl(c, cmd, (void __user *) arg); + ret = bch2_fs_ioctl(c, cmd, (void __user *) arg); + break; } + + return bch2_err_class(ret); } #ifdef CONFIG_COMPAT diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index af4941862187..66fcd3e28e0c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -419,7 +419,7 @@ static int bch2_mknod(struct mnt_idmap *idmap, (subvol_inum) { 0 }, 0); if (IS_ERR(inode)) - return PTR_ERR(inode); + return bch2_err_class(PTR_ERR(inode)); d_instantiate(dentry, &inode->v); return 0; @@ -529,7 +529,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); if (unlikely(IS_ERR(inode))) - return PTR_ERR(inode); + return bch2_err_class(PTR_ERR(inode)); inode_lock(&inode->v); ret = page_symlink(&inode->v, symname, strlen(symname) + 1); @@ -769,7 +769,7 @@ err_trans: err: mutex_unlock(&inode->ei_update_lock); - return ret; + return bch2_err_class(ret); } static int bch2_getattr(struct mnt_idmap *idmap, @@ -839,7 +839,7 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); if (IS_ERR(inode)) - return PTR_ERR(inode); + return bch2_err_class(PTR_ERR(inode)); d_mark_tmpfile(file, &inode->v); d_instantiate(file->f_path.dentry, &inode->v); @@ -1454,7 +1454,7 @@ static int bch2_vfs_write_inode(struct inode *vinode, ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - return ret; + return bch2_err_class(ret); } static void bch2_evict_inode(struct inode *vinode) @@ -1558,6 +1558,7 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) static int bch2_sync_fs(struct super_block *sb, int wait) { struct bch_fs *c = sb->s_fs_info; + int ret; if (c->opts.journal_flush_disabled) return 0; @@ -1567,7 +1568,8 @@ static int bch2_sync_fs(struct super_block *sb, int wait) return 0; } - return bch2_journal_flush(&c->journal); + ret = bch2_journal_flush(&c->journal); + return bch2_err_class(ret); } static struct bch_fs *bch2_path_to_fs(const char *path) @@ -1623,7 +1625,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) ret = bch2_parse_mount_opts(c, &opts, data); if (ret) - return ret; + goto err; if (opts.read_only != c->opts.read_only) { down_write(&c->state_lock); @@ -1637,7 +1639,8 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) if (ret) { bch_err(c, "error going rw: %i", ret); up_write(&c->state_lock); - return -EINVAL; + ret = -EINVAL; + goto err; } sb->s_flags &= ~SB_RDONLY; @@ -1650,8 +1653,8 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) if (opts.errors >= 0) c->opts.errors = opts.errors; - - return ret; +err: + return bch2_err_class(ret); } static int bch2_show_devname(struct seq_file *seq, struct dentry *root) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 96c107e0508e..50b3ba92c5ae 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -40,14 +40,14 @@ #include "util.h" #define SYSFS_OPS(type) \ -const struct sysfs_ops type ## _sysfs_ops = { \ +const struct sysfs_ops type ## _sysfs_ops = { \ .show = type ## _show, \ .store = type ## _store \ } #define SHOW(fn) \ static ssize_t fn ## _to_text(struct printbuf *, \ - struct kobject *, struct attribute *);\ + struct kobject *, struct attribute *); \ \ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ char *buf) \ @@ -66,15 +66,24 @@ static ssize_t 
fn ## _show(struct kobject *kobj, struct attribute *attr,\ memcpy(buf, out.buf, ret); \ } \ printbuf_exit(&out); \ - return ret; \ + return bch2_err_class(ret); \ } \ \ static ssize_t fn ## _to_text(struct printbuf *out, struct kobject *kobj,\ struct attribute *attr) #define STORE(fn) \ +static ssize_t fn ## _store_inner(struct kobject *, struct attribute *,\ + const char *, size_t); \ + \ static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\ const char *buf, size_t size) \ +{ \ + return bch2_err_class(fn##_store_inner(kobj, attr, buf, size)); \ +} \ + \ +static ssize_t fn ## _store_inner(struct kobject *kobj, struct attribute *attr,\ + const char *buf, size_t size) #define __sysfs_attribute(_name, _mode) \ static struct attribute sysfs_##_name = \ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 37793b3357d3..2b9fb4941e9f 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -350,17 +350,19 @@ err: bch2_trans_exit(&trans); if (ret) - return ret; + goto out; ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false); if (ret) - return ret; + goto out; ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); if (ret) - return ret; + goto out; return buf.used; +out: + return bch2_err_class(ret); } static int bch2_xattr_get_handler(const struct xattr_handler *handler, @@ -369,8 +371,10 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; + int ret; - return bch2_xattr_get(c, inode, name, buffer, size, handler->flags); + ret = bch2_xattr_get(c, inode, name, buffer, size, handler->flags); + return bch2_err_class(ret); } static int bch2_xattr_set_handler(const struct xattr_handler *handler, @@ -382,11 +386,13 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); + int ret; - return bch2_trans_do(c, NULL, NULL, 0, + ret = bch2_trans_do(c, NULL, NULL, 0, bch2_xattr_set(&trans, inode_inum(inode), &hash, name, value, size, handler->flags, flags)); + return bch2_err_class(ret); } static const struct xattr_handler bch_xattr_user_handler = { -- cgit v1.2.3 From 3e3e02e6bce627ed9e3a5d9fd3118e6569dc2548 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 19 Oct 2022 18:31:33 -0400 Subject: bcachefs: Assorted checkpatch fixes checkpatch.pl gives lots of warnings that we don't want - suggested ignore list: ASSIGN_IN_IF UNSPECIFIED_INT - bcachefs coding style prefers single token type names NEW_TYPEDEFS - typedefs are occasionally good FUNCTION_ARGUMENTS - we prefer to look at functions in .c files (hopefully with docbook documentation), not .h file prototypes MULTISTATEMENT_MACRO_USE_DO_WHILE - we have _many_ x-macros and other macros where we can't do this Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 2 +- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/bcachefs_format.h | 22 +++++++++++----------- fs/bcachefs/bkey.c | 4 ++-- fs/bcachefs/bkey.h | 5 +++-- fs/bcachefs/bset.c | 4 ++-- fs/bcachefs/btree_cache.c | 4 ++-- fs/bcachefs/btree_gc.c | 2 +- fs/bcachefs/btree_iter.c | 4 ++-- fs/bcachefs/btree_key_cache.c | 10 +++++----- fs/bcachefs/btree_locking.c | 2 +- fs/bcachefs/btree_update_interior.c | 2 +- fs/bcachefs/buckets.c | 8 ++++---- fs/bcachefs/checksum.c | 2 +- fs/bcachefs/data_update.c | 2 +- 
fs/bcachefs/debug.c | 2 +- fs/bcachefs/dirent.c | 2 +- fs/bcachefs/extents.c | 4 ++-- fs/bcachefs/fs-common.c | 4 ++-- fs/bcachefs/fs-io.c | 2 +- fs/bcachefs/fs.c | 7 +++---- fs/bcachefs/fsck.c | 3 ++- fs/bcachefs/inode.c | 2 +- fs/bcachefs/journal.c | 12 ++++++------ fs/bcachefs/journal_reclaim.c | 2 +- fs/bcachefs/journal_sb.c | 4 ++-- fs/bcachefs/move.c | 4 ++-- fs/bcachefs/recovery.c | 4 ++-- fs/bcachefs/siphash.c | 2 +- fs/bcachefs/super-io.c | 6 ++---- fs/bcachefs/super.c | 9 ++++----- fs/bcachefs/util.c | 2 -- 33 files changed, 72 insertions(+), 76 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 7edebeed779e..2bf58aa89f71 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -173,7 +173,7 @@ bch2_acl_to_xattr(struct btree_trans *trans, bkey_xattr_init(&xattr->k_i); xattr->k.u64s = u64s; xattr->v.x_type = acl_to_xattr_type(type); - xattr->v.x_name_len = 0, + xattr->v.x_name_len = 0; xattr->v.x_val_len = cpu_to_le16(acl_len); acl_header = xattr_val(&xattr->v); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 5d7231979024..ccc6be5a002f 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -490,7 +490,7 @@ again: * bch_bucket_alloc - allocate a single bucket from a specific device * * Returns index of bucket on success, 0 on failure - * */ + */ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct bch_dev *ca, enum alloc_reserve reserve, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 7ca1aa3a847f..544621dd4af4 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -107,7 +107,7 @@ * * BTREE NODES: * - * Our unit of allocation is a bucket, and we we can't arbitrarily allocate and + * Our unit of allocation is a bucket, and we can't arbitrarily allocate and * free smaller than a bucket - so, that's how big our btree nodes are. * * (If buckets are really big we'll only use part of the bucket for a btree node diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5471b797be93..2047484ebe4b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -340,7 +340,7 @@ static inline void bkey_init(struct bkey *k) * number. 
* * - WHITEOUT: for hash table btrees -*/ + */ #define BCH_BKEY_TYPES() \ x(deleted, 0) \ x(whiteout, 1) \ @@ -783,16 +783,16 @@ enum { * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL * flags) */ - __BCH_INODE_SYNC = 0, - __BCH_INODE_IMMUTABLE = 1, - __BCH_INODE_APPEND = 2, - __BCH_INODE_NODUMP = 3, - __BCH_INODE_NOATIME = 4, - - __BCH_INODE_I_SIZE_DIRTY= 5, - __BCH_INODE_I_SECTORS_DIRTY= 6, - __BCH_INODE_UNLINKED = 7, - __BCH_INODE_BACKPTR_UNTRUSTED = 8, + __BCH_INODE_SYNC = 0, + __BCH_INODE_IMMUTABLE = 1, + __BCH_INODE_APPEND = 2, + __BCH_INODE_NODUMP = 3, + __BCH_INODE_NOATIME = 4, + + __BCH_INODE_I_SIZE_DIRTY = 5, + __BCH_INODE_I_SECTORS_DIRTY = 6, + __BCH_INODE_UNLINKED = 7, + __BCH_INODE_BACKPTR_UNTRUSTED = 8, /* bits 20+ reserved for packed fields below: */ }; diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index 52af6f370eb9..e09a5e3fd709 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -1113,10 +1113,10 @@ int bch2_bkey_cmp_packed(const struct btree *b, if (bkey_packed(l)) { __bkey_unpack_key_format_checked(b, &unpacked, l); - l = (void*) &unpacked; + l = (void *) &unpacked; } else if (bkey_packed(r)) { __bkey_unpack_key_format_checked(b, &unpacked, r); - r = (void*) &unpacked; + r = (void *) &unpacked; } return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p); diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 2e7e6b6b4af7..d1d9b5d7e2c9 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -142,8 +142,9 @@ int bkey_cmp_left_packed(const struct btree *b, } /* - * we prefer to pass bpos by ref, but it's often enough terribly convenient to - * pass it by by val... as much as I hate c++, const ref would be nice here: + * The compiler generates better code when we pass bpos by ref, but it's often + * enough terribly convenient to pass it by val... 
as much as I hate c++, const + * ref would be nice here: */ __pure __flatten static inline int bkey_cmp_left_packed_byval(const struct btree *b, diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index f29fb9327cf7..e92737eb34e6 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -953,7 +953,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b, t->size -= j - l; for (j = l; j < t->size; j++) - rw_aux_tree(b, t)[j].offset += shift; + rw_aux_tree(b, t)[j].offset += shift; EBUG_ON(l < t->size && rw_aux_tree(b, t)[l].offset == @@ -1254,7 +1254,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter, bch2_btree_node_iter_sort(iter, b); } -noinline __flatten __attribute__((cold)) +noinline __flatten __cold static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, struct btree *b, struct bpos *search) { diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 75bc18466e75..135c3ea1377d 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -819,7 +819,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * if (likely(c->opts.btree_node_mem_ptr_optimization && b && b->hash_val == btree_ptr_hash_val(k))) - goto lock_node; + goto lock_node; retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { @@ -1059,7 +1059,7 @@ wait_on_io: /* XXX we're called from btree_gc which will be holding other btree * nodes locked - * */ + */ __bch2_btree_node_wait_on_read(b); __bch2_btree_node_wait_on_write(b); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 5d19029477cf..1bc5bded0546 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -318,7 +318,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b, " node %s", bch2_btree_ids[b->c.btree_id], b->c.level, buf1.buf, buf2.buf)) - ret = set_node_min(c, cur, expected_start); + ret = set_node_min(c, cur, expected_start); } out: fsck_err: diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 4402fcee26e3..51eac08e9eea 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2832,7 +2832,7 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) BUG_ON(trans->used_mempool); #ifdef __KERNEL__ - p = this_cpu_xchg(c->btree_paths_bufs->path , NULL); + p = this_cpu_xchg(c->btree_paths_bufs->path, NULL); #endif if (!p) p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); @@ -3002,7 +3002,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, rcu_read_lock(); owner = READ_ONCE(b->lock.owner); - pid = owner ? owner->pid : 0;; + pid = owner ? 
owner->pid : 0; rcu_read_unlock(); prt_tab(out); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 9a5729309b8f..179669dbd688 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "btree_cache.h" @@ -315,7 +316,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path) bool was_new = true; ck = bkey_cached_alloc(trans, path); - if (unlikely(IS_ERR(ck))) + if (IS_ERR(ck)) return ck; if (unlikely(!ck)) { @@ -435,7 +436,7 @@ err: return ret; } -noinline static int +static noinline int bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path, unsigned flags) { @@ -616,7 +617,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, * Since journal reclaim depends on us making progress here, and the * allocator/copygc depend on journal reclaim making progress, we need * to be using alloc reserves: - * */ + */ ret = bch2_btree_iter_traverse(&b_iter) ?: bch2_trans_update(trans, &b_iter, ck->k, BTREE_UPDATE_KEY_CACHE_RECLAIM| @@ -1019,8 +1020,7 @@ void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache * void bch2_btree_key_cache_exit(void) { - if (bch2_key_cache) - kmem_cache_destroy(bch2_key_cache); + kmem_cache_destroy(bch2_key_cache); } int __init bch2_btree_key_cache_init(void) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 5e9424fbc3be..1530457f0e69 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -681,7 +681,7 @@ int bch2_trans_relock(struct btree_trans *trans) struct btree_path *path; if (unlikely(trans->restarted)) - return - ((int) trans->restarted); + return -((int) trans->restarted); trans_for_each_path(trans, path) if (path->should_be_locked && diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 9680d83f9036..30b7c46cb86b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2047,7 +2047,7 @@ static int async_btree_node_rewrite_trans(struct btree_trans *trans, goto out; ret = bch2_btree_node_rewrite(trans, &iter, b, 0); -out : +out: bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 1a1790ac01ae..17a1e4767077 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -933,7 +933,7 @@ int bch2_mark_extent(struct btree_trans *trans, { u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1152,7 +1152,7 @@ int bch2_mark_reservation(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new; struct bch_fs_usage __percpu *fs_usage; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; @@ -1231,7 +1231,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? 
old : new; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; @@ -2102,5 +2102,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) return -ENOMEM; } - return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; + return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets); } diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index b5850a761b91..3268e8d48603 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -131,7 +131,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm, size_t orig_len = len; int ret, i; - sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL); + sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL); if (!sg) return -ENOMEM; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index c606f075688f..927deb3943b5 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -315,7 +315,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m, bch2_write_op_init(&m->op, c, io_opts); m->op.pos = bkey_start_pos(k.k); m->op.version = k.k->version; - m->op.target = data_opts.target, + m->op.target = data_opts.target; m->op.write_point = wp; m->op.flags |= BCH_WRITE_PAGES_STABLE| BCH_WRITE_PAGES_OWNED| diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index a9e4180d6a80..16be8d3db2ad 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -476,7 +476,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, if (i->iter < tbl->size) { rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) bch2_cached_btree_node_to_text(&i->buf, c, b); - i->iter++;; + i->iter++; } else { done = true; } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 4d942d224a08..288f46b55876 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -103,7 +103,7 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) { prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k),dirent_val_u64s(len)); + bkey_val_u64s(k.k), dirent_val_u64s(len)); return -EINVAL; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 38836c1990aa..bb1b862bfa65 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -291,7 +291,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <= lp.crc.uncompressed_size) { /* can use left extent's crc entry */ - } else if (lp.crc.live_size <= rp.crc.offset ) { + } else if (lp.crc.live_size <= rp.crc.offset) { /* can use right extent's crc entry */ } else { /* check if checksums can be merged: */ @@ -350,7 +350,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) if (crc_l.offset + crc_l.live_size + crc_r.live_size <= crc_l.uncompressed_size) { /* can use left extent's crc entry */ - } else if (crc_l.live_size <= crc_r.offset ) { + } else if (crc_l.live_size <= crc_r.offset) { /* can use right extent's crc entry */ crc_r.offset -= crc_l.live_size; bch2_extent_crc_pack(entry_to_crc(en_l), crc_r, diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index e9dd1d13ec7e..1f2e1fc4f6b2 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -487,11 +487,11 @@ int bch2_rename_trans(struct btree_trans *trans, ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: (src_dir.inum != dst_dir.inum ? 
bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) - : 0 ) ?: + : 0) ?: bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: (dst_inum.inum ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) - : 0 ); + : 0); err: bch2_trans_iter_exit(trans, &dst_inode_iter); bch2_trans_iter_exit(trans, &src_inode_iter); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 274dc78916f8..b1d53290f6ba 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2724,7 +2724,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len truncate_pagecache_range(&inode->v, offset, end - 1); - if (block_start < block_end ) { + if (block_start < block_end) { s64 i_sectors_delta = 0; ret = bch2_fpunch(c, inode_inum(inode), diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 66fcd3e28e0c..485cb9cbcd51 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -528,7 +528,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0, (subvol_inum) { 0 }, BCH_CREATE_TMPFILE); - if (unlikely(IS_ERR(inode))) + if (IS_ERR(inode)) return bch2_err_class(PTR_ERR(inode)); inode_lock(&inode->v); @@ -1847,7 +1847,7 @@ got_sb: sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); c->vfs_sb = sb; - strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); + strscpy(sb->s_id, c->name, sizeof(sb->s_id)); ret = super_setup_bdi(sb); if (ret) @@ -1918,8 +1918,7 @@ MODULE_ALIAS_FS("bcachefs"); void bch2_vfs_exit(void) { unregister_filesystem(&bcache_fs_type); - if (bch2_inode_cache) - kmem_cache_destroy(bch2_inode_cache); + kmem_cache_destroy(bch2_inode_cache); } int __init bch2_vfs_init(void) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 12f2ef4417cb..ca95d85b7348 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2044,7 +2044,8 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t, { if (t->nr == t->size) { size_t new_size = max_t(size_t, 128UL, t->size * 2); - void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL); + void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); + if (!d) { bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", new_size); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 18cfad860ddf..4161cd850eb8 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -314,7 +314,7 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) return -EINVAL; } - if (bch2_inode_unpack(k, &unpacked)){ + if (bch2_inode_unpack(k, &unpacked)) { prt_printf(err, "invalid variable length fields"); return -EINVAL; } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 97c1ecb65dbd..ed3ed3072db1 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -738,7 +738,7 @@ int bch2_journal_log_msg(struct journal *j, const char *fmt, ...) 
return ret; entry = container_of(journal_res_entry(j, &res), - struct jset_entry_log, entry);; + struct jset_entry_log, entry); memset(entry, 0, u64s * sizeof(u64)); entry->entry.type = BCH_JSET_ENTRY_log; entry->entry.u64s = u64s - 1; @@ -795,10 +795,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, bch2_journal_block(&c->journal); } - bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL); - ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL); - new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); - new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL); + bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL); + ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL); + new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL); + new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL); if (!bu || !ob || !new_buckets || !new_bucket_seq) { ret = -ENOMEM; goto err_unblock; @@ -1264,7 +1264,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) rcu_read_lock(); s = READ_ONCE(j->reservations); - prt_printf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size); + prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size); prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index a4f9d01d33cc..b683a13dbf87 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -232,7 +232,7 @@ void bch2_journal_space_available(struct journal *j) if ((j->space[journal_space_clean_ondisk].next_entry < j->space[journal_space_clean_ondisk].total) && (clean - clean_ondisk <= total / 8) && - (clean_ondisk * 2 > clean )) + (clean_ondisk * 2 > clean)) set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); else clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index cfdbd92d2164..c19db0425dd7 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -31,7 +31,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, if (!nr) return 0; - b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); + b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL); if (!b) return -ENOMEM; @@ -114,7 +114,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, if (!nr) return 0; - b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL); + b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL); if (!b) return -ENOMEM; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index f00c57c8e7a3..7a9d1e4466c5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -479,7 +479,7 @@ static int __bch2_move_data(struct moving_context *ctxt, /* * The iterator gets unlocked by __bch2_read_extent - need to * save a copy of @k elsewhere: - */ + */ bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); @@ -667,7 +667,7 @@ static bool migrate_pred(struct bch_fs *c, void *arg, i++; } - return data_opts->rewrite_ptrs != 0;; + return data_opts->rewrite_ptrs != 0; } static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index ea8cc636a9e0..580ff915d0e6 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -224,7 +224,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL); + new_keys.d = 
kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL); if (!new_keys.d) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); @@ -501,7 +501,7 @@ static int journal_keys_sort(struct bch_fs *c) keys->size = roundup_pow_of_two(nr_keys); - keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL); + keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL); if (!keys->d) return -ENOMEM; diff --git a/fs/bcachefs/siphash.c b/fs/bcachefs/siphash.c index c062edb3fbc2..dc1a27cc31cd 100644 --- a/fs/bcachefs/siphash.c +++ b/fs/bcachefs/siphash.c @@ -160,7 +160,7 @@ u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf) r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); memset(ctx, 0, sizeof(*ctx)); - return (r); + return r; } u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 2a347efdbd83..42e3ce7c0f8c 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -101,8 +101,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb, void bch2_free_super(struct bch_sb_handle *sb) { - if (sb->bio) - kfree(sb->bio); + kfree(sb->bio); if (!IS_ERR_OR_NULL(sb->bdev)) blkdev_put(sb->bdev, sb->holder); kfree(sb->holder); @@ -151,8 +150,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0); - if (sb->bio) - kfree(sb->bio); + kfree(sb->bio); sb->bio = bio; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index c69d64555339..8ee0783a1e78 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -461,8 +461,8 @@ static void __bch2_fs_free(struct bch_fs *c) kfree(c->unused_inode_hints); free_heap(&c->copygc_heap); - if (c->io_complete_wq ) - destroy_workqueue(c->io_complete_wq ); + if (c->io_complete_wq) + destroy_workqueue(c->io_complete_wq); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); if (c->btree_io_complete_wq) @@ -712,7 +712,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto err; pr_uuid(&name, c->sb.user_uuid.b); - strlcpy(c->name, name.buf, sizeof(c->name)); + strscpy(c->name, name.buf, sizeof(c->name)); printbuf_exit(&name); ret = name.allocation_failure ? -ENOMEM : 0; @@ -1786,9 +1786,8 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) } ret = bch2_trans_mark_dev_sb(c, ca); - if (ret) { + if (ret) goto err; - } mutex_lock(&c->sb_lock); mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index a58239fb2a6d..8b2eef24498e 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -789,8 +789,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) } } -#include "eytzinger.h" - static int alignment_ok(const void *base, size_t align) { return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -- cgit v1.2.3 From a7ecd30c8300624448c4e66cd7a7e7209b96ea61 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 4 Nov 2022 13:25:57 -0400 Subject: bcachefs: Factor out two_state_shared_lock We have a unique lock used for controlling adding to the pagecache: the lock has two states, where both states are shared - the lock may be held multiple times for either state - but not both states at the same time. This is exactly what we need for nocow mode locking, so this patch pulls it out of fs.c into its own file. 
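Concretely, the two-state semantics work like this (a minimal sketch only - the names come from the fs.c and fs.h hunks below, but the exact signatures are inferred from those wrappers, since the new two_state_shared_lock.c body is not part of this filtered diff):

	#include "two_state_shared_lock.h"

	static two_state_lock_t lock;

	static void two_state_example(void)
	{
		two_state_lock_init(&lock);

		/* Both states are shared - state 0 can be taken repeatedly: */
		bch2_two_state_lock(&lock, 0);
		bch2_two_state_lock(&lock, 0);

		/* ...but the two states conflict, so taking state 1 must fail: */
		BUG_ON(bch2_two_state_trylock(&lock, 1));

		bch2_two_state_unlock(&lock, 0);
		bch2_two_state_unlock(&lock, 0);

		/* Once all state-0 holders are gone, state 1 (also shared) succeeds: */
		bch2_two_state_lock(&lock, 1);
		bch2_two_state_unlock(&lock, 1);
	}

In fs.c this becomes the bch2_pagecache_add_*()/bch2_pagecache_block_*() pattern: any number of threads may be adding to the pagecache, or any number may be blocking additions, but never both at once.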
Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/fs-io.c | 50 +++++++++++++++++----------------- fs/bcachefs/fs.c | 54 +------------------------------------ fs/bcachefs/fs.h | 35 ++++++++---------------- fs/bcachefs/two_state_shared_lock.c | 33 +++++++++++++++++++++++ fs/bcachefs/two_state_shared_lock.h | 28 +++++++++++++++++++ 6 files changed, 99 insertions(+), 102 deletions(-) create mode 100644 fs/bcachefs/two_state_shared_lock.c create mode 100644 fs/bcachefs/two_state_shared_lock.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 444e79c62b50..966c9b9a74fc 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -65,6 +65,7 @@ bcachefs-y := \ sysfs.o \ tests.o \ trace.o \ + two_state_shared_lock.o \ util.o \ varint.o \ xattr.o diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 3c3fa95215ac..ab5b4e086e0a 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -751,25 +751,25 @@ vm_fault_t bch2_page_fault(struct vm_fault *vmf) if (fdm > mapping) { struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); - if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock)) + if (bch2_pagecache_add_tryget(inode)) goto got_lock; - bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock); + bch2_pagecache_block_put(fdm_host); - bch2_pagecache_add_get(&inode->ei_pagecache_lock); - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); + bch2_pagecache_add_put(inode); - bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock); + bch2_pagecache_block_get(fdm_host); /* Signal that lock has been dropped: */ set_fdm_dropped_locks(); return VM_FAULT_SIGBUS; } - bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); got_lock: ret = filemap_fault(vmf); - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); return ret; } @@ -797,7 +797,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) * a write_invalidate_inode_pages_range() that works without dropping * page lock before invalidating page */ - bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); lock_page(page); isize = i_size_read(&inode->v); @@ -830,7 +830,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) wait_for_stable_page(page); ret = VM_FAULT_LOCKED; out: - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); sb_end_pagefault(inode->v.i_sb); return ret; @@ -1098,7 +1098,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_init(&trans, c, 0, 0); - bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); while ((page = readpage_iter_next(&readpages_iter))) { pgoff_t index = readpages_iter.offset + readpages_iter.idx; @@ -1121,7 +1121,7 @@ void bch2_readahead(struct readahead_control *ractl) &readpages_iter); } - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); bch2_trans_exit(&trans); kfree(readpages_iter.pages); @@ -1483,7 +1483,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, bch2_page_reservation_init(c, inode, res); *fsdata = res; - bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); page = grab_cache_page_write_begin(mapping, index); if (!page) @@ -1540,7 +1540,7 @@ err: put_page(page); *pagep = NULL; err_unlock: - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); kfree(res); *fsdata = NULL; return bch2_err_class(ret); @@ -1584,7 
+1584,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping, unlock_page(page); put_page(page); - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); bch2_page_reservation_put(c, inode, res); kfree(res); @@ -1753,7 +1753,7 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) ssize_t written = 0; int ret = 0; - bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); do { unsigned offset = pos & (PAGE_SIZE - 1); @@ -1811,7 +1811,7 @@ again: balance_dirty_pages_ratelimited(mapping); } while (iov_iter_count(iter)); - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); return written ? written : ret; } @@ -1991,9 +1991,9 @@ ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret >= 0) iocb->ki_pos += ret; } else { - bch2_pagecache_add_get(&inode->ei_pagecache_lock); + bch2_pagecache_add_get(inode); ret = generic_file_read_iter(iocb, iter); - bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_pagecache_add_put(inode); } out: return bch2_err_class(ret); @@ -2149,7 +2149,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) return -EIOCBQUEUED; } - bch2_pagecache_block_put(&inode->ei_pagecache_lock); + bch2_pagecache_block_put(inode); bch2_quota_reservation_put(c, inode, &dio->quota_res); if (dio->free_iov) @@ -2357,7 +2357,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) goto err; inode_dio_begin(&inode->v); - bch2_pagecache_block_get(&inode->ei_pagecache_lock); + bch2_pagecache_block_get(inode); extending = req->ki_pos + iter->count > inode->v.i_size; if (!extending) { @@ -2403,7 +2403,7 @@ err: inode_unlock(&inode->v); return ret; err_put_bio: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); + bch2_pagecache_block_put(inode); bch2_quota_reservation_put(c, inode, &dio->quota_res); bio_put(bio); inode_dio_end(&inode->v); @@ -2704,7 +2704,7 @@ int bch2_truncate(struct mnt_idmap *idmap, } inode_dio_wait(&inode->v); - bch2_pagecache_block_get(&inode->ei_pagecache_lock); + bch2_pagecache_block_get(inode); ret = bch2_inode_find_by_inum(c, inode_inum(inode), &inode_u); if (ret) @@ -2783,7 +2783,7 @@ int bch2_truncate(struct mnt_idmap *idmap, ret = bch2_setattr_nonsize(idmap, inode, iattr); err: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); + bch2_pagecache_block_put(inode); return bch2_err_class(ret); } @@ -3195,7 +3195,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, inode_lock(&inode->v); inode_dio_wait(&inode->v); - bch2_pagecache_block_get(&inode->ei_pagecache_lock); + bch2_pagecache_block_get(inode); ret = file_modified(file); if (ret) @@ -3212,7 +3212,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode, else ret = -EOPNOTSUPP; err: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); + bch2_pagecache_block_put(inode); inode_unlock(&inode->v); percpu_ref_put(&c->writes); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 485cb9cbcd51..90297cfc7934 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -43,58 +43,6 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_subvolume *); -static void __pagecache_lock_put(struct pagecache_lock *lock, long i) -{ - BUG_ON(atomic_long_read(&lock->v) == 0); - - if (atomic_long_sub_return_release(i, &lock->v) == 0) - wake_up_all(&lock->wait); -} - -static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i) -{ - long v = 
atomic_long_read(&lock->v), old; - - do { - old = v; - - if (i > 0 ? v < 0 : v > 0) - return false; - } while ((v = atomic_long_cmpxchg_acquire(&lock->v, - old, old + i)) != old); - return true; -} - -static void __pagecache_lock_get(struct pagecache_lock *lock, long i) -{ - wait_event(lock->wait, __pagecache_lock_tryget(lock, i)); -} - -void bch2_pagecache_add_put(struct pagecache_lock *lock) -{ - __pagecache_lock_put(lock, 1); -} - -bool bch2_pagecache_add_tryget(struct pagecache_lock *lock) -{ - return __pagecache_lock_tryget(lock, 1); -} - -void bch2_pagecache_add_get(struct pagecache_lock *lock) -{ - __pagecache_lock_get(lock, 1); -} - -void bch2_pagecache_block_put(struct pagecache_lock *lock) -{ - __pagecache_lock_put(lock, -1); -} - -void bch2_pagecache_block_get(struct pagecache_lock *lock) -{ - __pagecache_lock_get(lock, -1); -} - void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -1410,7 +1358,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) inode_init_once(&inode->v); mutex_init(&inode->ei_update_lock); - pagecache_lock_init(&inode->ei_pagecache_lock); + two_state_lock_init(&inode->ei_pagecache_lock); mutex_init(&inode->ei_quota_lock); return &inode->v; diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 73b96d0b5d83..4164d0669d70 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -6,31 +6,11 @@ #include "opts.h" #include "str_hash.h" #include "quota_types.h" +#include "two_state_shared_lock.h" #include #include -/* - * Two-state lock - can be taken for add or block - both states are shared, - * like read side of rwsem, but conflict with other state: - */ -struct pagecache_lock { - atomic_long_t v; - wait_queue_head_t wait; -}; - -static inline void pagecache_lock_init(struct pagecache_lock *lock) -{ - atomic_long_set(&lock->v, 0); - init_waitqueue_head(&lock->wait); -} - -void bch2_pagecache_add_put(struct pagecache_lock *); -bool bch2_pagecache_add_tryget(struct pagecache_lock *); -void bch2_pagecache_add_get(struct pagecache_lock *); -void bch2_pagecache_block_put(struct pagecache_lock *); -void bch2_pagecache_block_get(struct pagecache_lock *); - struct bch_inode_info { struct inode v; unsigned long ei_flags; @@ -38,7 +18,7 @@ struct bch_inode_info { struct mutex ei_update_lock; u64 ei_quota_reserved; unsigned long ei_last_dirtied; - struct pagecache_lock ei_pagecache_lock; + two_state_lock_t ei_pagecache_lock; struct mutex ei_quota_lock; struct bch_qid ei_qid; @@ -49,6 +29,13 @@ struct bch_inode_info { struct bch_inode_unpacked ei_inode; }; +#define bch2_pagecache_add_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 0) +#define bch2_pagecache_add_tryget(i) bch2_two_state_trylock(&i->ei_pagecache_lock, 0) +#define bch2_pagecache_add_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 0) + +#define bch2_pagecache_block_put(i) bch2_two_state_unlock(&i->ei_pagecache_lock, 1) +#define bch2_pagecache_block_get(i) bch2_two_state_lock(&i->ei_pagecache_lock, 1) + static inline subvol_inum inode_inum(struct bch_inode_info *inode) { return (subvol_inum) { @@ -95,7 +82,7 @@ do { \ if ((_locks) & INODE_LOCK) \ down_write_nested(&a[i]->v.i_rwsem, i); \ if ((_locks) & INODE_PAGECACHE_BLOCK) \ - bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\ + bch2_pagecache_block_get(a[i]);\ if ((_locks) & INODE_UPDATE_LOCK) \ mutex_lock_nested(&a[i]->ei_update_lock, i);\ } \ @@ -113,7 +100,7 @@ do { \ if ((_locks) & INODE_LOCK) \ up_write(&a[i]->v.i_rwsem); \ if ((_locks) & INODE_PAGECACHE_BLOCK) 
\ - bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\ + bch2_pagecache_block_put(a[i]);\ if ((_locks) & INODE_UPDATE_LOCK) \ mutex_unlock(&a[i]->ei_update_lock); \ } \ diff --git a/fs/bcachefs/two_state_shared_lock.c b/fs/bcachefs/two_state_shared_lock.c new file mode 100644 index 000000000000..dc508d545de0 --- /dev/null +++ b/fs/bcachefs/two_state_shared_lock.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "two_state_shared_lock.h" + +void bch2_two_state_unlock(two_state_lock_t *lock, int s) +{ + long i = s ? 1 : -1; + + BUG_ON(atomic_long_read(&lock->v) == 0); + + if (atomic_long_sub_return_release(i, &lock->v) == 0) + wake_up_all(&lock->wait); +} + +bool bch2_two_state_trylock(two_state_lock_t *lock, int s) +{ + long i = s ? 1 : -1; + long v = atomic_long_read(&lock->v), old; + + do { + old = v; + + if (i > 0 ? v < 0 : v > 0) + return false; + } while ((v = atomic_long_cmpxchg_acquire(&lock->v, + old, old + i)) != old); + return true; +} + +void bch2_two_state_lock(two_state_lock_t *lock, int s) +{ + wait_event(lock->wait, bch2_two_state_trylock(lock, s)); +} diff --git a/fs/bcachefs/two_state_shared_lock.h b/fs/bcachefs/two_state_shared_lock.h new file mode 100644 index 000000000000..1b4f108908a1 --- /dev/null +++ b/fs/bcachefs/two_state_shared_lock.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_TWO_STATE_LOCK_H +#define _BCACHEFS_TWO_STATE_LOCK_H + +#include +#include +#include + +/* + * Two-state lock - can be taken for add or block - both states are shared, + * like read side of rwsem, but conflict with other state: + */ +typedef struct { + atomic_long_t v; + wait_queue_head_t wait; +} two_state_lock_t; + +static inline void two_state_lock_init(two_state_lock_t *lock) +{ + atomic_long_set(&lock->v, 0); + init_waitqueue_head(&lock->wait); +} + +void bch2_two_state_unlock(two_state_lock_t *, int); +bool bch2_two_state_trylock(two_state_lock_t *, int); +void bch2_two_state_lock(two_state_lock_t *, int); + +#endif /* _BCACHEFS_TWO_STATE_LOCK_H */ -- cgit v1.2.3 From 78c0b75c34209c471616566b3978eac4c1c53e99 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 19 Nov 2022 22:39:08 -0500 Subject: bcachefs: More errcode cleanup We shouldn't be overloading standard error codes now that we have provisions for bcachefs-specific errorcodes: this patch converts super.c and super-io.c to per error site errcodes, with a bit of cleanup. 
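As an illustration of the intended pattern (a sketch, not code from this patch - check_version() and mount_boundary() are invented names, while bch2_err_str() and bch2_err_class() are the helpers used throughout the diff below): return the most specific private errcode at the error site, log it by name, and only fold it back to its standard class at the userspace boundary.

static int check_version(struct bch_sb *sb)
{
	if (le16_to_cpu(sb->version) >= bcachefs_metadata_version_max)
		return -BCH_ERR_invalid_sb_version;	/* specific error site */
	return 0;
}

static int mount_boundary(struct bch_sb *sb)
{
	int ret = check_version(sb);

	if (ret)
		pr_err("error validating superblock: %s", bch2_err_str(ret));

	/* -BCH_ERR_invalid_sb_version -> invalid_sb -> invalid -> -EINVAL: */
	return bch2_err_class(ret);
}

Because each errcode names its parent class in the x() table below, bch2_err_class() can walk invalid_sb_version up to -EINVAL, so userspace still sees a standard errno while the logs identify the exact failing check.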
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 20 +++++----- fs/bcachefs/bkey_methods.c | 28 +++++++------- fs/bcachefs/dirent.c | 16 ++++---- fs/bcachefs/disk_groups.c | 13 +++---- fs/bcachefs/ec.c | 8 ++-- fs/bcachefs/errcode.h | 40 ++++++++++++++++++- fs/bcachefs/extents.c | 36 +++++++++--------- fs/bcachefs/fs.c | 7 +++- fs/bcachefs/inode.c | 26 ++++++------- fs/bcachefs/journal_sb.c | 4 +- fs/bcachefs/journal_seq_blacklist.c | 4 +- fs/bcachefs/lru.c | 2 +- fs/bcachefs/quota.c | 6 +-- fs/bcachefs/reflink.c | 4 +- fs/bcachefs/replicas.c | 10 ++--- fs/bcachefs/subvolume.c | 16 ++++---- fs/bcachefs/super-io.c | 68 ++++++++++++++++----------------- fs/bcachefs/super.c | 76 ++++++++++++++++--------------------- fs/bcachefs/xattr.c | 10 ++--- 19 files changed, 211 insertions(+), 183 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index a0b9fa30260a..cef5de13a6e4 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -302,7 +302,7 @@ int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) { prt_printf(err, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -315,7 +315,7 @@ int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bch2_alloc_unpack_v2(&u, k)) { prt_printf(err, "unpack error"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -328,7 +328,7 @@ int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bch2_alloc_unpack_v3(&u, k)) { prt_printf(err, "unpack error"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -342,14 +342,14 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) != sizeof(struct bch_alloc_v4)) { prt_printf(err, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_alloc_v4)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (rw == WRITE) { if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) { prt_printf(err, "invalid data type (got %u should be %u)", a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } switch (a.v->data_type) { @@ -360,7 +360,7 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, a.v->cached_sectors || a.v->stripe) { prt_printf(err, "empty data type free but have data"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } break; case BCH_DATA_sb: @@ -371,7 +371,7 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, if (!a.v->dirty_sectors) { prt_printf(err, "data_type %s but dirty_sectors==0", bch2_data_types[a.v->data_type]); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } break; case BCH_DATA_cached: @@ -379,20 +379,20 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, a.v->dirty_sectors || a.v->stripe) { prt_printf(err, "data type inconsistency"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (!a.v->io_time[READ] && test_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags)) { prt_printf(err, "cached bucket with read_time == 0"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } break; case BCH_DATA_stripe: if (!a.v->stripe) { prt_printf(err, "data_type %s but stripe==0", bch2_data_types[a.v->data_type]); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } break; } diff --git a/fs/bcachefs/bkey_methods.c 
b/fs/bcachefs/bkey_methods.c index 7fcd6ca40b93..29809da5e9cf 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -42,7 +42,7 @@ static int empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k)) { prt_printf(err, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -58,7 +58,7 @@ static int key_type_cookie_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) { prt_printf(err, "incorrect value size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_cookie)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -99,7 +99,7 @@ static int key_type_set_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k)) { prt_printf(err, "incorrect value size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_cookie)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -127,7 +127,7 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, { if (k.k->type >= KEY_TYPE_MAX) { prt_printf(err, "invalid type (%u >= %u)", k.k->type, KEY_TYPE_MAX); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return bch2_bkey_ops[k.k->type].key_invalid(c, k, rw, err); @@ -203,30 +203,30 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, { if (k.k->u64s < BKEY_U64s) { prt_printf(err, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (!(bch2_key_types_allowed[type] & (1U << k.k->type))) { prt_printf(err, "invalid key type for btree %s (%s)", bch2_btree_ids[type], bch2_bkey_types[k.k->type]); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { if (k.k->size == 0) { prt_printf(err, "size == 0"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (k.k->size > k.k->p.offset) { prt_printf(err, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } } else { if (k.k->size) { prt_printf(err, "size != 0"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } } @@ -234,20 +234,20 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, !btree_type_has_snapshots(type) && k.k->p.snapshot) { prt_printf(err, "nonzero snapshot"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (type != BKEY_TYPE_btree && btree_type_has_snapshots(type) && !k.k->p.snapshot) { prt_printf(err, "snapshot == 0"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (type != BKEY_TYPE_btree && bkey_eq(k.k->p, POS_MAX)) { prt_printf(err, "key at POS_MAX"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -266,12 +266,12 @@ int bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k, { if (bpos_lt(k.k->p, b->data->min_key)) { prt_printf(err, "key before start of btree node"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bpos_gt(k.k->p, b->data->max_key)) { prt_printf(err, "key past end of btree node"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index c2126f39369b..f1838b7c45ee 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -92,46 +92,46 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(*d.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } 
len = bch2_dirent_name_bytes(d); if (!len) { prt_printf(err, "empty name"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) { prt_printf(err, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(len)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (len > BCH_NAME_MAX) { prt_printf(err, "dirent name too big (%u > %u)", len, BCH_NAME_MAX); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (len == 1 && !memcmp(d.v->d_name, ".", 1)) { prt_printf(err, "invalid name"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (len == 2 && !memcmp(d.v->d_name, "..", 2)) { prt_printf(err, "invalid name"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (memchr(d.v->d_name, '/', len)) { prt_printf(err, "invalid name"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (d.v->d_type != DT_SUBVOL && le64_to_cpu(d.v->d_inum) == d.k->p.inode) { prt_printf(err, "dirent points to own directory"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 6b81f35861ac..fcd5dbff248d 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -27,7 +27,7 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, struct bch_sb_field_members *mi = bch2_sb_get_members(sb); unsigned nr_groups = disk_groups_nr(groups); unsigned i, len; - int ret = -EINVAL; + int ret = 0; for (i = 0; i < sb->nr_devices; i++) { struct bch_member *m = mi->members + i; @@ -41,12 +41,12 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, if (g >= nr_groups) { prt_printf(err, "disk %u has invalid label %u (have %u)", i, g, nr_groups); - return -EINVAL; + return -BCH_ERR_invalid_sb_disk_groups; } if (BCH_GROUP_DELETED(&groups->entries[g])) { prt_printf(err, "disk %u has deleted label %u", i, g); - return -EINVAL; + return -BCH_ERR_invalid_sb_disk_groups; } } @@ -62,7 +62,7 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, len = strnlen(g->label, sizeof(g->label)); if (!len) { prt_printf(err, "label %u empty", i); - return -EINVAL; + return -BCH_ERR_invalid_sb_disk_groups; } } @@ -79,13 +79,12 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, prt_printf(err, "duplicate label %llu.%.*s", BCH_GROUP_PARENT(g), (int) sizeof(g->label), g->label); + ret = -BCH_ERR_invalid_sb_disk_groups; goto err; } - - ret = 0; err: kfree(sorted); - return 0; + return ret; } static void bch2_sb_disk_groups_to_text(struct printbuf *out, diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 503a47b39ad1..c855ea025f0e 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -109,24 +109,24 @@ int bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_eq(k.k->p, POS_MIN)) { prt_printf(err, "stripe at POS_MIN"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (k.k->p.inode) { prt_printf(err, "nonzero inode field"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_bytes(k.k) < sizeof(*s)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(*s)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_u64s(k.k) < stripe_val_u64s(s)) { prt_printf(err, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return bch2_bkey_ptrs_invalid(c, k, rw, err); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 3ec5808dcbd9..dc388864be6f 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ 
-67,7 +67,45 @@ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ x(BCH_ERR_fsck, fsck_repair_impossible) \ x(0, need_snapshot_cleanup) \ - x(0, need_topology_repair) + x(0, need_topology_repair) \ + x(EINVAL, device_state_not_allowed) \ + x(EINVAL, member_info_missing) \ + x(EINVAL, mismatched_block_size) \ + x(EINVAL, block_size_too_small) \ + x(EINVAL, bucket_size_too_small) \ + x(EINVAL, device_size_too_small) \ + x(EINVAL, device_not_a_member_of_filesystem) \ + x(EINVAL, device_has_been_removed) \ + x(EINVAL, device_already_online) \ + x(EINVAL, insufficient_devices_to_start) \ + x(EINVAL, invalid) \ + x(BCH_ERR_invalid, invalid_sb) \ + x(BCH_ERR_invalid_sb, invalid_sb_magic) \ + x(BCH_ERR_invalid_sb, invalid_sb_version) \ + x(BCH_ERR_invalid_sb, invalid_sb_features) \ + x(BCH_ERR_invalid_sb, invalid_sb_too_big) \ + x(BCH_ERR_invalid_sb, invalid_sb_csum_type) \ + x(BCH_ERR_invalid_sb, invalid_sb_csum) \ + x(BCH_ERR_invalid_sb, invalid_sb_block_size) \ + x(BCH_ERR_invalid_sb, invalid_sb_uuid) \ + x(BCH_ERR_invalid_sb, invalid_sb_too_many_members) \ + x(BCH_ERR_invalid_sb, invalid_sb_dev_idx) \ + x(BCH_ERR_invalid_sb, invalid_sb_time_precision) \ + x(BCH_ERR_invalid_sb, invalid_sb_field_size) \ + x(BCH_ERR_invalid_sb, invalid_sb_layout) \ + x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_type) \ + x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_nr_superblocks) \ + x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_superblocks_overlap) \ + x(BCH_ERR_invalid_sb, invalid_sb_members_missing) \ + x(BCH_ERR_invalid_sb, invalid_sb_members) \ + x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \ + x(BCH_ERR_invalid_sb, invalid_sb_replicas) \ + x(BCH_ERR_invalid_sb, invalid_sb_journal) \ + x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \ + x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ + x(BCH_ERR_invalid_sb, invalid_sb_clean) \ + x(BCH_ERR_invalid_sb, invalid_sb_quota) \ + x(BCH_ERR_invalid, invalid_bkey) \ enum bch_errcode { BCH_ERR_START = 2048, diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index e3bc39bee197..422adca7230b 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -163,7 +163,7 @@ int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) { prt_printf(err, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return bch2_bkey_ptrs_invalid(c, k, rw, err); @@ -183,20 +183,20 @@ int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) <= sizeof(*bp.v)) { prt_printf(err, "value too small (%zu <= %zu)", bkey_val_bytes(k.k), sizeof(*bp.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) { prt_printf(err, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (c->sb.version < bcachefs_metadata_version_snapshot && bp.v->min_key.snapshot) { prt_printf(err, "invalid min_key.snapshot (%u != 0)", bp.v->min_key.snapshot); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return bch2_bkey_ptrs_invalid(c, k, rw, err); @@ -387,13 +387,13 @@ int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) { prt_printf(err, "incorrect value size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(*r.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) { prt_printf(err, 
"invalid nr_replicas (%u)", r.v->nr_replicas); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -1054,14 +1054,14 @@ static int extent_ptr_invalid(const struct bch_fs *c, if (!bch2_dev_exists2(c, ptr->dev)) { prt_printf(err, "pointer to invalid device (%u)", ptr->dev); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } ca = bch_dev_bkey_exists(c, ptr->dev); bkey_for_each_ptr(ptrs, ptr2) if (ptr != ptr2 && ptr->dev == ptr2->dev) { prt_printf(err, "multiple pointers to same device (%u)", ptr->dev); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset); @@ -1069,19 +1069,19 @@ static int extent_ptr_invalid(const struct bch_fs *c, if (bucket >= ca->mi.nbuckets) { prt_printf(err, "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) { prt_printf(err, "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bucket_offset + size_ondisk > ca->mi.bucket_size) { prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, ca->mi.bucket_size); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -1105,13 +1105,13 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) { prt_printf(err, "invalid extent entry type (got %u, max %u)", __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_is_btree_ptr(k.k) && !extent_entry_is_ptr(entry)) { prt_printf(err, "has non ptr field"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } switch (extent_entry_type(entry)) { @@ -1130,19 +1130,19 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, if (crc.offset + crc.live_size > crc.uncompressed_size) { prt_printf(err, "checksum offset + key size > uncompressed size"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } size_ondisk = crc.compressed_size; if (!bch2_checksum_type_valid(c, crc.csum_type)) { prt_printf(err, "invalid checksum type"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) { prt_printf(err, "invalid compression type"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bch2_csum_type_is_encryption(crc.csum_type)) { @@ -1150,7 +1150,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) { prt_printf(err, "incorrect nonce"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } } break; @@ -1161,7 +1161,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, if (nr_ptrs >= BCH_BKEY_PTRS_MAX) { prt_str(err, "too many ptrs"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 90297cfc7934..cc41472a335e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1767,8 +1767,11 @@ got_sb: kfree(devs[0]); kfree(devs); - if (IS_ERR(sb)) - return ERR_CAST(sb); + if (IS_ERR(sb)) { + ret = PTR_ERR(sb); + ret = bch2_err_class(ret); + return ERR_PTR(ret); + } c = sb->s_fs_info; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 4ca70c6c3a4f..cf453edcb5ab 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -306,40 +306,40 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct 
printbuf *err) if (k.k->p.inode) { prt_printf(err, "nonzero k.p.inode"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (k.k->p.offset < BLOCKDEV_INODE_MAX) { prt_printf(err, "fs inode in blockdev range"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bch2_inode_unpack(k, &unpacked)) { prt_printf(err, "invalid variable length fields"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) { prt_printf(err, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) { prt_printf(err, "invalid data checksum type (%u >= %u)", unpacked.bi_compression, BCH_COMPRESSION_OPT_NR + 1); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && unpacked.bi_nlink != 0) { prt_printf(err, "flagged as unlinked but bi_nlink != 0"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode)) { prt_printf(err, "subvolume root but not a directory"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; @@ -353,13 +353,13 @@ int bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) < sizeof(*inode.v)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(*inode.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { prt_printf(err, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return __bch2_inode_invalid(k, err); @@ -373,13 +373,13 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) < sizeof(*inode.v)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(*inode.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR) { prt_printf(err, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return __bch2_inode_invalid(k, err); @@ -421,13 +421,13 @@ int bch2_inode_generation_invalid(const struct bch_fs *c, struct bkey_s_c k, { if (k.k->p.inode) { prt_printf(err, "nonzero k.p.inode"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) { prt_printf(err, "incorrect value size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_inode_generation)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index c19db0425dd7..9b933330a4c3 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -22,7 +22,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, { struct bch_sb_field_journal *journal = field_to_type(f, journal); struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; - int ret = -EINVAL; + int ret = -BCH_ERR_invalid_sb_journal; unsigned nr; unsigned i; u64 *b; @@ -105,7 +105,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, { struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; - int ret = -EINVAL; + int ret = -BCH_ERR_invalid_sb_journal; unsigned nr; unsigned i; struct u64_range *b; diff --git a/fs/bcachefs/journal_seq_blacklist.c 
b/fs/bcachefs/journal_seq_blacklist.c index 5c555b3703c0..012c870acce0 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -203,7 +203,7 @@ static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, le64_to_cpu(e->end)) { prt_printf(err, "entry %u start >= end (%llu >= %llu)", i, le64_to_cpu(e->start), le64_to_cpu(e->end)); - return -EINVAL; + return -BCH_ERR_invalid_sb_journal_seq_blacklist; } if (i + 1 < nr && @@ -211,7 +211,7 @@ static int bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb, le64_to_cpu(e[1].start)) { prt_printf(err, "entry %u out of order with next entry (%llu > %llu)", i + 1, le64_to_cpu(e[0].end), le64_to_cpu(e[1].start)); - return -EINVAL; + return -BCH_ERR_invalid_sb_journal_seq_blacklist; } } diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 53e607d72274..db1674ef1d22 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -16,7 +16,7 @@ int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) < sizeof(*lru)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(*lru)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 7f74c026e9da..ededc826e9a0 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -26,7 +26,7 @@ static int bch2_sb_quota_validate(struct bch_sb *sb, struct bch_sb_field *f, if (vstruct_bytes(&q->field) < sizeof(*q)) { prt_printf(err, "wrong size (got %zu should be %zu)", vstruct_bytes(&q->field), sizeof(*q)); - return -EINVAL; + return -BCH_ERR_invalid_sb_quota; } return 0; @@ -64,13 +64,13 @@ int bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k, if (k.k->p.inode >= QTYP_NR) { prt_printf(err, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) { prt_printf(err, "incorrect value size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_quota)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index aebed671c43a..8c426d6440c9 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -85,7 +85,7 @@ int bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(r.k) < sizeof(*r.v)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(r.k), sizeof(*r.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return bch2_bkey_ptrs_invalid(c, k, rw, err); @@ -136,7 +136,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) < sizeof(struct bch_indirect_inline_data)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(struct bch_indirect_inline_data)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index e540c1aa91ba..482bedf4be8b 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -841,27 +841,27 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, if (e->data_type >= BCH_DATA_NR) { prt_printf(err, "invalid data type in entry "); bch2_replicas_entry_to_text(err, e); - return -EINVAL; + return -BCH_ERR_invalid_sb_replicas; } if (!e->nr_devs) { prt_printf(err, "no devices in entry "); bch2_replicas_entry_to_text(err, e); - return -EINVAL; + return -BCH_ERR_invalid_sb_replicas; } if (e->nr_required > 1 && e->nr_required >= 
e->nr_devs) { prt_printf(err, "bad nr_required in entry "); bch2_replicas_entry_to_text(err, e); - return -EINVAL; + return -BCH_ERR_invalid_sb_replicas; } for (j = 0; j < e->nr_devs; j++) if (!bch2_dev_exists(sb, mi, e->devs[j])) { prt_printf(err, "invalid device %u in entry ", e->devs[j]); bch2_replicas_entry_to_text(err, e); - return -EINVAL; + return -BCH_ERR_invalid_sb_replicas; } if (i + 1 < cpu_r->nr) { @@ -873,7 +873,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, if (!memcmp(e, n, cpu_r->entry_size)) { prt_printf(err, "duplicate replicas entry "); bch2_replicas_entry_to_text(err, e); - return -EINVAL; + return -BCH_ERR_invalid_sb_replicas; } } } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index e37ffaad5883..f19f6f8d3233 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -33,13 +33,13 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_gt(k.k->p, POS(0, U32_MAX)) || bkey_lt(k.k->p, POS(0, 1))) { prt_printf(err, "bad pos"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_bytes(k.k) != sizeof(struct bch_snapshot)) { prt_printf(err, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_snapshot)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } s = bkey_s_c_to_snapshot(k); @@ -48,18 +48,18 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, if (id && id <= k.k->p.offset) { prt_printf(err, "bad parent node (%u <= %llu)", id, k.k->p.offset); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { prt_printf(err, "children not normalized"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (s.v->children[0] && s.v->children[0] == s.v->children[1]) { prt_printf(err, "duplicate child nodes"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } for (i = 0; i < 2; i++) { @@ -68,7 +68,7 @@ int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, if (id >= k.k->p.offset) { prt_printf(err, "bad child node (%u >= %llu)", id, k.k->p.offset); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } } @@ -773,13 +773,13 @@ int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_lt(k.k->p, SUBVOL_POS_MIN) || bkey_gt(k.k->p, SUBVOL_POS_MAX)) { prt_printf(err, "invalid pos"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_bytes(k.k) != sizeof(struct bch_subvolume)) { prt_printf(err, "incorrect value size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_subvolume)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e27b301432b1..8dfe92d7eb77 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -216,23 +216,23 @@ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) && !uuid_equal(&layout->magic, &BCHFS_MAGIC)) { prt_printf(out, "Not a bcachefs superblock layout"); - return -EINVAL; + return -BCH_ERR_invalid_sb_layout; } if (layout->layout_type != 0) { prt_printf(out, "Invalid superblock layout type %u", layout->layout_type); - return -EINVAL; + return -BCH_ERR_invalid_sb_layout_type; } if (!layout->nr_superblocks) { prt_printf(out, "Invalid superblock layout: no superblocks"); - return -EINVAL; + return -BCH_ERR_invalid_sb_layout_nr_superblocks; } if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) { prt_printf(out, "Invalid superblock layout: too many 
superblocks"); - return -EINVAL; + return -BCH_ERR_invalid_sb_layout_nr_superblocks; } max_sectors = 1 << layout->sb_max_size_bits; @@ -246,7 +246,7 @@ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out prt_printf(out, "Invalid superblock layout: superblocks overlap\n" " (sb %u ends at %llu next starts at %llu", i - 1, prev_offset + max_sectors, offset); - return -EINVAL; + return -BCH_ERR_invalid_sb_layout_superblocks_overlap; } prev_offset = offset; } @@ -273,25 +273,25 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, if (version >= bcachefs_metadata_version_max) { prt_printf(out, "Unsupported superblock version %u (min %u, max %u)", version, bcachefs_metadata_version_min, bcachefs_metadata_version_max); - return -EINVAL; + return -BCH_ERR_invalid_sb_version; } if (version_min < bcachefs_metadata_version_min) { prt_printf(out, "Unsupported superblock version %u (min %u, max %u)", version_min, bcachefs_metadata_version_min, bcachefs_metadata_version_max); - return -EINVAL; + return -BCH_ERR_invalid_sb_version; } if (version_min > version) { prt_printf(out, "Bad minimum version %u, greater than version field %u", version_min, version); - return -EINVAL; + return -BCH_ERR_invalid_sb_version; } if (sb->features[1] || (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) { prt_printf(out, "Filesystem has incompatible features"); - return -EINVAL; + return -BCH_ERR_invalid_sb_features; } block_size = le16_to_cpu(sb->block_size); @@ -299,37 +299,37 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, if (block_size > PAGE_SECTORS) { prt_printf(out, "Block size too big (got %u, max %u)", block_size, PAGE_SECTORS); - return -EINVAL; + return -BCH_ERR_invalid_sb_block_size; } if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { prt_printf(out, "Bad user UUID (got zeroes)"); - return -EINVAL; + return -BCH_ERR_invalid_sb_uuid; } if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) { prt_printf(out, "Bad intenal UUID (got zeroes)"); - return -EINVAL; + return -BCH_ERR_invalid_sb_uuid; } if (!sb->nr_devices || sb->nr_devices > BCH_SB_MEMBERS_MAX) { prt_printf(out, "Bad number of member devices %u (max %u)", sb->nr_devices, BCH_SB_MEMBERS_MAX); - return -EINVAL; + return -BCH_ERR_invalid_sb_too_many_members; } if (sb->dev_idx >= sb->nr_devices) { prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)", sb->dev_idx, sb->nr_devices); - return -EINVAL; + return -BCH_ERR_invalid_sb_dev_idx; } if (!sb->time_precision || le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) { prt_printf(out, "Invalid time precision: %u (min 1, max %lu)", le32_to_cpu(sb->time_precision), NSEC_PER_SEC); - return -EINVAL; + return -BCH_ERR_invalid_sb_time_precision; } if (rw == READ) { @@ -366,15 +366,15 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, vstruct_for_each(sb, f) { if (!f->u64s) { - prt_printf(out, "Invalid superblock: optional with size 0 (type %u)", + prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)", le32_to_cpu(f->type)); - return -EINVAL; + return -BCH_ERR_invalid_sb_field_size; } if (vstruct_next(f) > vstruct_last(sb)) { prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)", le32_to_cpu(f->type)); - return -EINVAL; + return -BCH_ERR_invalid_sb_field_size; } } @@ -382,7 +382,7 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, mi = bch2_sb_get_members(sb); if (!mi) { prt_printf(out, 
"Invalid superblock: member info area missing"); - return -EINVAL; + return -BCH_ERR_invalid_sb_members_missing; } ret = bch2_sb_field_validate(sb, &mi->field, out); @@ -544,7 +544,7 @@ reread: if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) && !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) { prt_printf(err, "Not a bcachefs superblock"); - return -EINVAL; + return -BCH_ERR_invalid_sb_magic; } version = le16_to_cpu(sb->sb->version); @@ -555,13 +555,13 @@ reread: if (version >= bcachefs_metadata_version_max) { prt_printf(err, "Unsupported superblock version %u (min %u, max %u)", version, bcachefs_metadata_version_min, bcachefs_metadata_version_max); - return -EINVAL; + return -BCH_ERR_invalid_sb_version; } if (version_min < bcachefs_metadata_version_min) { prt_printf(err, "Unsupported superblock version %u (min %u, max %u)", version_min, bcachefs_metadata_version_min, bcachefs_metadata_version_max); - return -EINVAL; + return -BCH_ERR_invalid_sb_version; } bytes = vstruct_bytes(sb->sb); @@ -569,7 +569,7 @@ reread: if (bytes > 512 << sb->sb->layout.sb_max_size_bits) { prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)", bytes, 512UL << sb->sb->layout.sb_max_size_bits); - return -EINVAL; + return -BCH_ERR_invalid_sb_too_big; } if (bytes > sb->buffer_size) { @@ -580,7 +580,7 @@ reread: if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) { prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); - return -EINVAL; + return -BCH_ERR_invalid_sb_csum_type; } /* XXX: verify MACs */ @@ -589,7 +589,7 @@ reread: if (bch2_crc_cmp(csum, sb->sb->csum)) { prt_printf(err, "bad checksum"); - return -EINVAL; + return -BCH_ERR_invalid_sb_csum; } sb->seq = le64_to_cpu(sb->sb->seq); @@ -703,7 +703,7 @@ got_super: prt_printf(&err, "block size (%u) smaller than device block size (%u)", le16_to_cpu(sb->sb->block_size) << 9, bdev_logical_block_size(sb->bdev)); - ret = -EINVAL; + ret = -BCH_ERR_block_size_too_small; goto err; } @@ -958,7 +958,7 @@ static int bch2_sb_members_validate(struct bch_sb *sb, if ((void *) (mi->members + sb->nr_devices) > vstruct_end(&mi->field)) { prt_printf(err, "too many devices for section size"); - return -EINVAL; + return -BCH_ERR_invalid_sb_members; } for (i = 0; i < sb->nr_devices; i++) { @@ -970,28 +970,28 @@ static int bch2_sb_members_validate(struct bch_sb *sb, if (le64_to_cpu(m->nbuckets) > LONG_MAX) { prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", i, le64_to_cpu(m->nbuckets), LONG_MAX); - return -EINVAL; + return -BCH_ERR_invalid_sb_members; } if (le64_to_cpu(m->nbuckets) - le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) { prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", i, le64_to_cpu(m->nbuckets), BCH_MIN_NR_NBUCKETS); - return -EINVAL; + return -BCH_ERR_invalid_sb_members; } if (le16_to_cpu(m->bucket_size) < le16_to_cpu(sb->block_size)) { prt_printf(err, "device %u: bucket size %u smaller than block size %u", i, le16_to_cpu(m->bucket_size), le16_to_cpu(sb->block_size)); - return -EINVAL; + return -BCH_ERR_invalid_sb_members; } if (le16_to_cpu(m->bucket_size) < BCH_SB_BTREE_NODE_SIZE(sb)) { prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", i, le16_to_cpu(m->bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); - return -EINVAL; + return -BCH_ERR_invalid_sb_members; } } @@ -1123,12 +1123,12 @@ static int bch2_sb_crypt_validate(struct bch_sb *sb, if (vstruct_bytes(&crypt->field) < sizeof(*crypt)) { prt_printf(err, "wrong size (got %zu should be %zu)", vstruct_bytes(&crypt->field), 
sizeof(*crypt)); - return -EINVAL; + return -BCH_ERR_invalid_sb_crypt; } if (BCH_CRYPT_KDF_TYPE(crypt)) { prt_printf(err, "bad kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt)); - return -EINVAL; + return -BCH_ERR_invalid_sb_crypt; } return 0; @@ -1365,7 +1365,7 @@ static int bch2_sb_clean_validate(struct bch_sb *sb, if (vstruct_bytes(&clean->field) < sizeof(*clean)) { prt_printf(err, "wrong size (got %zu should be %zu)", vstruct_bytes(&clean->field), sizeof(*clean)); - return -EINVAL; + return -BCH_ERR_invalid_sb_clean; } return 0; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 234dab15fa63..37dce3e3cccb 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -880,7 +880,7 @@ int bch2_fs_start(struct bch_fs *c) struct bch_dev *ca; time64_t now = ktime_get_real_seconds(); unsigned i; - int ret = -EINVAL; + int ret; down_write(&c->state_lock); @@ -917,9 +917,9 @@ int bch2_fs_start(struct bch_fs *c) if (ret) goto err; - ret = -EINVAL; if (bch2_fs_init_fault("fs_start")) { bch_err(c, "fs_start fault injected"); + ret = -EINVAL; goto err; } @@ -942,46 +942,43 @@ out: return ret; err: bch_err(c, "error starting filesystem: %s", bch2_err_str(ret)); - - if (ret < -BCH_ERR_START) - ret = -EINVAL; goto out; } -static const char *bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) +static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) { struct bch_sb_field_members *sb_mi; sb_mi = bch2_sb_get_members(sb); if (!sb_mi) - return "Invalid superblock: member info area missing"; + return -BCH_ERR_member_info_missing; if (le16_to_cpu(sb->block_size) != block_sectors(c)) - return "mismatched block size"; + return -BCH_ERR_mismatched_block_size; if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) - return "new cache bucket size is too small"; + return -BCH_ERR_bucket_size_too_small; - return NULL; + return 0; } -static const char *bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) +static int bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) { struct bch_sb *newest = le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? 
fs : sb; struct bch_sb_field_members *mi = bch2_sb_get_members(newest); if (!uuid_equal(&fs->uuid, &sb->uuid)) - return "device not a member of filesystem"; + return -BCH_ERR_device_not_a_member_of_filesystem; if (!bch2_dev_exists(newest, mi, sb->dev_idx)) - return "device has been removed"; + return -BCH_ERR_device_has_been_removed; if (fs->block_size != sb->block_size) - return "mismatched block size"; + return -BCH_ERR_mismatched_block_size; - return NULL; + return 0; } /* Device startup/shutdown: */ @@ -1179,23 +1176,17 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (bch2_dev_is_online(ca)) { bch_err(ca, "already have device online in slot %u", sb->sb->dev_idx); - return -EINVAL; + return -BCH_ERR_device_already_online; } if (get_capacity(sb->bdev->bd_disk) < ca->mi.bucket_size * ca->mi.nbuckets) { bch_err(ca, "cannot online: device too small"); - return -EINVAL; + return -BCH_ERR_device_size_too_small; } BUG_ON(!percpu_ref_is_zero(&ca->io_ref)); - if (get_capacity(sb->bdev->bd_disk) < - ca->mi.bucket_size * ca->mi.nbuckets) { - bch_err(ca, "device too small"); - return -EINVAL; - } - ret = bch2_dev_journal_init(ca, sb->sb); if (ret) return ret; @@ -1370,7 +1361,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, return 0; if (!bch2_dev_state_allowed(c, ca, new_state, flags)) - return -EINVAL; + return -BCH_ERR_device_state_not_allowed; if (new_state != BCH_MEMBER_STATE_rw) __bch2_dev_read_only(c, ca); @@ -1433,7 +1424,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { struct bch_sb_field_members *mi; unsigned dev_idx = ca->dev_idx, data; - int ret = -EINVAL; + int ret; down_write(&c->state_lock); @@ -1445,6 +1436,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { bch_err(ca, "Cannot remove without losing data"); + ret = -BCH_ERR_device_state_not_allowed; goto err; } @@ -1530,7 +1522,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path) { struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb; - const char *err; struct bch_dev *ca = NULL; struct bch_sb_field_members *mi; struct bch_member dev_mi; @@ -1555,10 +1546,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } } - err = bch2_dev_may_add(sb.sb, c); - if (err) { - bch_err(c, "device add error: %s", err); - ret = -EINVAL; + ret = bch2_dev_may_add(sb.sb, c); + if (ret) { + bch_err(c, "device add error: %s", bch2_err_str(ret)); goto err; } @@ -1692,7 +1682,6 @@ int bch2_dev_online(struct bch_fs *c, const char *path) struct bch_sb_field_members *mi; struct bch_dev *ca; unsigned dev_idx; - const char *err; int ret; down_write(&c->state_lock); @@ -1705,9 +1694,9 @@ int bch2_dev_online(struct bch_fs *c, const char *path) dev_idx = sb.sb->dev_idx; - err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); - if (err) { - bch_err(c, "error bringing %s online: %s", path, err); + ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); + if (ret) { + bch_err(c, "error bringing %s online: %s", path, bch2_err_str(ret)); goto err; } @@ -1741,7 +1730,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) err: up_write(&c->state_lock); bch2_free_super(&sb); - return -EINVAL; + return ret; } int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) @@ -1757,7 +1746,7 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { bch_err(ca, "Cannot offline required disk"); 
up_write(&c->state_lock); - return -EINVAL; + return -BCH_ERR_device_state_not_allowed; } __bch2_dev_offline(c, ca); @@ -1783,7 +1772,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { bch_err(ca, "New size larger than device"); - ret = -EINVAL; + ret = -BCH_ERR_device_size_too_small; goto err; } @@ -1836,7 +1825,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_fs *c = NULL; struct bch_sb_field_members *mi; unsigned i, best_sb = 0; - const char *err; struct printbuf errbuf = PRINTBUF; int ret = 0; @@ -1880,8 +1868,8 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, continue; } - err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); - if (err) + ret = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb); + if (ret) goto err_print; i++; } @@ -1902,9 +1890,10 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } up_write(&c->state_lock); - err = "insufficient devices"; - if (!bch2_fs_may_start(c)) + if (!bch2_fs_may_start(c)) { + ret = -BCH_ERR_insufficient_devices_to_start; goto err_print; + } if (!c->opts.nostart) { ret = bch2_fs_start(c); @@ -1919,8 +1908,7 @@ out: return c; err_print: pr_err("bch_fs_open err opening %s: %s", - devices[0], err); - ret = -EINVAL; + devices[0], bch2_err_str(ret)); err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index bd118f6ea08b..448737be045c 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -78,7 +78,7 @@ int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k, if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) { prt_printf(err, "incorrect value size (%zu < %zu)", bkey_val_bytes(k.k), sizeof(*xattr.v)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (bkey_val_u64s(k.k) < @@ -88,7 +88,7 @@ int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k, bkey_val_u64s(k.k), xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len))); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } /* XXX why +4 ? 
*/ @@ -99,18 +99,18 @@ int bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k, bkey_val_u64s(k.k), xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4)); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } handler = bch2_xattr_type_to_handler(xattr.v->x_type); if (!handler) { prt_printf(err, "invalid type (%u)", xattr.v->x_type); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) { prt_printf(err, "xattr name has invalid characters"); - return -EINVAL; + return -BCH_ERR_invalid_bkey; } return 0; -- cgit v1.2.3 From 792031116bee35e13be7c8ae8cf1b8eec141b136 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Nov 2022 18:59:01 -0500 Subject: bcachefs: Unwritten extents support - bch2_extent_merge checks unwritten bit - read path returns 0s for unwritten extents without actually reading - reflink path skips over unwritten extents - bch2_bkey_ptrs_invalid() checks for extents with both written and unwritten extents, and non-normal extents (stripes, btree ptrs) with unwritten ptrs - fiemap checks for unwritten extents and returns FIEMAP_EXTENT_UNWRITTEN Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 4 ++-- fs/bcachefs/extents.c | 34 ++++++++++++++++++++++++++++++---- fs/bcachefs/extents.h | 17 +++++++++++++++++ fs/bcachefs/fs-io.c | 14 +++++++------- fs/bcachefs/fs.c | 3 +++ fs/bcachefs/fsck.c | 4 ++-- fs/bcachefs/io.c | 3 +++ fs/bcachefs/reflink.c | 6 +++++- 8 files changed, 69 insertions(+), 16 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index e0e2219fb1cc..57327c4dc9b4 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -582,7 +582,7 @@ struct bch_extent_ptr { __u64 type:1, cached:1, unused:1, - reservation:1, + unwritten:1, offset:44, /* 8 petabytes */ dev:8, gen:8; @@ -590,7 +590,7 @@ struct bch_extent_ptr { __u64 gen:8, dev:8, offset:44, - reservation:1, + unwritten:1, unused:1, cached:1, type:1; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 3d124dc5bbef..627edba24900 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -116,6 +116,13 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, return -EIO; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + /* + * Unwritten extent: no need to actually read, treat it as a + * hole and return 0s: + */ + if (p.ptr.unwritten) + return 0; + ca = bch_dev_bkey_exists(c, p.ptr.dev); /* @@ -269,6 +276,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) rp.ptr.offset + rp.crc.offset || lp.ptr.dev != rp.ptr.dev || lp.ptr.gen != rp.ptr.gen || + lp.ptr.unwritten != rp.ptr.unwritten || lp.has_ec != rp.has_ec) return false; @@ -904,6 +912,9 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2) const union bch_extent_entry *entry1, *entry2; struct extent_ptr_decoded p1, p2; + if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2)) + return false; + bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1) bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) if (p1.ptr.dev == p2.ptr.dev && @@ -981,10 +992,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, u32 offset; u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset); - prt_printf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev, - b, offset, ptr->gen, - ptr->cached ? 
" cached" : ""); - + prt_printf(out, "ptr: %u:%llu:%u gen %u", + ptr->dev, b, offset, ptr->gen); + if (ptr->cached) + prt_str(out, " cached"); + if (ptr->unwritten) + prt_str(out, " unwritten"); if (ca && ptr_stale(ca, ptr)) prt_printf(out, " stale"); } @@ -1073,6 +1086,7 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, unsigned size_ondisk = k.k->size; unsigned nonce = UINT_MAX; unsigned nr_ptrs = 0; + bool unwritten = false; int ret; if (bkey_is_btree_ptr(k.k)) @@ -1097,6 +1111,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k, false, err); if (ret) return ret; + + if (nr_ptrs && unwritten != entry->ptr.unwritten) { + prt_printf(err, "extent with unwritten and written ptrs"); + return -BCH_ERR_invalid_bkey; + } + + if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) { + prt_printf(err, "has unwritten ptrs"); + return -BCH_ERR_invalid_bkey; + } + + unwritten = entry->ptr.unwritten; nr_ptrs++; break; case BCH_EXTENT_ENTRY_crc32: diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index f640254004e7..659ab76ea62c 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -510,6 +510,23 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k) } } +static inline bool bkey_extent_is_unwritten(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + bkey_for_each_ptr(ptrs, ptr) + if (ptr->unwritten) + return true; + return false; +} + +static inline bool bkey_extent_is_reservation(struct bkey_s_c k) +{ + return k.k->type == KEY_TYPE_reservation || + bkey_extent_is_unwritten(k); +} + static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 77037574cb0d..b5cf0a3218ea 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -341,11 +341,11 @@ static struct bch_page_state *bch2_page_state_create(struct page *page, return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp); } -static unsigned bkey_to_sector_state(const struct bkey *k) +static unsigned bkey_to_sector_state(struct bkey_s_c k) { - if (k->type == KEY_TYPE_reservation) + if (bkey_extent_is_reservation(k)) return SECTOR_RESERVED; - if (bkey_extent_is_allocation(k)) + if (bkey_extent_is_allocation(k.k)) return SECTOR_ALLOCATED; return SECTOR_UNALLOCATED; } @@ -396,7 +396,7 @@ retry: SPOS(inum.inum, offset, snapshot), BTREE_ITER_SLOTS, k, ret) { unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); - unsigned state = bkey_to_sector_state(k.k); + unsigned state = bkey_to_sector_state(k); while (pg_idx < nr_pages) { struct page *page = pages[pg_idx]; @@ -436,7 +436,7 @@ static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) struct bio_vec bv; unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v ? 
0 : bch2_bkey_nr_ptrs_fully_allocated(k); - unsigned state = bkey_to_sector_state(k.k); + unsigned state = bkey_to_sector_state(k); bio_for_each_segment(bv, bio, iter) __bch2_page_state_set(bv.bv_page, bv.bv_offset >> 9, @@ -3093,8 +3093,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, goto bkey_err; /* already reserved */ - if (k.k->type == KEY_TYPE_reservation && - bkey_s_c_to_reservation(k).v->nr_replicas >= opts.data_replicas) { + if (bkey_extent_is_reservation(k) && + bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { bch2_btree_iter_advance(&iter); continue; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index cc41472a335e..15ab77ebb8c6 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -811,6 +811,9 @@ static int bch2_fill_extent(struct bch_fs *c, int flags2 = 0; u64 offset = p.ptr.offset; + if (p.ptr.unwritten) + flags2 |= FIEMAP_EXTENT_UNWRITTEN; + if (p.crc.compression_type) flags2 |= FIEMAP_EXTENT_ENCODED; else diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 24365b9260f6..5887d78190eb 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1251,8 +1251,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, continue; if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != KEY_TYPE_reservation && - k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, + k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && + !bkey_extent_is_reservation(k), c, "extent type past end of inode %llu:%u, i_size %llu\n %s", i->inode.bi_inum, i->snapshot, i->inode.bi_size, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index c51381daf1c5..1d0ec638f645 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -1481,6 +1481,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, if (bch2_bkey_has_target(c, k, opts.promote_target)) return false; + if (bkey_extent_is_unwritten(k)) + return false; + if (bch2_target_congested(c, opts.promote_target)) { /* XXX trace this */ return false; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index aae924dc81f7..faf75bcf9ee7 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -251,9 +251,13 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) struct bkey_s_c k; int ret; - for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) + for_each_btree_key_upto_continue_norestart(*iter, end, 0, k, ret) { + if (bkey_extent_is_unwritten(k)) + continue; + if (bkey_extent_is_data(k.k)) return k; + } if (bkey_ge(iter->pos, end)) bch2_btree_iter_set_pos(iter, end); -- cgit v1.2.3 From 2d33036ca9360bacef23ba32e7768ff9ea87f2be Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 16 Mar 2023 11:04:28 -0400 Subject: bcachefs: Fix for 'missing subvolume' error Subvolumes, including their root inodes, get deleted asynchronously after an unlink. But we still need to ensure that we tell the VFS the inode has been deleted, otherwise VFS writeback could fire after asynchronous deletion has finished, and try to write to an inode/subvolume that no longer exists. 
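In code terms the fix reduces to one extra step after the unlink transaction commits. A minimal sketch of the idea, using the same names as the hunk below (the full patch also restructures the error path):

	ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
			bch2_unlink_trans(&trans, inode_inum(dir), &dir_u,
					  &inode_u, &dentry->d_name,
					  deleting_snapshot));
	if (!ret && inode_u.bi_subvol)
		/*
		 * The subvolume is reaped asynchronously, but the VFS must
		 * see i_nlink == 0 immediately so writeback never touches
		 * the dead inode:
		 */
		set_nlink(&inode->v, 0);
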
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 15ab77ebb8c6..828887abc261 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -442,19 +442,27 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_trans_init(&trans, c, 4, 1024); ret = commit_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL, - bch2_unlink_trans(&trans, - inode_inum(dir), &dir_u, - &inode_u, &dentry->d_name, - deleting_snapshot)); + BTREE_INSERT_NOFAIL, + bch2_unlink_trans(&trans, + inode_inum(dir), &dir_u, + &inode_u, &dentry->d_name, + deleting_snapshot)); + if (unlikely(ret)) + goto err; - if (likely(!ret)) { - bch2_inode_update_after_write(&trans, dir, &dir_u, - ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(&trans, inode, &inode_u, - ATTR_MTIME); - } + bch2_inode_update_after_write(&trans, dir, &dir_u, + ATTR_MTIME|ATTR_CTIME); + bch2_inode_update_after_write(&trans, inode, &inode_u, + ATTR_MTIME); + if (inode_u.bi_subvol) { + /* + * Subvolume deletion is asynchronous, but we still want to tell + * the VFS that it's been deleted here: + */ + set_nlink(&inode->v, 0); + } +err: bch2_trans_exit(&trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); -- cgit v1.2.3 From 9edbcc72f6987bbb58f113d04e7704b7a84106a6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 Mar 2023 11:53:51 -0400 Subject: bcachefs: Fix bch2_evict_subvolume_inodes() This fixes a bug in bch2_evict_subvolume_inodes(): d_mark_dontcache() doesn't handle the case where i_count is already 0, we need to grab and put the inode in order for it to be dropped. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 +++ fs/bcachefs/darray.h | 15 +++++--- fs/bcachefs/fs.c | 93 ++++++++++++++++++++++++++++++++++---------------- fs/bcachefs/fs.h | 1 + fs/bcachefs/inode.c | 3 -- fs/bcachefs/super.c | 3 ++ 6 files changed, 81 insertions(+), 38 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 05fc0f7434dd..c1f27b4910a0 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -971,6 +971,10 @@ struct bch_fs { reflink_gc_table reflink_gc_table; size_t reflink_gc_nr; + /* fs.c */ + struct list_head vfs_inodes_list; + struct mutex vfs_inodes_lock; + /* VFS IO PATH - fs-io.c */ struct bio_set writepage_bioset; struct bio_set dio_write_bioset; diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 519ab9b96e67..978ab7961f1b 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -19,11 +19,11 @@ struct { \ typedef DARRAY(void) darray_void; -static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more) +static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp) { if (d->nr + more > d->size) { size_t new_size = roundup_pow_of_two(d->nr + more); - void *data = krealloc_array(d->data, new_size, t_size, GFP_KERNEL); + void *data = krealloc_array(d->data, new_size, t_size, gfp); if (!data) return -ENOMEM; @@ -35,20 +35,25 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more) return 0; } +#define darray_make_room_gfp(_d, _more, _gfp) \ + __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp) + #define darray_make_room(_d, _more) \ - __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more)) + darray_make_room_gfp(_d, _more, GFP_KERNEL) #define darray_top(_d) 
((_d).data[(_d).nr]) -#define darray_push(_d, _item) \ +#define darray_push_gfp(_d, _item, _gfp) \ ({ \ - int _ret = darray_make_room((_d), 1); \ + int _ret = darray_make_room_gfp((_d), 1, _gfp); \ \ if (!_ret) \ (_d)->data[(_d)->nr++] = (_item); \ _ret; \ }) +#define darray_push(_d, _item) darray_push_gfp(_d, _item, GFP_KERNEL) + #define darray_insert_item(_d, _pos, _item) \ ({ \ size_t pos = (_pos); \ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 828887abc261..129924dfaf69 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -201,6 +201,10 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) return ERR_PTR(ret); } + mutex_lock(&c->vfs_inodes_lock); + list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); + mutex_unlock(&c->vfs_inodes_lock); + unlock_new_inode(&inode->v); return &inode->v; @@ -314,6 +318,9 @@ err_before_quota: inode = old; } else { + mutex_lock(&c->vfs_inodes_lock); + list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); + mutex_unlock(&c->vfs_inodes_lock); /* * we really don't want insert_inode_locked2() to be setting * I_NEW... @@ -1370,6 +1377,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) inode_init_once(&inode->v); mutex_init(&inode->ei_update_lock); two_state_lock_init(&inode->ei_pagecache_lock); + INIT_LIST_HEAD(&inode->ei_vfs_inode_list); mutex_init(&inode->ei_quota_lock); return &inode->v; @@ -1434,53 +1442,78 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_inode_rm(c, inode_inum(inode)); } + + mutex_lock(&c->vfs_inodes_lock); + list_del_init(&inode->ei_vfs_inode_list); + mutex_unlock(&c->vfs_inodes_lock); } -void bch2_evict_subvolume_inodes(struct bch_fs *c, - snapshot_id_list *s) +void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) { - struct super_block *sb = c->vfs_sb; - struct inode *inode; + struct bch_inode_info *inode, **i; + DARRAY(struct bch_inode_info *) grabbed; + bool clean_pass = false, this_pass_clean; - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || - (inode->i_state & I_FREEING)) - continue; + /* + * Initially, we scan for inodes without I_DONTCACHE, then mark them to + * be pruned with d_mark_dontcache(). 
+ * + * Once we've had a clean pass where we didn't find any inodes without + * I_DONTCACHE, we wait for them to be freed: + */ - d_mark_dontcache(inode); - d_prune_aliases(inode); - } - spin_unlock(&sb->s_inode_list_lock); + darray_init(&grabbed); + darray_make_room(&grabbed, 1024); again: cond_resched(); - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (!snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) || - (inode->i_state & I_FREEING)) + this_pass_clean = true; + + mutex_lock(&c->vfs_inodes_lock); + list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) { + if (!snapshot_list_has_id(s, inode->ei_subvol)) continue; - if (!(inode->i_state & I_DONTCACHE)) { - d_mark_dontcache(inode); - d_prune_aliases(inode); - } + if (!(inode->v.i_state & I_DONTCACHE) && + !(inode->v.i_state & I_FREEING)) { + this_pass_clean = false; + + d_mark_dontcache(&inode->v); + d_prune_aliases(&inode->v); + + /* + * If i_count was zero, we have to take and release a + * ref in order for I_DONTCACHE to be noticed and the + * inode to be dropped; + */ + + if (!atomic_read(&inode->v.i_count) && + igrab(&inode->v) && + darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) + break; + } else if (clean_pass && this_pass_clean) { + wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW); + DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); - spin_lock(&inode->i_lock); - if (snapshot_list_has_id(s, to_bch_ei(inode)->ei_subvol) && - !(inode->i_state & I_FREEING)) { - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_NEW); - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); + mutex_unlock(&c->vfs_inodes_lock); + schedule(); finish_wait(wq, &wait.wq_entry); goto again; } + } + mutex_unlock(&c->vfs_inodes_lock); - spin_unlock(&inode->i_lock); + darray_for_each(grabbed, i) + iput(&(*i)->v); + grabbed.nr = 0; + + if (!clean_pass || !this_pass_clean) { + clean_pass = this_pass_clean; + goto again; } - spin_unlock(&sb->s_inode_list_lock); + + darray_exit(&grabbed); } static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index e1c73a38c607..2e63cb6603bd 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -13,6 +13,7 @@ struct bch_inode_info { struct inode v; + struct list_head ei_vfs_inode_list; unsigned long ei_flags; struct mutex ei_update_lock; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 560545a7ea03..7ccbc00b7156 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -803,9 +803,6 @@ retry: bch2_inode_unpack(k, &inode_u); - /* Subvolume root? 
*/ - BUG_ON(inode_u.bi_subvol); - bkey_inode_generation_init(&delete.k_i); delete.k.p = iter.pos; delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 278f8f19a230..d6f2f453c027 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -709,6 +709,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) sema_init(&c->io_in_flight, 128); + INIT_LIST_HEAD(&c->vfs_inodes_list); + mutex_init(&c->vfs_inodes_lock); + c->copy_gc_enabled = 1; c->rebalance.enabled = 1; c->promote_whole_extents = true; -- cgit v1.2.3 From 711bf946d55d28336dcc4f87209c8b74e6279481 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 15 Mar 2023 19:04:05 -0400 Subject: bcachefs: Add an assert in inode_write for -ENOENT Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 129924dfaf69..a57ab773dd27 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -105,6 +105,11 @@ retry: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; + bch2_fs_fatal_err_on(ret == -ENOENT, c, + "inode %u:%llu not found when updating", + inode_inum(inode).subvol, + inode_inum(inode).inum); + bch2_trans_exit(&trans); return ret < 0 ? ret : 0; } -- cgit v1.2.3 From dde72e182758e455891ff61c11746085db8c27c1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Mar 2023 20:16:06 -0400 Subject: bcachefs: Add missing bch2_err_class() call We're not supposed to return our private error codes to userspace. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index a57ab773dd27..58a89c36cf0e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1894,7 +1894,7 @@ out: err_put_super: deactivate_locked_super(sb); - return ERR_PTR(ret); + return ERR_PTR(bch2_err_class(ret)); } static void bch2_kill_sb(struct super_block *sb) -- cgit v1.2.3 From 550a6a496d33034878172ed789e03feaee6cee43 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 19 Mar 2023 16:47:30 -0400 Subject: bcachefs: Enable large folios Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 58a89c36cf0e..99082820e30b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1369,6 +1369,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, inode->v.i_op = &bch_special_inode_operations; break; } + + mapping_set_large_folios(inode->v.i_mapping); } static struct inode *bch2_alloc_inode(struct super_block *sb) -- cgit v1.2.3 From e47a390aa5946e3c5bea7a4a350a88d3bb3ba5b4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 27 May 2023 19:59:59 -0400 Subject: bcachefs: Convert -ENOENT to private error codes As with previous conversions, replace -ENOENT uses with more informative private error codes. 
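The call-site pattern repeated throughout the hunks below is worth spelling out once: private codes still match their parent errno class, and must be stripped before crossing the syscall boundary. A hedged sketch, where lookup_foo() is a placeholder rather than a real function:

	int ret = lookup_foo(trans, inum);  /* may be -BCH_ERR_ENOENT_inode etc. */

	if (bch2_err_matches(ret, ENOENT))  /* matches any ENOENT-class code */
		ret = 0;                    /* handle "not found" uniformly */

	return bch2_err_class(ret);         /* don't leak private codes to userspace */
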
Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 6 +++--- fs/bcachefs/btree_iter.h | 2 +- fs/bcachefs/buckets.c | 2 +- fs/bcachefs/chardev.c | 2 +- fs/bcachefs/errcode.h | 11 +++++++++++ fs/bcachefs/fs-common.c | 2 +- fs/bcachefs/fs-ioctl.c | 2 +- fs/bcachefs/fs.c | 6 +++--- fs/bcachefs/fsck.c | 24 ++++++++++++------------ fs/bcachefs/inode.c | 2 +- fs/bcachefs/move.c | 2 +- fs/bcachefs/movinggc.c | 2 +- fs/bcachefs/quota.c | 4 ++-- fs/bcachefs/recovery.c | 2 +- fs/bcachefs/str_hash.h | 4 ++-- fs/bcachefs/subvolume.c | 21 +++++++++++++-------- fs/bcachefs/super.c | 2 +- fs/bcachefs/xattr.c | 6 +++--- 18 files changed, 59 insertions(+), 43 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 2bf58aa89f71..3fe108bc2f08 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -236,7 +236,7 @@ retry: if (ret) { if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - if (ret != -ENOENT) + if (!bch2_err_matches(ret, ENOENT)) acl = ERR_PTR(ret); goto out; } @@ -287,7 +287,7 @@ int bch2_set_acl_trans(struct btree_trans *trans, subvol_inum inum, inum, &search); } - return ret == -ENOENT ? 0 : ret; + return bch2_err_matches(ret, ENOENT) ? 0 : ret; } int bch2_set_acl(struct mnt_idmap *idmap, @@ -368,7 +368,7 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (ret) - return ret == -ENOENT ? 0 : ret; + return bch2_err_matches(ret, ENOENT) ? 0 : ret; k = bch2_btree_iter_peek_slot(&iter); xattr = bkey_s_c_to_xattr(k); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 9a4dbf358fe5..5e5e2a5c715c 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -516,7 +516,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans, k = bch2_btree_iter_peek_slot(iter); if (!bkey_err(k) && type && k.k->type != type) - k = bkey_s_c_err(-ENOENT); + k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch); if (unlikely(bkey_err(k))) bch2_trans_iter_exit(trans, iter); return k; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index bd144182c1e1..adf3bd0e4a8f 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1455,7 +1455,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, BTREE_ITER_WITH_UPDATES, stripe); ret = PTR_ERR_OR_ZERO(s); if (unlikely(ret)) { - bch2_trans_inconsistent_on(ret == -ENOENT, trans, + bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, "pointer to nonexistent stripe %llu", (u64) p.ec.idx); goto err; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 28854a6c31b9..fb603df099a5 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -578,7 +578,7 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, return i; } - return -ENOENT; + return -BCH_ERR_ENOENT_dev_idx_not_found; } static long bch2_ioctl_disk_resize(struct bch_fs *c, diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index c8ac08e5548b..acf9b92f9ab0 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -94,6 +94,17 @@ x(ENOSPC, ENOSPC_sb_crypt) \ x(ENOSPC, ENOSPC_btree_slot) \ x(ENOSPC, ENOSPC_snapshot_tree) \ + x(ENOENT, ENOENT_bkey_type_mismatch) \ + x(ENOENT, ENOENT_str_hash_lookup) \ + x(ENOENT, ENOENT_str_hash_set_must_replace) \ + x(ENOENT, ENOENT_inode) \ + x(ENOENT, ENOENT_not_subvol) \ + x(ENOENT, ENOENT_directory_dead) \ + x(ENOENT, ENOENT_subvolume) \ + x(ENOENT, ENOENT_snapshot_tree) \ + x(ENOENT, 
ENOENT_dirent_doesnt_match_inode) \ + x(ENOENT, ENOENT_dev_not_found) \ + x(ENOENT, ENOENT_dev_idx_not_found) \ x(0, open_buckets_empty) \ x(0, freelist_empty) \ x(BCH_ERR_freelist_empty, no_buckets_found) \ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 1f2e1fc4f6b2..bb5305441f27 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -281,7 +281,7 @@ int bch2_unlink_trans(struct btree_trans *trans, } if (deleting_snapshot && !inode_u->bi_subvol) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_not_subvol; goto err; } diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 269af9393824..dfa1bf73c854 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -382,7 +382,7 @@ retry: dir = dst_path.dentry->d_inode; if (IS_DEADDIR(dir)) { - error = -ENOENT; + error = -BCH_ERR_ENOENT_directory_dead; goto err3; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 99082820e30b..ba7aff6b8a51 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -105,7 +105,7 @@ retry: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_fs_fatal_err_on(ret == -ENOENT, c, + bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, "inode %u:%llu not found when updating", inode_inum(inode).subvol, inode_inum(inode).inum); @@ -1261,14 +1261,14 @@ retry: goto err; if (k.k->type != KEY_TYPE_dirent) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; goto err; } d = bkey_s_c_to_dirent(k); ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target); if (ret > 0) - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; if (ret) goto err; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 1b3ee66265c9..dcc55cbd3808 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -78,7 +78,7 @@ static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, snapshot, &s); if (!ret) *subvol = le32_to_cpu(s.subvol); - else if (ret == -ENOENT) + else if (bch2_err_matches(ret, ENOENT)) bch_err(trans->c, "snapshot %u not fonud", snapshot); return ret; @@ -119,7 +119,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, goto err; if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_inode; goto err; } @@ -148,7 +148,7 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, ret = bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) - : -ENOENT; + : -BCH_ERR_ENOENT_inode; if (!ret) *snapshot = iter.pos.snapshot; err: @@ -333,7 +333,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, ret = __lookup_dirent(trans, root_hash_info, root_inum, &lostfound_str, &inum, &d_type); - if (ret == -ENOENT) { + if (bch2_err_matches(ret, ENOENT)) { bch_notice(c, "creating lost+found"); goto create_lostfound; } @@ -1088,7 +1088,7 @@ static int inode_backpointer_exists(struct btree_trans *trans, SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot)); ret = bkey_err(d); if (ret) - return ret == -ENOENT ? 0 : ret; + return bch2_err_matches(ret, ENOENT) ? 
0 : ret; ret = dirent_points_to_inode(d, inode); bch2_trans_iter_exit(trans, &iter); @@ -1653,7 +1653,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __subvol_lookup(trans, target_subvol, &target_snapshot, &target_inum); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) goto err; if (fsck_err_on(ret, c, @@ -1665,7 +1665,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) goto err; if (fsck_err_on(ret, c, @@ -1846,7 +1846,7 @@ static int check_root_trans(struct btree_trans *trans) int ret; ret = __subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) return ret; if (mustfix_fsck_err_on(ret, c, "root subvol missing")) { @@ -1873,7 +1873,7 @@ static int check_root_trans(struct btree_trans *trans) } ret = __lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) return ret; if (mustfix_fsck_err_on(ret, c, "root directory missing") || @@ -1972,15 +1972,15 @@ static int check_path(struct btree_trans *trans, PTR_ERR_OR_ZERO((d = dirent_get_by_pos(trans, &dirent_iter, SPOS(inode->bi_dir, inode->bi_dir_offset, parent_snapshot))).k)); - if (ret && ret != -ENOENT) + if (ret && !bch2_err_matches(ret, ENOENT)) break; if (!ret && !dirent_points_to_inode(d, inode)) { bch2_trans_iter_exit(trans, &dirent_iter); - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; } - if (ret == -ENOENT) { + if (bch2_err_matches(ret, ENOENT)) { if (fsck_err(c, "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu", inode->bi_inum, snapshot, bch2_d_type_str(inode_d_type(inode)), diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index ddcd7b125f32..64e8d1f8a2fa 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -336,7 +336,7 @@ int bch2_inode_peek(struct btree_trans *trans, if (ret) return ret; - ret = bkey_is_inode(k.k) ? 0 : -ENOENT; + ret = bkey_is_inode(k.k) ? 
0 : -BCH_ERR_ENOENT_inode; if (ret) goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 2ec30a3fd193..fd629136824b 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -427,7 +427,7 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos, goto err; if (!k.k || !bkey_eq(k.k->p, pos)) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_inode; goto err; } diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 0d96346d5040..6750767276f2 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -236,7 +236,7 @@ err: darray_exit(&buckets); /* no entries in LRU btree found, or got to end: */ - if (ret == -ENOENT) + if (bch2_err_matches(ret, ENOENT)) ret = 0; if (ret < 0 && !bch2_err_matches(ret, EROFS)) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 56ba82cae19d..d20ec9764108 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -900,7 +900,7 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, ret = -ENOENT; found: mutex_unlock(&q->lock); - return ret; + return bch2_err_class(ret); } static int bch2_set_quota_trans(struct btree_trans *trans, @@ -960,7 +960,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); - return ret; + return bch2_err_class(ret); } const struct quotactl_ops bch2_quotactl_operations = { diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index af76c029fb6a..e4983d144483 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1082,7 +1082,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) if (!bkey_is_inode(k.k)) { bch_err(trans->c, "root inode not found"); - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_inode; goto err; } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 6178ae620ff1..ae21a8cca1b4 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -181,7 +181,7 @@ bch2_hash_lookup(struct btree_trans *trans, } bch2_trans_iter_exit(trans, iter); - return ret ?: -ENOENT; + return ret ?: -BCH_ERR_ENOENT_str_hash_lookup; } static __always_inline int @@ -288,7 +288,7 @@ found: not_found: if (!found && (flags & BCH_HASH_SET_MUST_REPLACE)) { - ret = -ENOENT; + ret = -BCH_ERR_ENOENT_str_hash_set_must_replace; } else if (found && (flags & BCH_HASH_SET_MUST_CREATE)) { ret = -EEXIST; } else { diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 388fa12bbd8b..f07b3e2b3226 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -37,8 +37,12 @@ int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot_tree *s) { - return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), - BTREE_ITER_WITH_UPDATES, snapshot_tree, s); + int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), + BTREE_ITER_WITH_UPDATES, snapshot_tree, s); + + if (bch2_err_matches(ret, ENOENT)) + ret = -BCH_ERR_ENOENT_snapshot_tree; + return ret; } static struct bkey_i_snapshot_tree * @@ -284,6 +288,7 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_subvolume s; + bool found = false; int ret; for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, @@ -296,14 +301,14 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, continue; if 
(!BCH_SUBVOLUME_SNAP(s.v)) { *subvol_id = s.k->p.offset; - goto found; + found = true; + break; } } - ret = ret ?: -ENOENT; -found: + bch2_trans_iter_exit(trans, &iter); - if (bch2_err_matches(ret, ENOENT)) { + if (!ret && !found) { struct bkey_i_subvolume *s; *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); @@ -1217,7 +1222,7 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol), BTREE_ITER_CACHED| BTREE_ITER_WITH_UPDATES); - ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -ENOENT; + ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -BCH_ERR_ENOENT_subvolume; if (likely(!ret)) *snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot); @@ -1444,7 +1449,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode, BTREE_ITER_CACHED, subvolume); ret = PTR_ERR_OR_ZERO(src_subvol); if (unlikely(ret)) { - bch2_fs_inconsistent_on(ret == -ENOENT, c, + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, "subvolume %u not found", src_subvolid); goto err; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index af6cc73d9356..8f0cbd7ada82 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1833,7 +1833,7 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) for_each_member_device_rcu(ca, c, i, NULL) if (!strcmp(name, ca->name)) goto found; - ca = ERR_PTR(-ENOENT); + ca = ERR_PTR(-BCH_ERR_ENOENT_dev_not_found); found: rcu_read_unlock(); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 448eb446946b..05c65d94c00f 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -163,7 +163,7 @@ static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info err2: bch2_trans_iter_exit(trans, &iter); err1: - return ret == -ENOENT ? -ENODATA : ret; + return ret < 0 && bch2_err_matches(ret, ENOENT) ? -ENODATA : ret; } int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, @@ -229,7 +229,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, hash_info, inum, &search); } - if (ret == -ENOENT) + if (bch2_err_matches(ret, ENOENT)) ret = flags & XATTR_REPLACE ? -ENODATA : 0; return ret; @@ -589,7 +589,7 @@ err: opt_id == Opt_background_target)) bch2_rebalance_add_work(c, inode->v.i_blocks); - return ret; + return bch2_err_class(ret); } static const struct xattr_handler bch_xattr_bcachefs_handler = { -- cgit v1.2.3 From b0e8c75e40a863dd40ecdf8fd6f8cdceacb965e5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 Jun 2023 15:41:41 -0400 Subject: bcachefs: Fix subvol deletion deadlock d_prune_aliases() may call bch2_evict_inode(), which needs c->vfs_inodes_list_lock. Fix this by always calling igrab() before putting the inodes onto our disposal list, and then calling d_prune_aliases() with c->vfs_inodes_lock dropped. 
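The shape of the fix is the usual one for this kind of deadlock: collect references while holding the lock, then do the work that can re-enter eviction after the lock is dropped. A simplified sketch using the names from the patch below; should_evict() stands in for the real snapshot-list and I_DONTCACHE/I_FREEING checks, and the darray_push_gfp() failure path is omitted:

	mutex_lock(&c->vfs_inodes_lock);
	list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list)
		if (should_evict(inode) && igrab(&inode->v))
			darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN);
	mutex_unlock(&c->vfs_inodes_lock);

	darray_for_each(grabbed, i) {
		/* safe here: d_prune_aliases() may call bch2_evict_inode(),
		 * which takes vfs_inodes_lock - now dropped */
		d_mark_dontcache(&(*i)->v);
		d_prune_aliases(&(*i)->v);
		iput(&(*i)->v);
	}
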
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ba7aff6b8a51..f417889eba08 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1481,22 +1481,14 @@ again: continue; if (!(inode->v.i_state & I_DONTCACHE) && - !(inode->v.i_state & I_FREEING)) { + !(inode->v.i_state & I_FREEING) && + igrab(&inode->v)) { this_pass_clean = false; - d_mark_dontcache(&inode->v); - d_prune_aliases(&inode->v); - - /* - * If i_count was zero, we have to take and release a - * ref in order for I_DONTCACHE to be noticed and the - * inode to be dropped; - */ - - if (!atomic_read(&inode->v.i_count) && - igrab(&inode->v) && - darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) + if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) { + iput(&inode->v); break; + } } else if (clean_pass && this_pass_clean) { wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW); DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); @@ -1511,8 +1503,12 @@ again: } mutex_unlock(&c->vfs_inodes_lock); - darray_for_each(grabbed, i) - iput(&(*i)->v); + darray_for_each(grabbed, i) { + inode = *i; + d_mark_dontcache(&inode->v); + d_prune_aliases(&inode->v); + iput(&inode->v); + } grabbed.nr = 0; if (!clean_pass || !this_pass_clean) { -- cgit v1.2.3 From a83e108fc1964b8273c6f51cc62588ee774a5a48 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 Jun 2023 21:12:05 -0400 Subject: bcachefs: fiemap: Fix a lockdep splat As with the previous patch, we generally can't hold btree locks while copying to userspace, as that may incur a page fault and require mmap_lock. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f417889eba08..9280f514bc9f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -943,6 +943,7 @@ retry: cur.k->k.p.offset += cur.k->k.size; if (have_extent) { + bch2_trans_unlock(&trans); ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), 0); if (ret) @@ -961,9 +962,11 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - if (!ret && have_extent) + if (!ret && have_extent) { + bch2_trans_unlock(&trans); ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); + } bch2_trans_exit(&trans); bch2_bkey_buf_exit(&cur, c); -- cgit v1.2.3 From 5eaa76d813d48a524a1ce040539048b851a0a20c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 13 Jul 2023 18:00:28 +0200 Subject: bcachefs: mark bch_inode_info and bkey_cached as reclaimable Mark these caches as reclaimable, so that available memory is correctly reported when there is a lot of cached inodes. Note that more work is needed - you should add __GFP_RECLAIMABLE to some of the kmalloc calls, so that they are allocated from the "kmalloc-rcl-*" caches. 
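For context: SLAB_RECLAIM_ACCOUNT makes the allocator account these slab pages as reclaimable, so MemAvailable in /proc/meminfo reflects that a large cached-inode population can be shrunk. The conversion itself is a one-flag change; the follow-up suggested above for plain kmalloc() calls would look something like this (hypothetical call site, not part of this patch):

	bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);

	/* __GFP_RECLAIMABLE routes an allocation to the
	 * "kmalloc-rcl-*" caches (hypothetical example): */
	buf = kmalloc(size, GFP_KERNEL | __GFP_RECLAIMABLE);
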
Signed-off-by: Mikulas Patocka Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 2 +- fs/bcachefs/fs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 34d959c4e640..badb541f493f 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -1066,7 +1066,7 @@ void bch2_btree_key_cache_exit(void) int __init bch2_btree_key_cache_init(void) { - bch2_key_cache = KMEM_CACHE(bkey_cached, 0); + bch2_key_cache = KMEM_CACHE(bkey_cached, SLAB_RECLAIM_ACCOUNT); if (!bch2_key_cache) return -ENOMEM; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 9280f514bc9f..e1824bdffdf8 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1926,7 +1926,7 @@ int __init bch2_vfs_init(void) { int ret = -ENOMEM; - bch2_inode_cache = KMEM_CACHE(bch_inode_info, 0); + bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); if (!bch2_inode_cache) goto err; -- cgit v1.2.3 From e691b391f02b2ddef1a784ea2d4cd3f46bb6a62a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 6 Aug 2023 10:04:05 -0400 Subject: bcachefs: Add logging to bch2_inode_peek() & related Add error messages when we fail to lookup an inode, and also add a few missing bch2_err_class() calls. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 ++-- fs/bcachefs/fs.c | 9 +++++++-- fs/bcachefs/inode.c | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 3b39597a677a..30b3d7b9f9dc 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -294,8 +294,8 @@ do { \ #define bch_err_fn(_c, _ret) \ bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret)) -#define bch_err_msg(_c, _ret, _msg) \ - bch_err(_c, "%s(): error " _msg " %s", __func__, bch2_err_str(_ret)) +#define bch_err_msg(_c, _ret, _msg, ...) \ + bch_err(_c, "%s(): error " _msg " %s", __func__, ##__VA_ARGS__, bch2_err_str(_ret)) #define bch_verbose(c, fmt, ...) 
\ do { \ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index e1824bdffdf8..695b8bc55590 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -203,7 +203,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) if (ret) { iget_failed(&inode->v); - return ERR_PTR(ret); + return ERR_PTR(bch2_err_class(ret)); } mutex_lock(&c->vfs_inodes_lock); @@ -1000,11 +1000,16 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; + int ret; if (!dir_emit_dots(file, ctx)) return 0; - return bch2_readdir(c, inode_inum(inode), ctx); + ret = bch2_readdir(c, inode_inum(inode), ctx); + if (ret) + bch_err_fn(c, ret); + + return bch2_err_class(ret); } static const struct file_operations bch_file_operations = { diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 294966e42850..e0d416553bf0 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -348,6 +348,8 @@ int bch2_inode_peek(struct btree_trans *trans, return 0; err: bch2_trans_iter_exit(trans, iter); + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum); return ret; } -- cgit v1.2.3 From dbbfca9f41e86903501dded3fd494e1a56f3c310 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Aug 2023 18:18:21 -0400 Subject: bcachefs: Split up fs-io.[ch] fs-io.c is too big - time for some reorganization - fs-dio.c: direct io - fs-pagecache.c: pagecache data structures (bch_folio), utility code Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 3 + fs/bcachefs/fs-io-buffered.c | 1098 +++++++++++++++ fs/bcachefs/fs-io-buffered.h | 27 + fs/bcachefs/fs-io-direct.c | 679 ++++++++++ fs/bcachefs/fs-io-direct.h | 16 + fs/bcachefs/fs-io-pagecache.c | 780 +++++++++++ fs/bcachefs/fs-io-pagecache.h | 176 +++ fs/bcachefs/fs-io.c | 2975 ++--------------------------------------- fs/bcachefs/fs-io.h | 166 ++- fs/bcachefs/fs.c | 3 + fs/bcachefs/super.c | 8 +- 11 files changed, 3042 insertions(+), 2889 deletions(-) create mode 100644 fs/bcachefs/fs-io-buffered.c create mode 100644 fs/bcachefs/fs-io-buffered.h create mode 100644 fs/bcachefs/fs-io-direct.c create mode 100644 fs/bcachefs/fs-io-direct.h create mode 100644 fs/bcachefs/fs-io-pagecache.c create mode 100644 fs/bcachefs/fs-io-pagecache.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 55b6d85d55c3..13cacf2d8bfb 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -38,6 +38,9 @@ bcachefs-y := \ fs-common.o \ fs-ioctl.o \ fs-io.o \ + fs-io-buffered.o \ + fs-io-direct.o \ + fs-io-pagecache.o \ fsck.o \ inode.o \ io.o \ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c new file mode 100644 index 000000000000..102c70887f76 --- /dev/null +++ b/fs/bcachefs/fs-io-buffered.c @@ -0,0 +1,1098 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef NO_BCACHEFS_FS + +#include "bcachefs.h" +#include "alloc_foreground.h" +#include "bkey_buf.h" +#include "fs-io.h" +#include "fs-io-buffered.h" +#include "fs-io-direct.h" +#include "fs-io-pagecache.h" +#include "io.h" + +#include +#include +#include + +static inline bool bio_full(struct bio *bio, unsigned len) +{ + if (bio->bi_vcnt >= bio->bi_max_vecs) + return true; + if (bio->bi_iter.bi_size > UINT_MAX - len) + return true; + return false; +} + +/* readpage(s): */ + +static void bch2_readpages_end_io(struct bio *bio) +{ + struct folio_iter fi; + + 
bio_for_each_folio_all(fi, bio) { + if (!bio->bi_status) { + folio_mark_uptodate(fi.folio); + } else { + folio_clear_uptodate(fi.folio); + folio_set_error(fi.folio); + } + folio_unlock(fi.folio); + } + + bio_put(bio); +} + +struct readpages_iter { + struct address_space *mapping; + unsigned idx; + folios folios; +}; + +static int readpages_iter_init(struct readpages_iter *iter, + struct readahead_control *ractl) +{ + struct folio **fi; + int ret; + + memset(iter, 0, sizeof(*iter)); + + iter->mapping = ractl->mapping; + + ret = bch2_filemap_get_contig_folios_d(iter->mapping, + ractl->_index << PAGE_SHIFT, + (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, + 0, mapping_gfp_mask(iter->mapping), + &iter->folios); + if (ret) + return ret; + + darray_for_each(iter->folios, fi) { + ractl->_nr_pages -= 1U << folio_order(*fi); + __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL); + folio_put(*fi); + folio_put(*fi); + } + + return 0; +} + +static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) +{ + if (iter->idx >= iter->folios.nr) + return NULL; + return iter->folios.data[iter->idx]; +} + +static inline void readpage_iter_advance(struct readpages_iter *iter) +{ + iter->idx++; +} + +static bool extent_partial_reads_expensive(struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; + + bkey_for_each_crc(k.k, ptrs, crc, i) + if (crc.csum_type || crc.compression_type) + return true; + return false; +} + +static int readpage_bio_extend(struct btree_trans *trans, + struct readpages_iter *iter, + struct bio *bio, + unsigned sectors_this_extent, + bool get_more) +{ + /* Don't hold btree locks while allocating memory: */ + bch2_trans_unlock(trans); + + while (bio_sectors(bio) < sectors_this_extent && + bio->bi_vcnt < bio->bi_max_vecs) { + struct folio *folio = readpage_iter_peek(iter); + int ret; + + if (folio) { + readpage_iter_advance(iter); + } else { + pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; + + if (!get_more) + break; + + folio = xa_load(&iter->mapping->i_pages, folio_offset); + if (folio && !xa_is_value(folio)) + break; + + folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); + if (!folio) + break; + + if (!__bch2_folio_create(folio, GFP_KERNEL)) { + folio_put(folio); + break; + } + + ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL); + if (ret) { + __bch2_folio_release(folio); + folio_put(folio); + break; + } + + folio_put(folio); + } + + BUG_ON(folio_sector(folio) != bio_end_sector(bio)); + + BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); + } + + return bch2_trans_relock(trans); +} + +static void bchfs_read(struct btree_trans *trans, + struct bch_read_bio *rbio, + subvol_inum inum, + struct readpages_iter *readpages_iter) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_buf sk; + int flags = BCH_READ_RETRY_IF_STALE| + BCH_READ_MAY_PROMOTE; + u32 snapshot; + int ret = 0; + + rbio->c = c; + rbio->start_time = local_clock(); + rbio->subvol = inum.subvol; + + bch2_bkey_buf_init(&sk); +retry: + bch2_trans_begin(trans); + iter = (struct btree_iter) { NULL }; + + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + goto err; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, + SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), + BTREE_ITER_SLOTS); + while (1) { + struct bkey_s_c k; + unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = 
BTREE_ID_extents; + + /* + * read_extent -> io_time_reset may cause a transaction restart + * without returning an error, we need to check for that here: + */ + ret = bch2_trans_relock(trans); + if (ret) + break; + + bch2_btree_iter_set_pos(&iter, + POS(inum.inum, rbio->bio.bi_iter.bi_sector)); + + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + break; + + offset_into_extent = iter.pos.offset - + bkey_start_offset(k.k); + sectors = k.k->size - offset_into_extent; + + bch2_bkey_buf_reassemble(&sk, c, k); + + ret = bch2_read_indirect_extent(trans, &data_btree, + &offset_into_extent, &sk); + if (ret) + break; + + k = bkey_i_to_s_c(sk.k); + + sectors = min(sectors, k.k->size - offset_into_extent); + + if (readpages_iter) { + ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, + extent_partial_reads_expensive(k)); + if (ret) + break; + } + + bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; + swap(rbio->bio.bi_iter.bi_size, bytes); + + if (rbio->bio.bi_iter.bi_size == bytes) + flags |= BCH_READ_LAST_FRAGMENT; + + bch2_bio_page_state_set(&rbio->bio, k); + + bch2_read_extent(trans, rbio, iter.pos, + data_btree, k, offset_into_extent, flags); + + if (flags & BCH_READ_LAST_FRAGMENT) + break; + + swap(rbio->bio.bi_iter.bi_size, bytes); + bio_advance(&rbio->bio, bytes); + + ret = btree_trans_too_many_iters(trans); + if (ret) + break; + } +err: + bch2_trans_iter_exit(trans, &iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + + if (ret) { + bch_err_inum_offset_ratelimited(c, + iter.pos.inode, + iter.pos.offset << 9, + "read error %i from btree lookup", ret); + rbio->bio.bi_status = BLK_STS_IOERR; + bio_endio(&rbio->bio); + } + + bch2_bkey_buf_exit(&sk, c); +} + +void bch2_readahead(struct readahead_control *ractl) +{ + struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_io_opts opts; + struct btree_trans trans; + struct folio *folio; + struct readpages_iter readpages_iter; + int ret; + + bch2_inode_opts_get(&opts, c, &inode->ei_inode); + + ret = readpages_iter_init(&readpages_iter, ractl); + BUG_ON(ret); + + bch2_trans_init(&trans, c, 0, 0); + + bch2_pagecache_add_get(inode); + + while ((folio = readpage_iter_peek(&readpages_iter))) { + unsigned n = min_t(unsigned, + readpages_iter.folios.nr - + readpages_iter.idx, + BIO_MAX_VECS); + struct bch_read_bio *rbio = + rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, + GFP_KERNEL, &c->bio_read), + opts); + + readpage_iter_advance(&readpages_iter); + + rbio->bio.bi_iter.bi_sector = folio_sector(folio); + rbio->bio.bi_end_io = bch2_readpages_end_io; + BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + + bchfs_read(&trans, rbio, inode_inum(inode), + &readpages_iter); + bch2_trans_unlock(&trans); + } + + bch2_pagecache_add_put(inode); + + bch2_trans_exit(&trans); + darray_exit(&readpages_iter.folios); +} + +static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, + subvol_inum inum, struct folio *folio) +{ + struct btree_trans trans; + + bch2_folio_create(folio, __GFP_NOFAIL); + + rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; + rbio->bio.bi_iter.bi_sector = folio_sector(folio); + BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + + bch2_trans_init(&trans, c, 0, 0); + bchfs_read(&trans, rbio, inum, NULL); + bch2_trans_exit(&trans); +} + +static void bch2_read_single_folio_end_io(struct bio *bio) +{ + complete(bio->bi_private); +} + +int bch2_read_single_folio(struct folio 
*folio, struct address_space *mapping) +{ + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_read_bio *rbio; + struct bch_io_opts opts; + int ret; + DECLARE_COMPLETION_ONSTACK(done); + + bch2_inode_opts_get(&opts, c, &inode->ei_inode); + + rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), + opts); + rbio->bio.bi_private = &done; + rbio->bio.bi_end_io = bch2_read_single_folio_end_io; + + __bchfs_readfolio(c, rbio, inode_inum(inode), folio); + wait_for_completion(&done); + + ret = blk_status_to_errno(rbio->bio.bi_status); + bio_put(&rbio->bio); + + if (ret < 0) + return ret; + + folio_mark_uptodate(folio); + return 0; +} + +int bch2_read_folio(struct file *file, struct folio *folio) +{ + int ret; + + ret = bch2_read_single_folio(folio, folio->mapping); + folio_unlock(folio); + return bch2_err_class(ret); +} + +/* writepages: */ + +struct bch_writepage_io { + struct bch_inode_info *inode; + + /* must be last: */ + struct bch_write_op op; +}; + +struct bch_writepage_state { + struct bch_writepage_io *io; + struct bch_io_opts opts; + struct bch_folio_sector *tmp; + unsigned tmp_sectors; +}; + +static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, + struct bch_inode_info *inode) +{ + struct bch_writepage_state ret = { 0 }; + + bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); + return ret; +} + +static void bch2_writepage_io_done(struct bch_write_op *op) +{ + struct bch_writepage_io *io = + container_of(op, struct bch_writepage_io, op); + struct bch_fs *c = io->op.c; + struct bio *bio = &io->op.wbio.bio; + struct folio_iter fi; + unsigned i; + + if (io->op.error) { + set_bit(EI_INODE_ERROR, &io->inode->ei_flags); + + bio_for_each_folio_all(fi, bio) { + struct bch_folio *s; + + folio_set_error(fi.folio); + mapping_set_error(fi.folio->mapping, -EIO); + + s = __bch2_folio(fi.folio); + spin_lock(&s->lock); + for (i = 0; i < folio_sectors(fi.folio); i++) + s->s[i].nr_replicas = 0; + spin_unlock(&s->lock); + } + } + + if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { + bio_for_each_folio_all(fi, bio) { + struct bch_folio *s; + + s = __bch2_folio(fi.folio); + spin_lock(&s->lock); + for (i = 0; i < folio_sectors(fi.folio); i++) + s->s[i].nr_replicas = 0; + spin_unlock(&s->lock); + } + } + + /* + * racing with fallocate can cause us to add fewer sectors than + * expected - but we shouldn't add more sectors than expected: + */ + WARN_ON_ONCE(io->op.i_sectors_delta > 0); + + /* + * (error (due to going RO) halfway through a page can screw that up + * slightly) + * XXX wtf? 
+ BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); + */ + + /* + * PageWriteback is effectively our ref on the inode - fixup i_blocks + * before calling end_page_writeback: + */ + bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); + + bio_for_each_folio_all(fi, bio) { + struct bch_folio *s = __bch2_folio(fi.folio); + + if (atomic_dec_and_test(&s->write_count)) + folio_end_writeback(fi.folio); + } + + bio_put(&io->op.wbio.bio); +} + +static void bch2_writepage_do_io(struct bch_writepage_state *w) +{ + struct bch_writepage_io *io = w->io; + + w->io = NULL; + closure_call(&io->op.cl, bch2_write, NULL, NULL); +} + +/* + * Get a bch_writepage_io and add @page to it - appending to an existing one if + * possible, else allocating a new one: + */ +static void bch2_writepage_io_alloc(struct bch_fs *c, + struct writeback_control *wbc, + struct bch_writepage_state *w, + struct bch_inode_info *inode, + u64 sector, + unsigned nr_replicas) +{ + struct bch_write_op *op; + + w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, + REQ_OP_WRITE, + GFP_KERNEL, + &c->writepage_bioset), + struct bch_writepage_io, op.wbio.bio); + + w->io->inode = inode; + op = &w->io->op; + bch2_write_op_init(op, c, w->opts); + op->target = w->opts.foreground_target; + op->nr_replicas = nr_replicas; + op->res.nr_replicas = nr_replicas; + op->write_point = writepoint_hashed(inode->ei_last_dirtied); + op->subvol = inode->ei_subvol; + op->pos = POS(inode->v.i_ino, sector); + op->end_io = bch2_writepage_io_done; + op->devs_need_flush = &inode->ei_devs_need_flush; + op->wbio.bio.bi_iter.bi_sector = sector; + op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); +} + +static int __bch2_writepage(struct folio *folio, + struct writeback_control *wbc, + void *data) +{ + struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_writepage_state *w = data; + struct bch_folio *s; + unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; + loff_t i_size = i_size_read(&inode->v); + int ret; + + EBUG_ON(!folio_test_uptodate(folio)); + + /* Is the folio fully inside i_size? */ + if (folio_end_pos(folio) <= i_size) + goto do_io; + + /* Is the folio fully outside i_size? (truncate in progress) */ + if (folio_pos(folio) >= i_size) { + folio_unlock(folio); + return 0; + } + + /* + * The folio straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the folio size. For a file that is not a multiple of + * the folio size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." 
+ */ + folio_zero_segment(folio, + i_size - folio_pos(folio), + folio_size(folio)); +do_io: + f_sectors = folio_sectors(folio); + s = bch2_folio(folio); + + if (f_sectors > w->tmp_sectors) { + kfree(w->tmp); + w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); + w->tmp_sectors = f_sectors; + } + + /* + * Things get really hairy with errors during writeback: + */ + ret = bch2_get_folio_disk_reservation(c, inode, folio, false); + BUG_ON(ret); + + /* Before unlocking the page, get copy of reservations: */ + spin_lock(&s->lock); + memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); + + for (i = 0; i < f_sectors; i++) { + if (s->s[i].state < SECTOR_dirty) + continue; + + nr_replicas_this_write = + min_t(unsigned, nr_replicas_this_write, + s->s[i].nr_replicas + + s->s[i].replicas_reserved); + } + + for (i = 0; i < f_sectors; i++) { + if (s->s[i].state < SECTOR_dirty) + continue; + + s->s[i].nr_replicas = w->opts.compression + ? 0 : nr_replicas_this_write; + + s->s[i].replicas_reserved = 0; + bch2_folio_sector_set(folio, s, i, SECTOR_allocated); + } + spin_unlock(&s->lock); + + BUG_ON(atomic_read(&s->write_count)); + atomic_set(&s->write_count, 1); + + BUG_ON(folio_test_writeback(folio)); + folio_start_writeback(folio); + + folio_unlock(folio); + + offset = 0; + while (1) { + unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; + u64 sector; + + while (offset < f_sectors && + w->tmp[offset].state < SECTOR_dirty) + offset++; + + if (offset == f_sectors) + break; + + while (offset + sectors < f_sectors && + w->tmp[offset + sectors].state >= SECTOR_dirty) { + reserved_sectors += w->tmp[offset + sectors].replicas_reserved; + dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty; + sectors++; + } + BUG_ON(!sectors); + + sector = folio_sector(folio) + offset; + + if (w->io && + (w->io->op.res.nr_replicas != nr_replicas_this_write || + bio_full(&w->io->op.wbio.bio, sectors << 9) || + w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= + (BIO_MAX_VECS * PAGE_SIZE) || + bio_end_sector(&w->io->op.wbio.bio) != sector)) + bch2_writepage_do_io(w); + + if (!w->io) + bch2_writepage_io_alloc(c, wbc, w, inode, sector, + nr_replicas_this_write); + + atomic_inc(&s->write_count); + + BUG_ON(inode != w->io->inode); + BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, + sectors << 9, offset << 9)); + + /* Check for writing past i_size: */ + WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > + round_up(i_size, block_bytes(c)) && + !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), + "writing past i_size: %llu > %llu (unrounded %llu)\n", + bio_end_sector(&w->io->op.wbio.bio) << 9, + round_up(i_size, block_bytes(c)), + i_size); + + w->io->op.res.sectors += reserved_sectors; + w->io->op.i_sectors_delta -= dirty_sectors; + w->io->op.new_i_size = i_size; + + offset += sectors; + } + + if (atomic_dec_and_test(&s->write_count)) + folio_end_writeback(folio); + + return 0; +} + +int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + struct bch_fs *c = mapping->host->i_sb->s_fs_info; + struct bch_writepage_state w = + bch_writepage_state_init(c, to_bch_ei(mapping->host)); + struct blk_plug plug; + int ret; + + blk_start_plug(&plug); + ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); + if (w.io) + bch2_writepage_do_io(&w); + blk_finish_plug(&plug); + kfree(w.tmp); + return bch2_err_class(ret); +} + +/* buffered writes: */ + +int bch2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, + 
struct page **pagep, void **fsdata) +{ + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_folio_reservation *res; + struct folio *folio; + unsigned offset; + int ret = -ENOMEM; + + res = kmalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + bch2_folio_reservation_init(c, inode, res); + *fsdata = res; + + bch2_pagecache_add_get(inode); + + folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, + FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, + mapping_gfp_mask(mapping)); + if (IS_ERR_OR_NULL(folio)) + goto err_unlock; + + if (folio_test_uptodate(folio)) + goto out; + + offset = pos - folio_pos(folio); + len = min_t(size_t, len, folio_end_pos(folio) - pos); + + /* If we're writing entire folio, don't need to read it in first: */ + if (!offset && len == folio_size(folio)) + goto out; + + if (!offset && pos + len >= inode->v.i_size) { + folio_zero_segment(folio, len, folio_size(folio)); + flush_dcache_folio(folio); + goto out; + } + + if (folio_pos(folio) >= inode->v.i_size) { + folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); + flush_dcache_folio(folio); + goto out; + } +readpage: + ret = bch2_read_single_folio(folio, mapping); + if (ret) + goto err; +out: + ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); + if (ret) + goto err; + + ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); + if (ret) { + if (!folio_test_uptodate(folio)) { + /* + * If the folio hasn't been read in, we won't know if we + * actually need a reservation - we don't actually need + * to read here, we just need to check if the folio is + * fully backed by uncompressed data: + */ + goto readpage; + } + + goto err; + } + + *pagep = &folio->page; + return 0; +err: + folio_unlock(folio); + folio_put(folio); + *pagep = NULL; +err_unlock: + bch2_pagecache_add_put(inode); + kfree(res); + *fsdata = NULL; + return bch2_err_class(ret); +} + +int bch2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct bch_inode_info *inode = to_bch_ei(mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_folio_reservation *res = fsdata; + struct folio *folio = page_folio(page); + unsigned offset = pos - folio_pos(folio); + + lockdep_assert_held(&inode->v.i_rwsem); + BUG_ON(offset + copied > folio_size(folio)); + + if (unlikely(copied < len && !folio_test_uptodate(folio))) { + /* + * The folio needs to be read in, but that would destroy + * our partial write - simplest thing is to just force + * userspace to redo the write: + */ + folio_zero_range(folio, 0, folio_size(folio)); + flush_dcache_folio(folio); + copied = 0; + } + + spin_lock(&inode->v.i_lock); + if (pos + copied > inode->v.i_size) + i_size_write(&inode->v, pos + copied); + spin_unlock(&inode->v.i_lock); + + if (copied) { + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + + bch2_set_folio_dirty(c, inode, folio, res, offset, copied); + + inode->ei_last_dirtied = (unsigned long) current; + } + + folio_unlock(folio); + folio_put(folio); + bch2_pagecache_add_put(inode); + + bch2_folio_reservation_put(c, inode, res); + kfree(res); + + return copied; +} + +static noinline void folios_trunc(folios *folios, struct folio **fi) +{ + while (folios->data + folios->nr > fi) { + struct folio *f = darray_pop(folios); + + folio_unlock(f); + folio_put(f); + } +} + +static int __bch2_buffered_write(struct bch_inode_info *inode, + struct 
address_space *mapping, + struct iov_iter *iter, + loff_t pos, unsigned len) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_folio_reservation res; + folios folios; + struct folio **fi, *f; + unsigned copied = 0, f_offset; + u64 end = pos + len, f_pos; + loff_t last_folio_pos = inode->v.i_size; + int ret = 0; + + BUG_ON(!len); + + bch2_folio_reservation_init(c, inode, &res); + darray_init(&folios); + + ret = bch2_filemap_get_contig_folios_d(mapping, pos, end, + FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, + mapping_gfp_mask(mapping), + &folios); + if (ret) + goto out; + + BUG_ON(!folios.nr); + + f = darray_first(folios); + if (pos != folio_pos(f) && !folio_test_uptodate(f)) { + ret = bch2_read_single_folio(f, mapping); + if (ret) + goto out; + } + + f = darray_last(folios); + end = min(end, folio_end_pos(f)); + last_folio_pos = folio_pos(f); + if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { + if (end >= inode->v.i_size) { + folio_zero_range(f, 0, folio_size(f)); + } else { + ret = bch2_read_single_folio(f, mapping); + if (ret) + goto out; + } + } + + ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr); + if (ret) + goto out; + + f_pos = pos; + f_offset = pos - folio_pos(darray_first(folios)); + darray_for_each(folios, fi) { + struct folio *f = *fi; + u64 f_len = min(end, folio_end_pos(f)) - f_pos; + + /* + * XXX: per POSIX and fstests generic/275, on -ENOSPC we're + * supposed to write as much as we have disk space for. + * + * On failure here we should still write out a partial page if + * we aren't completely out of disk space - we don't do that + * yet: + */ + ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); + if (unlikely(ret)) { + folios_trunc(&folios, fi); + if (!folios.nr) + goto out; + + end = min(end, folio_end_pos(darray_last(folios))); + break; + } + + f_pos = folio_end_pos(f); + f_offset = 0; + } + + if (mapping_writably_mapped(mapping)) + darray_for_each(folios, fi) + flush_dcache_folio(*fi); + + f_pos = pos; + f_offset = pos - folio_pos(darray_first(folios)); + darray_for_each(folios, fi) { + struct folio *f = *fi; + u64 f_len = min(end, folio_end_pos(f)) - f_pos; + unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); + + if (!f_copied) { + folios_trunc(&folios, fi); + break; + } + + if (!folio_test_uptodate(f) && + f_copied != folio_size(f) && + pos + copied + f_copied < inode->v.i_size) { + folio_zero_range(f, 0, folio_size(f)); + folios_trunc(&folios, fi); + break; + } + + flush_dcache_folio(f); + copied += f_copied; + + if (f_copied != f_len) { + folios_trunc(&folios, fi + 1); + break; + } + + f_pos = folio_end_pos(f); + f_offset = 0; + } + + if (!copied) + goto out; + + end = pos + copied; + + spin_lock(&inode->v.i_lock); + if (end > inode->v.i_size) + i_size_write(&inode->v, end); + spin_unlock(&inode->v.i_lock); + + f_pos = pos; + f_offset = pos - folio_pos(darray_first(folios)); + darray_for_each(folios, fi) { + struct folio *f = *fi; + u64 f_len = min(end, folio_end_pos(f)) - f_pos; + + if (!folio_test_uptodate(f)) + folio_mark_uptodate(f); + + bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); + + f_pos = folio_end_pos(f); + f_offset = 0; + } + + inode->ei_last_dirtied = (unsigned long) current; +out: + darray_for_each(folios, fi) { + folio_unlock(*fi); + folio_put(*fi); + } + + /* + * If the last folio added to the mapping starts beyond current EOF, we + * performed a short write but left around at least one post-EOF folio. + * Clean up the mapping before we return. 
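+ *
+ * Worked example (hypothetical numbers, for illustration only): with
+ * 4k folios and i_size == 4096, a write at pos == 8192 creates a folio
+ * at 8192; if copy_page_from_iter_atomic() then copies nothing,
+ * copied == 0 and i_size is never advanced, but the post-EOF folio is
+ * still in the mapping. last_folio_pos (8192) >= i_size (4096) catches
+ * that case, and truncate_pagecache() below drops the stray folio.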
+ */ + if (last_folio_pos >= inode->v.i_size) + truncate_pagecache(&inode->v, inode->v.i_size); + + darray_exit(&folios); + bch2_folio_reservation_put(c, inode, &res); + + return copied ?: ret; +} + +static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct bch_inode_info *inode = file_bch_inode(file); + loff_t pos = iocb->ki_pos; + ssize_t written = 0; + int ret = 0; + + bch2_pagecache_add_get(inode); + + do { + unsigned offset = pos & (PAGE_SIZE - 1); + unsigned bytes = iov_iter_count(iter); +again: + /* + * Bring in the user page that we will copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. + * + * Not only is this an optimisation, but it is also required + * to check that the address is actually valid, when atomic + * usercopies are used, below. + */ + if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { + bytes = min_t(unsigned long, iov_iter_count(iter), + PAGE_SIZE - offset); + + if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { + ret = -EFAULT; + break; + } + } + + if (unlikely(fatal_signal_pending(current))) { + ret = -EINTR; + break; + } + + ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); + if (unlikely(ret < 0)) + break; + + cond_resched(); + + if (unlikely(ret == 0)) { + /* + * If we were unable to copy any data at all, we must + * fall back to a single segment length write. + * + * If we didn't fallback here, we could livelock + * because not all segments in the iov can be copied at + * once without a pagefault. + */ + bytes = min_t(unsigned long, PAGE_SIZE - offset, + iov_iter_single_seg_count(iter)); + goto again; + } + pos += ret; + written += ret; + ret = 0; + + balance_dirty_pages_ratelimited(mapping); + } while (iov_iter_count(iter)); + + bch2_pagecache_add_put(inode); + + return written ? 
written : ret; +} + +ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + ssize_t ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = bch2_direct_write(iocb, from); + goto out; + } + + inode_lock(&inode->v); + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto unlock; + + ret = file_remove_privs(file); + if (ret) + goto unlock; + + ret = file_update_time(file); + if (ret) + goto unlock; + + ret = bch2_buffered_write(iocb, from); + if (likely(ret > 0)) + iocb->ki_pos += ret; +unlock: + inode_unlock(&inode->v); + + if (ret > 0) + ret = generic_write_sync(iocb, ret); +out: + return bch2_err_class(ret); +} + +void bch2_fs_fs_io_buffered_exit(struct bch_fs *c) +{ + bioset_exit(&c->writepage_bioset); +} + +int bch2_fs_fs_io_buffered_init(struct bch_fs *c) +{ + if (bioset_init(&c->writepage_bioset, + 4, offsetof(struct bch_writepage_io, op.wbio.bio), + BIOSET_NEED_BVECS)) + return -BCH_ERR_ENOMEM_writepage_bioset_init; + + return 0; +} + +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-io-buffered.h b/fs/bcachefs/fs-io-buffered.h new file mode 100644 index 000000000000..a6126ff790e6 --- /dev/null +++ b/fs/bcachefs/fs-io-buffered.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_FS_IO_BUFFERED_H +#define _BCACHEFS_FS_IO_BUFFERED_H + +#ifndef NO_BCACHEFS_FS + +int bch2_read_single_folio(struct folio *, struct address_space *); +int bch2_read_folio(struct file *, struct folio *); + +int bch2_writepages(struct address_space *, struct writeback_control *); +void bch2_readahead(struct readahead_control *); + +int bch2_write_begin(struct file *, struct address_space *, loff_t, + unsigned, struct page **, void **); +int bch2_write_end(struct file *, struct address_space *, loff_t, + unsigned, unsigned, struct page *, void *); + +ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); + +void bch2_fs_fs_io_buffered_exit(struct bch_fs *); +int bch2_fs_fs_io_buffered_init(struct bch_fs *); +#else +static inline void bch2_fs_fs_io_buffered_exit(struct bch_fs *c) {} +static inline int bch2_fs_fs_io_buffered_init(struct bch_fs *c) { return 0; } +#endif + +#endif /* _BCACHEFS_FS_IO_BUFFERED_H */ diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c new file mode 100644 index 000000000000..2b29abd24d56 --- /dev/null +++ b/fs/bcachefs/fs-io-direct.c @@ -0,0 +1,679 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef NO_BCACHEFS_FS + +#include "bcachefs.h" +#include "alloc_foreground.h" +#include "fs.h" +#include "fs-io.h" +#include "fs-io-direct.h" +#include "fs-io-pagecache.h" +#include "io.h" + +#include +#include +#include + +/* O_DIRECT reads */ + +struct dio_read { + struct closure cl; + struct kiocb *req; + long ret; + bool should_dirty; + struct bch_read_bio rbio; +}; + +static void bio_check_or_release(struct bio *bio, bool check_dirty) +{ + if (check_dirty) { + bio_check_pages_dirty(bio); + } else { + bio_release_pages(bio, false); + bio_put(bio); + } +} + +static void bch2_dio_read_complete(struct closure *cl) +{ + struct dio_read *dio = container_of(cl, struct dio_read, cl); + + dio->req->ki_complete(dio->req, dio->ret); + bio_check_or_release(&dio->rbio.bio, dio->should_dirty); +} + +static void bch2_direct_IO_read_endio(struct bio *bio) +{ + struct dio_read *dio = bio->bi_private; + + if (bio->bi_status) + dio->ret = blk_status_to_errno(bio->bi_status); + + closure_put(&dio->cl); +} + +static void 
bch2_direct_IO_read_split_endio(struct bio *bio) +{ + struct dio_read *dio = bio->bi_private; + bool should_dirty = dio->should_dirty; + + bch2_direct_IO_read_endio(bio); + bio_check_or_release(bio, should_dirty); +} + +static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) +{ + struct file *file = req->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_io_opts opts; + struct dio_read *dio; + struct bio *bio; + loff_t offset = req->ki_pos; + bool sync = is_sync_kiocb(req); + size_t shorten; + ssize_t ret; + + bch2_inode_opts_get(&opts, c, &inode->ei_inode); + + if ((offset|iter->count) & (block_bytes(c) - 1)) + return -EINVAL; + + ret = min_t(loff_t, iter->count, + max_t(loff_t, 0, i_size_read(&inode->v) - offset)); + + if (!ret) + return ret; + + shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); + iter->count -= shorten; + + bio = bio_alloc_bioset(NULL, + bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), + REQ_OP_READ, + GFP_KERNEL, + &c->dio_read_bioset); + + bio->bi_end_io = bch2_direct_IO_read_endio; + + dio = container_of(bio, struct dio_read, rbio.bio); + closure_init(&dio->cl, NULL); + + /* + * this is a _really_ horrible hack just to avoid an atomic sub at the + * end: + */ + if (!sync) { + set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); + atomic_set(&dio->cl.remaining, + CLOSURE_REMAINING_INITIALIZER - + CLOSURE_RUNNING + + CLOSURE_DESTRUCTOR); + } else { + atomic_set(&dio->cl.remaining, + CLOSURE_REMAINING_INITIALIZER + 1); + } + + dio->req = req; + dio->ret = ret; + /* + * This is one of the sketchier things I've encountered: we have to skip + * the dirtying of requests that are internal from the kernel (i.e. from + * loopback), because we'll deadlock on page_lock. 
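+ *
+ * (A sketch of the reasoning, assumed rather than stated here:
+ * iter_is_iovec() is true only for plain userspace iovecs, whose
+ * pages we own and may safely dirty. A loop device submits bvec
+ * iterators pointing at the upper filesystem's pagecache pages,
+ * which may already be locked, so dirtying them via the page lock
+ * would deadlock.)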
+ */ + dio->should_dirty = iter_is_iovec(iter); + + goto start; + while (iter->count) { + bio = bio_alloc_bioset(NULL, + bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), + REQ_OP_READ, + GFP_KERNEL, + &c->bio_read); + bio->bi_end_io = bch2_direct_IO_read_split_endio; +start: + bio->bi_opf = REQ_OP_READ|REQ_SYNC; + bio->bi_iter.bi_sector = offset >> 9; + bio->bi_private = dio; + + ret = bio_iov_iter_get_pages(bio, iter); + if (ret < 0) { + /* XXX: fault inject this path */ + bio->bi_status = BLK_STS_RESOURCE; + bio_endio(bio); + break; + } + + offset += bio->bi_iter.bi_size; + + if (dio->should_dirty) + bio_set_pages_dirty(bio); + + if (iter->count) + closure_get(&dio->cl); + + bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); + } + + iter->count += shorten; + + if (sync) { + closure_sync(&dio->cl); + closure_debug_destroy(&dio->cl); + ret = dio->ret; + bio_check_or_release(&dio->rbio.bio, dio->should_dirty); + return ret; + } else { + return -EIOCBQUEUED; + } +} + +ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct bch_inode_info *inode = file_bch_inode(file); + struct address_space *mapping = file->f_mapping; + size_t count = iov_iter_count(iter); + ssize_t ret; + + if (!count) + return 0; /* skip atime */ + + if (iocb->ki_flags & IOCB_DIRECT) { + struct blk_plug plug; + + if (unlikely(mapping->nrpages)) { + ret = filemap_write_and_wait_range(mapping, + iocb->ki_pos, + iocb->ki_pos + count - 1); + if (ret < 0) + goto out; + } + + file_accessed(file); + + blk_start_plug(&plug); + ret = bch2_direct_IO_read(iocb, iter); + blk_finish_plug(&plug); + + if (ret >= 0) + iocb->ki_pos += ret; + } else { + bch2_pagecache_add_get(inode); + ret = generic_file_read_iter(iocb, iter); + bch2_pagecache_add_put(inode); + } +out: + return bch2_err_class(ret); +} + +/* O_DIRECT writes */ + +struct dio_write { + struct kiocb *req; + struct address_space *mapping; + struct bch_inode_info *inode; + struct mm_struct *mm; + unsigned loop:1, + extending:1, + sync:1, + flush:1, + free_iov:1; + struct quota_res quota_res; + u64 written; + + struct iov_iter iter; + struct iovec inline_vecs[2]; + + /* must be last: */ + struct bch_write_op op; +}; + +static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, + u64 offset, u64 size, + unsigned nr_replicas, bool compressed) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + u64 end = offset + size; + u32 snapshot; + bool ret = true; + int err; + + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (err) + goto err; + + for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, + SPOS(inum.inum, offset, snapshot), + BTREE_ITER_SLOTS, k, err) { + if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) + break; + + if (k.k->p.snapshot != snapshot || + nr_replicas > bch2_bkey_replicas(c, k) || + (!compressed && bch2_bkey_sectors_compressed(k))) { + ret = false; + break; + } + } + + offset = iter.pos.offset; + bch2_trans_iter_exit(&trans, &iter); +err: + if (bch2_err_matches(err, BCH_ERR_transaction_restart)) + goto retry; + bch2_trans_exit(&trans); + + return err ? 
false : ret; +} + +static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct bch_inode_info *inode = dio->inode; + struct bio *bio = &dio->op.wbio.bio; + + return bch2_check_range_allocated(c, inode_inum(inode), + dio->op.pos.offset, bio_sectors(bio), + dio->op.opts.data_replicas, + dio->op.opts.compression != 0); +} + +static void bch2_dio_write_loop_async(struct bch_write_op *); +static __always_inline long bch2_dio_write_done(struct dio_write *dio); + +/* + * We're going to return -EIOCBQUEUED, but we haven't finished consuming the + * iov_iter yet, so we need to stash a copy of the iovec: it might be on the + * caller's stack, we're not guaranteed that it will live for the duration of + * the IO: + */ +static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) +{ + struct iovec *iov = dio->inline_vecs; + + /* + * iov_iter has a single embedded iovec - nothing to do: + */ + if (iter_is_ubuf(&dio->iter)) + return 0; + + /* + * We don't currently handle non-iovec iov_iters here - return an error, + * and we'll fall back to doing the IO synchronously: + */ + if (!iter_is_iovec(&dio->iter)) + return -1; + + if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { + iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), + GFP_KERNEL); + if (unlikely(!iov)) + return -ENOMEM; + + dio->free_iov = true; + } + + memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); + dio->iter.__iov = iov; + return 0; +} + +static void bch2_dio_write_flush_done(struct closure *cl) +{ + struct dio_write *dio = container_of(cl, struct dio_write, op.cl); + struct bch_fs *c = dio->op.c; + + closure_debug_destroy(cl); + + dio->op.error = bch2_journal_error(&c->journal); + + bch2_dio_write_done(dio); +} + +static noinline void bch2_dio_write_flush(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct bch_inode_unpacked inode; + int ret; + + dio->flush = 0; + + closure_init(&dio->op.cl, NULL); + + if (!dio->op.error) { + ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); + if (ret) { + dio->op.error = ret; + } else { + bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, + &dio->op.cl); + bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); + } + } + + if (dio->sync) { + closure_sync(&dio->op.cl); + closure_debug_destroy(&dio->op.cl); + } else { + continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); + } +} + +static __always_inline long bch2_dio_write_done(struct dio_write *dio) +{ + struct kiocb *req = dio->req; + struct bch_inode_info *inode = dio->inode; + bool sync = dio->sync; + long ret; + + if (unlikely(dio->flush)) { + bch2_dio_write_flush(dio); + if (!sync) + return -EIOCBQUEUED; + } + + bch2_pagecache_block_put(inode); + + if (dio->free_iov) + kfree(dio->iter.__iov); + + ret = dio->op.error ?: ((long) dio->written << 9); + bio_put(&dio->op.wbio.bio); + + /* inode->i_dio_count is our ref on inode and thus bch_fs */ + inode_dio_end(&inode->v); + + if (ret < 0) + ret = bch2_err_class(ret); + + if (!sync) { + req->ki_complete(req, ret); + ret = -EIOCBQUEUED; + } + return ret; +} + +static __always_inline void bch2_dio_write_end(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct kiocb *req = dio->req; + struct bch_inode_info *inode = dio->inode; + struct bio *bio = &dio->op.wbio.bio; + + req->ki_pos += (u64) dio->op.written << 9; + dio->written += dio->op.written; + + if (dio->extending) { + spin_lock(&inode->v.i_lock); + if (req->ki_pos > inode->v.i_size) + 
i_size_write(&inode->v, req->ki_pos); + spin_unlock(&inode->v.i_lock); + } + + if (dio->op.i_sectors_delta || dio->quota_res.sectors) { + mutex_lock(&inode->ei_quota_lock); + __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); + __bch2_quota_reservation_put(c, inode, &dio->quota_res); + mutex_unlock(&inode->ei_quota_lock); + } + + bio_release_pages(bio, false); + + if (unlikely(dio->op.error)) + set_bit(EI_INODE_ERROR, &inode->ei_flags); +} + +static __always_inline long bch2_dio_write_loop(struct dio_write *dio) +{ + struct bch_fs *c = dio->op.c; + struct kiocb *req = dio->req; + struct address_space *mapping = dio->mapping; + struct bch_inode_info *inode = dio->inode; + struct bch_io_opts opts; + struct bio *bio = &dio->op.wbio.bio; + unsigned unaligned, iter_count; + bool sync = dio->sync, dropped_locks; + long ret; + + bch2_inode_opts_get(&opts, c, &inode->ei_inode); + + while (1) { + iter_count = dio->iter.count; + + EBUG_ON(current->faults_disabled_mapping); + current->faults_disabled_mapping = mapping; + + ret = bio_iov_iter_get_pages(bio, &dio->iter); + + dropped_locks = fdm_dropped_locks(); + + current->faults_disabled_mapping = NULL; + + /* + * If the fault handler returned an error but also signalled + * that it dropped & retook ei_pagecache_lock, we just need to + * re-shoot down the page cache and retry: + */ + if (dropped_locks && ret) + ret = 0; + + if (unlikely(ret < 0)) + goto err; + + if (unlikely(dropped_locks)) { + ret = bch2_write_invalidate_inode_pages_range(mapping, + req->ki_pos, + req->ki_pos + iter_count - 1); + if (unlikely(ret)) + goto err; + + if (!bio->bi_iter.bi_size) + continue; + } + + unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); + bio->bi_iter.bi_size -= unaligned; + iov_iter_revert(&dio->iter, unaligned); + + if (!bio->bi_iter.bi_size) { + /* + * bio_iov_iter_get_pages was only able to get < + * blocksize worth of pages: + */ + ret = -EFAULT; + goto err; + } + + bch2_write_op_init(&dio->op, c, opts); + dio->op.end_io = sync + ? 
NULL + : bch2_dio_write_loop_async; + dio->op.target = dio->op.opts.foreground_target; + dio->op.write_point = writepoint_hashed((unsigned long) current); + dio->op.nr_replicas = dio->op.opts.data_replicas; + dio->op.subvol = inode->ei_subvol; + dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); + dio->op.devs_need_flush = &inode->ei_devs_need_flush; + + if (sync) + dio->op.flags |= BCH_WRITE_SYNC; + dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; + + ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, + bio_sectors(bio), true); + if (unlikely(ret)) + goto err; + + ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), + dio->op.opts.data_replicas, 0); + if (unlikely(ret) && + !bch2_dio_write_check_allocated(dio)) + goto err; + + task_io_account_write(bio->bi_iter.bi_size); + + if (unlikely(dio->iter.count) && + !dio->sync && + !dio->loop && + bch2_dio_write_copy_iov(dio)) + dio->sync = sync = true; + + dio->loop = true; + closure_call(&dio->op.cl, bch2_write, NULL, NULL); + + if (!sync) + return -EIOCBQUEUED; + + bch2_dio_write_end(dio); + + if (likely(!dio->iter.count) || dio->op.error) + break; + + bio_reset(bio, NULL, REQ_OP_WRITE); + } +out: + return bch2_dio_write_done(dio); +err: + dio->op.error = ret; + + bio_release_pages(bio, false); + + bch2_quota_reservation_put(c, inode, &dio->quota_res); + goto out; +} + +static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) +{ + struct mm_struct *mm = dio->mm; + + bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); + + if (mm) + kthread_use_mm(mm); + bch2_dio_write_loop(dio); + if (mm) + kthread_unuse_mm(mm); +} + +static void bch2_dio_write_loop_async(struct bch_write_op *op) +{ + struct dio_write *dio = container_of(op, struct dio_write, op); + + bch2_dio_write_end(dio); + + if (likely(!dio->iter.count) || dio->op.error) + bch2_dio_write_done(dio); + else + bch2_dio_write_continue(dio); +} + +ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) +{ + struct file *file = req->ki_filp; + struct address_space *mapping = file->f_mapping; + struct bch_inode_info *inode = file_bch_inode(file); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct dio_write *dio; + struct bio *bio; + bool locked = true, extending; + ssize_t ret; + + prefetch(&c->opts); + prefetch((void *) &c->opts + 64); + prefetch(&inode->ei_inode); + prefetch((void *) &inode->ei_inode + 64); + + inode_lock(&inode->v); + + ret = generic_write_checks(req, iter); + if (unlikely(ret <= 0)) + goto err; + + ret = file_remove_privs(file); + if (unlikely(ret)) + goto err; + + ret = file_update_time(file); + if (unlikely(ret)) + goto err; + + if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) + goto err; + + inode_dio_begin(&inode->v); + bch2_pagecache_block_get(inode); + + extending = req->ki_pos + iter->count > inode->v.i_size; + if (!extending) { + inode_unlock(&inode->v); + locked = false; + } + + bio = bio_alloc_bioset(NULL, + bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), + REQ_OP_WRITE, + GFP_KERNEL, + &c->dio_write_bioset); + dio = container_of(bio, struct dio_write, op.wbio.bio); + dio->req = req; + dio->mapping = mapping; + dio->inode = inode; + dio->mm = current->mm; + dio->loop = false; + dio->extending = extending; + dio->sync = is_sync_kiocb(req) || extending; + dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; + dio->free_iov = false; + dio->quota_res.sectors = 0; + dio->written = 0; + dio->iter = *iter; + dio->op.c = c; + + if (unlikely(mapping->nrpages)) { + ret = 
bch2_write_invalidate_inode_pages_range(mapping, + req->ki_pos, + req->ki_pos + iter->count - 1); + if (unlikely(ret)) + goto err_put_bio; + } + + ret = bch2_dio_write_loop(dio); +err: + if (locked) + inode_unlock(&inode->v); + return ret; +err_put_bio: + bch2_pagecache_block_put(inode); + bio_put(bio); + inode_dio_end(&inode->v); + goto err; +} + +void bch2_fs_fs_io_direct_exit(struct bch_fs *c) +{ + bioset_exit(&c->dio_write_bioset); + bioset_exit(&c->dio_read_bioset); +} + +int bch2_fs_fs_io_direct_init(struct bch_fs *c) +{ + if (bioset_init(&c->dio_read_bioset, + 4, offsetof(struct dio_read, rbio.bio), + BIOSET_NEED_BVECS)) + return -BCH_ERR_ENOMEM_dio_read_bioset_init; + + if (bioset_init(&c->dio_write_bioset, + 4, offsetof(struct dio_write, op.wbio.bio), + BIOSET_NEED_BVECS)) + return -BCH_ERR_ENOMEM_dio_write_bioset_init; + + return 0; +} + +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-io-direct.h b/fs/bcachefs/fs-io-direct.h new file mode 100644 index 000000000000..814621ec7f81 --- /dev/null +++ b/fs/bcachefs/fs-io-direct.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_FS_IO_DIRECT_H +#define _BCACHEFS_FS_IO_DIRECT_H + +#ifndef NO_BCACHEFS_FS +ssize_t bch2_direct_write(struct kiocb *, struct iov_iter *); +ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *); + +void bch2_fs_fs_io_direct_exit(struct bch_fs *); +int bch2_fs_fs_io_direct_init(struct bch_fs *); +#else +static inline void bch2_fs_fs_io_direct_exit(struct bch_fs *c) {} +static inline int bch2_fs_fs_io_direct_init(struct bch_fs *c) { return 0; } +#endif + +#endif /* _BCACHEFS_FS_IO_DIRECT_H */ diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c new file mode 100644 index 000000000000..2c1ef13d9bcd --- /dev/null +++ b/fs/bcachefs/fs-io-pagecache.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef NO_BCACHEFS_FS + +#include "bcachefs.h" +#include "btree_iter.h" +#include "extents.h" +#include "fs-io.h" +#include "fs-io-pagecache.h" +#include "subvolume.h" + +#include +#include + +int bch2_filemap_get_contig_folios_d(struct address_space *mapping, + loff_t start, u64 end, + int fgp_flags, gfp_t gfp, + folios *folios) +{ + struct folio *f; + u64 pos = start; + int ret = 0; + + while (pos < end) { + if ((u64) pos >= (u64) start + (1ULL << 20)) + fgp_flags &= ~FGP_CREAT; + + ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL); + if (ret) + break; + + f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); + if (IS_ERR_OR_NULL(f)) + break; + + BUG_ON(folios->nr && folio_pos(f) != pos); + + pos = folio_end_pos(f); + darray_push(folios, f); + } + + if (!folios->nr && !ret && (fgp_flags & FGP_CREAT)) + ret = -ENOMEM; + + return folios->nr ? 
0 : ret; +} + +/* pagecache_block must be held */ +int bch2_write_invalidate_inode_pages_range(struct address_space *mapping, + loff_t start, loff_t end) +{ + int ret; + + /* + * XXX: the way this is currently implemented, we can spin if a process + * is continually redirtying a specific page + */ + do { + if (!mapping->nrpages) + return 0; + + ret = filemap_write_and_wait_range(mapping, start, end); + if (ret) + break; + + if (!mapping->nrpages) + return 0; + + ret = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, + end >> PAGE_SHIFT); + } while (ret == -EBUSY); + + return ret; +} + +static const char * const bch2_folio_sector_states[] = { +#define x(n) #n, + BCH_FOLIO_SECTOR_STATE() +#undef x + NULL +}; + +static inline enum bch_folio_sector_state +folio_sector_dirty(enum bch_folio_sector_state state) +{ + switch (state) { + case SECTOR_unallocated: + return SECTOR_dirty; + case SECTOR_reserved: + return SECTOR_dirty_reserved; + default: + return state; + } +} + +static inline enum bch_folio_sector_state +folio_sector_undirty(enum bch_folio_sector_state state) +{ + switch (state) { + case SECTOR_dirty: + return SECTOR_unallocated; + case SECTOR_dirty_reserved: + return SECTOR_reserved; + default: + return state; + } +} + +static inline enum bch_folio_sector_state +folio_sector_reserve(enum bch_folio_sector_state state) +{ + switch (state) { + case SECTOR_unallocated: + return SECTOR_reserved; + case SECTOR_dirty: + return SECTOR_dirty_reserved; + default: + return state; + } +} + +/* for newly allocated folios: */ +struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) +{ + struct bch_folio *s; + + s = kzalloc(sizeof(*s) + + sizeof(struct bch_folio_sector) * + folio_sectors(folio), gfp); + if (!s) + return NULL; + + spin_lock_init(&s->lock); + folio_attach_private(folio, s); + return s; +} + +struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp) +{ + return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp); +} + +static unsigned bkey_to_sector_state(struct bkey_s_c k) +{ + if (bkey_extent_is_reservation(k)) + return SECTOR_reserved; + if (bkey_extent_is_allocation(k.k)) + return SECTOR_allocated; + return SECTOR_unallocated; +} + +static void __bch2_folio_set(struct folio *folio, + unsigned pg_offset, unsigned pg_len, + unsigned nr_ptrs, unsigned state) +{ + struct bch_folio *s = bch2_folio(folio); + unsigned i, sectors = folio_sectors(folio); + + BUG_ON(pg_offset >= sectors); + BUG_ON(pg_offset + pg_len > sectors); + + spin_lock(&s->lock); + + for (i = pg_offset; i < pg_offset + pg_len; i++) { + s->s[i].nr_replicas = nr_ptrs; + bch2_folio_sector_set(folio, s, i, state); + } + + if (i == sectors) + s->uptodate = true; + + spin_unlock(&s->lock); +} + +/* + * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the + * extents btree: + */ +int bch2_folio_set(struct bch_fs *c, subvol_inum inum, + struct folio **folios, unsigned nr_folios) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + struct bch_folio *s; + u64 offset = folio_sector(folios[0]); + unsigned folio_idx; + u32 snapshot; + bool need_set = false; + int ret; + + for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) { + s = bch2_folio_create(folios[folio_idx], GFP_KERNEL); + if (!s) + return -ENOMEM; + + need_set |= !s->uptodate; + } + + if (!need_set) + return 0; + + folio_idx = 0; + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + 
goto err; + + for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, + SPOS(inum.inum, offset, snapshot), + BTREE_ITER_SLOTS, k, ret) { + unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); + unsigned state = bkey_to_sector_state(k); + + while (folio_idx < nr_folios) { + struct folio *folio = folios[folio_idx]; + u64 folio_start = folio_sector(folio); + u64 folio_end = folio_end_sector(folio); + unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - + folio_start; + unsigned folio_len = min(k.k->p.offset, folio_end) - + folio_offset - folio_start; + + BUG_ON(k.k->p.offset < folio_start); + BUG_ON(bkey_start_offset(k.k) > folio_end); + + if (!bch2_folio(folio)->uptodate) + __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); + + if (k.k->p.offset < folio_end) + break; + folio_idx++; + } + + if (folio_idx == nr_folios) + break; + } + + offset = iter.pos.offset; + bch2_trans_iter_exit(&trans, &iter); +err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + bch2_trans_exit(&trans); + + return ret; +} + +void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) +{ + struct bvec_iter iter; + struct folio_vec fv; + unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v + ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); + unsigned state = bkey_to_sector_state(k); + + bio_for_each_folio(fv, bio, iter) + __bch2_folio_set(fv.fv_folio, + fv.fv_offset >> 9, + fv.fv_len >> 9, + nr_ptrs, state); +} + +void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode, + u64 start, u64 end) +{ + pgoff_t index = start >> PAGE_SECTORS_SHIFT; + pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; + struct folio_batch fbatch; + unsigned i, j; + + if (end <= start) + return; + + folio_batch_init(&fbatch); + + while (filemap_get_folios(inode->v.i_mapping, + &index, end_index, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + u64 folio_start = folio_sector(folio); + u64 folio_end = folio_end_sector(folio); + unsigned folio_offset = max(start, folio_start) - folio_start; + unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; + struct bch_folio *s; + + BUG_ON(end <= folio_start); + + folio_lock(folio); + s = bch2_folio(folio); + + if (s) { + spin_lock(&s->lock); + for (j = folio_offset; j < folio_offset + folio_len; j++) + s->s[j].nr_replicas = 0; + spin_unlock(&s->lock); + } + + folio_unlock(folio); + } + folio_batch_release(&fbatch); + cond_resched(); + } +} + +void bch2_mark_pagecache_reserved(struct bch_inode_info *inode, + u64 start, u64 end) +{ + struct bch_fs *c = inode->v.i_sb->s_fs_info; + pgoff_t index = start >> PAGE_SECTORS_SHIFT; + pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; + struct folio_batch fbatch; + s64 i_sectors_delta = 0; + unsigned i, j; + + if (end <= start) + return; + + folio_batch_init(&fbatch); + + while (filemap_get_folios(inode->v.i_mapping, + &index, end_index, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + u64 folio_start = folio_sector(folio); + u64 folio_end = folio_end_sector(folio); + unsigned folio_offset = max(start, folio_start) - folio_start; + unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; + struct bch_folio *s; + + BUG_ON(end <= folio_start); + + folio_lock(folio); + s = bch2_folio(folio); + + if (s) { + spin_lock(&s->lock); + for (j = folio_offset; j < folio_offset + folio_len; j++) { + i_sectors_delta -= s->s[j].state == SECTOR_dirty; + 
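/*
+ * Transitions applied by folio_sector_reserve(), defined
+ * earlier in this file:
+ *   unallocated -> reserved
+ *   dirty       -> dirty_reserved
+ *   other       -> unchanged
+ * The delta above counts only plain-dirty sectors; a sketch
+ * of the intent, not stated here: once reserved, their space
+ * is accounted via the reservation rather than as dirty
+ * pagecache.
+ */
+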
bch2_folio_sector_set(folio, s, j, + folio_sector_reserve(s->s[j].state)); + } + spin_unlock(&s->lock); + } + + folio_unlock(folio); + } + folio_batch_release(&fbatch); + cond_resched(); + } + + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); +} + +static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, + unsigned nr_replicas) +{ + return max(0, (int) nr_replicas - + s->nr_replicas - + s->replicas_reserved); +} + +int bch2_get_folio_disk_reservation(struct bch_fs *c, + struct bch_inode_info *inode, + struct folio *folio, bool check_enospc) +{ + struct bch_folio *s = bch2_folio_create(folio, 0); + unsigned nr_replicas = inode_nr_replicas(c, inode); + struct disk_reservation disk_res = { 0 }; + unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; + int ret; + + if (!s) + return -ENOMEM; + + for (i = 0; i < sectors; i++) + disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); + + if (!disk_res_sectors) + return 0; + + ret = bch2_disk_reservation_get(c, &disk_res, + disk_res_sectors, 1, + !check_enospc + ? BCH_DISK_RESERVATION_NOFAIL + : 0); + if (unlikely(ret)) + return ret; + + for (i = 0; i < sectors; i++) + s->s[i].replicas_reserved += + sectors_to_reserve(&s->s[i], nr_replicas); + + return 0; +} + +void bch2_folio_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch2_folio_reservation *res) +{ + bch2_disk_reservation_put(c, &res->disk); + bch2_quota_reservation_put(c, inode, &res->quota); +} + +int bch2_folio_reservation_get(struct bch_fs *c, + struct bch_inode_info *inode, + struct folio *folio, + struct bch2_folio_reservation *res, + unsigned offset, unsigned len) +{ + struct bch_folio *s = bch2_folio_create(folio, 0); + unsigned i, disk_sectors = 0, quota_sectors = 0; + int ret; + + if (!s) + return -ENOMEM; + + BUG_ON(!s->uptodate); + + for (i = round_down(offset, block_bytes(c)) >> 9; + i < round_up(offset + len, block_bytes(c)) >> 9; + i++) { + disk_sectors += sectors_to_reserve(&s->s[i], + res->disk.nr_replicas); + quota_sectors += s->s[i].state == SECTOR_unallocated; + } + + if (disk_sectors) { + ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); + if (unlikely(ret)) + return ret; + } + + if (quota_sectors) { + ret = bch2_quota_reservation_add(c, inode, &res->quota, + quota_sectors, true); + if (unlikely(ret)) { + struct disk_reservation tmp = { + .sectors = disk_sectors + }; + + bch2_disk_reservation_put(c, &tmp); + res->disk.sectors -= disk_sectors; + return ret; + } + } + + return 0; +} + +static void bch2_clear_folio_bits(struct folio *folio) +{ + struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_folio *s = bch2_folio(folio); + struct disk_reservation disk_res = { 0 }; + int i, sectors = folio_sectors(folio), dirty_sectors = 0; + + if (!s) + return; + + EBUG_ON(!folio_test_locked(folio)); + EBUG_ON(folio_test_writeback(folio)); + + for (i = 0; i < sectors; i++) { + disk_res.sectors += s->s[i].replicas_reserved; + s->s[i].replicas_reserved = 0; + + dirty_sectors -= s->s[i].state == SECTOR_dirty; + bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state)); + } + + bch2_disk_reservation_put(c, &disk_res); + + bch2_i_sectors_acct(c, inode, NULL, dirty_sectors); + + bch2_folio_release(folio); +} + +void bch2_set_folio_dirty(struct bch_fs *c, + struct bch_inode_info *inode, + struct folio *folio, + struct bch2_folio_reservation *res, + unsigned offset, unsigned len) +{ + struct bch_folio *s = bch2_folio(folio); + 
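/*
+ * A minimal sketch of the caller sequence this helper assumes, as
+ * seen in bch2_write_begin()/bch2_write_end() earlier in the patch:
+ *
+ *   bch2_folio_reservation_init(c, inode, &res);
+ *   bch2_folio_reservation_get(c, inode, folio, &res, offset, len);
+ *   ...copy user data into the folio...
+ *   bch2_set_folio_dirty(c, inode, folio, &res, offset, copied);
+ *   bch2_folio_reservation_put(c, inode, &res);
+ */
+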
unsigned i, dirty_sectors = 0; + + WARN_ON((u64) folio_pos(folio) + offset + len > + round_up((u64) i_size_read(&inode->v), block_bytes(c))); + + BUG_ON(!s->uptodate); + + spin_lock(&s->lock); + + for (i = round_down(offset, block_bytes(c)) >> 9; + i < round_up(offset + len, block_bytes(c)) >> 9; + i++) { + unsigned sectors = sectors_to_reserve(&s->s[i], + res->disk.nr_replicas); + + /* + * This can happen if we race with the error path in + * bch2_writepage_io_done(): + */ + sectors = min_t(unsigned, sectors, res->disk.sectors); + + s->s[i].replicas_reserved += sectors; + res->disk.sectors -= sectors; + + dirty_sectors += s->s[i].state == SECTOR_unallocated; + + bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); + } + + spin_unlock(&s->lock); + + bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors); + + if (!folio_test_dirty(folio)) + filemap_dirty_folio(inode->v.i_mapping, folio); +} + +vm_fault_t bch2_page_fault(struct vm_fault *vmf) +{ + struct file *file = vmf->vma->vm_file; + struct address_space *mapping = file->f_mapping; + struct address_space *fdm = faults_disabled_mapping(); + struct bch_inode_info *inode = file_bch_inode(file); + vm_fault_t ret; + + if (fdm == mapping) + return VM_FAULT_SIGBUS; + + /* Lock ordering: */ + if (fdm > mapping) { + struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); + + if (bch2_pagecache_add_tryget(inode)) + goto got_lock; + + bch2_pagecache_block_put(fdm_host); + + bch2_pagecache_add_get(inode); + bch2_pagecache_add_put(inode); + + bch2_pagecache_block_get(fdm_host); + + /* Signal that lock has been dropped: */ + set_fdm_dropped_locks(); + return VM_FAULT_SIGBUS; + } + + bch2_pagecache_add_get(inode); +got_lock: + ret = filemap_fault(vmf); + bch2_pagecache_add_put(inode); + + return ret; +} + +vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) +{ + struct folio *folio = page_folio(vmf->page); + struct file *file = vmf->vma->vm_file; + struct bch_inode_info *inode = file_bch_inode(file); + struct address_space *mapping = file->f_mapping; + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch2_folio_reservation res; + unsigned len; + loff_t isize; + vm_fault_t ret; + + bch2_folio_reservation_init(c, inode, &res); + + sb_start_pagefault(inode->v.i_sb); + file_update_time(file); + + /* + * Not strictly necessary, but helps avoid dio writes livelocking in + * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get + * a bch2_write_invalidate_inode_pages_range() that works without dropping + * page lock before invalidating page + */ + bch2_pagecache_add_get(inode); + + folio_lock(folio); + isize = i_size_read(&inode->v); + + if (folio->mapping != mapping || folio_pos(folio) >= isize) { + folio_unlock(folio); + ret = VM_FAULT_NOPAGE; + goto out; + } + + len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); + + if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: + bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { + folio_unlock(folio); + ret = VM_FAULT_SIGBUS; + goto out; + } + + bch2_set_folio_dirty(c, inode, folio, &res, 0, len); + bch2_folio_reservation_put(c, inode, &res); + + folio_wait_stable(folio); + ret = VM_FAULT_LOCKED; +out: + bch2_pagecache_add_put(inode); + sb_end_pagefault(inode->v.i_sb); + + return ret; +} + +void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) +{ + if (offset || length < folio_size(folio)) + return; + + bch2_clear_folio_bits(folio); +} + +bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) +{ + if 
(folio_test_dirty(folio) || folio_test_writeback(folio)) + return false; + + bch2_clear_folio_bits(folio); + return true; +} + +/* fseek: */ + +static int folio_data_offset(struct folio *folio, loff_t pos, + unsigned min_replicas) +{ + struct bch_folio *s = bch2_folio(folio); + unsigned i, sectors = folio_sectors(folio); + + if (s) + for (i = folio_pos_to_s(folio, pos); i < sectors; i++) + if (s->s[i].state >= SECTOR_dirty && + s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas) + return i << SECTOR_SHIFT; + + return -1; +} + +loff_t bch2_seek_pagecache_data(struct inode *vinode, + loff_t start_offset, + loff_t end_offset, + unsigned min_replicas, + bool nonblock) +{ + struct folio_batch fbatch; + pgoff_t start_index = start_offset >> PAGE_SHIFT; + pgoff_t end_index = end_offset >> PAGE_SHIFT; + pgoff_t index = start_index; + unsigned i; + loff_t ret; + int offset; + + folio_batch_init(&fbatch); + + while (filemap_get_folios(vinode->i_mapping, + &index, end_index, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; + + if (!nonblock) { + folio_lock(folio); + } else if (!folio_trylock(folio)) { + folio_batch_release(&fbatch); + return -EAGAIN; + } + + offset = folio_data_offset(folio, + max(folio_pos(folio), start_offset), + min_replicas); + if (offset >= 0) { + ret = clamp(folio_pos(folio) + offset, + start_offset, end_offset); + folio_unlock(folio); + folio_batch_release(&fbatch); + return ret; + } + folio_unlock(folio); + } + folio_batch_release(&fbatch); + cond_resched(); + } + + return end_offset; +} + +static int folio_hole_offset(struct address_space *mapping, loff_t *offset, + unsigned min_replicas, bool nonblock) +{ + struct folio *folio; + struct bch_folio *s; + unsigned i, sectors; + bool ret = true; + + folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT, + FGP_LOCK|(nonblock ? 
FGP_NOWAIT : 0), 0); + if (folio == ERR_PTR(-EAGAIN)) + return -EAGAIN; + if (IS_ERR_OR_NULL(folio)) + return true; + + s = bch2_folio(folio); + if (!s) + goto unlock; + + sectors = folio_sectors(folio); + for (i = folio_pos_to_s(folio, *offset); i < sectors; i++) + if (s->s[i].state < SECTOR_dirty || + s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) { + *offset = max(*offset, + folio_pos(folio) + (i << SECTOR_SHIFT)); + goto unlock; + } + + *offset = folio_end_pos(folio); + ret = false; +unlock: + folio_unlock(folio); + folio_put(folio); + return ret; +} + +loff_t bch2_seek_pagecache_hole(struct inode *vinode, + loff_t start_offset, + loff_t end_offset, + unsigned min_replicas, + bool nonblock) +{ + struct address_space *mapping = vinode->i_mapping; + loff_t offset = start_offset; + + while (offset < end_offset && + !folio_hole_offset(mapping, &offset, min_replicas, nonblock)) + ; + + return min(offset, end_offset); +} + +int bch2_clamp_data_hole(struct inode *inode, + u64 *hole_start, + u64 *hole_end, + unsigned min_replicas, + bool nonblock) +{ + loff_t ret; + + ret = bch2_seek_pagecache_hole(inode, + *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; + if (ret < 0) + return ret; + + *hole_start = ret; + + if (*hole_start == *hole_end) + return 0; + + ret = bch2_seek_pagecache_data(inode, + *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; + if (ret < 0) + return ret; + + *hole_end = ret; + return 0; +} + +#endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h new file mode 100644 index 000000000000..a2222ad586e9 --- /dev/null +++ b/fs/bcachefs/fs-io-pagecache.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_FS_IO_PAGECACHE_H +#define _BCACHEFS_FS_IO_PAGECACHE_H + +#include + +typedef DARRAY(struct folio *) folios; + +int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t, + u64, int, gfp_t, folios *); +int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t); + +/* + * Use u64 for the end pos and sector helpers because if the folio covers the + * max supported range of the mapping, the start offset of the next folio + * overflows loff_t. This breaks much of the range based processing in the + * buffered write path. + */ +static inline u64 folio_end_pos(struct folio *folio) +{ + return folio_pos(folio) + folio_size(folio); +} + +static inline size_t folio_sectors(struct folio *folio) +{ + return PAGE_SECTORS << folio_order(folio); +} + +static inline loff_t folio_sector(struct folio *folio) +{ + return folio_pos(folio) >> 9; +} + +static inline u64 folio_end_sector(struct folio *folio) +{ + return folio_end_pos(folio) >> 9; +} + +#define BCH_FOLIO_SECTOR_STATE() \ + x(unallocated) \ + x(reserved) \ + x(dirty) \ + x(dirty_reserved) \ + x(allocated) + +enum bch_folio_sector_state { +#define x(n) SECTOR_##n, + BCH_FOLIO_SECTOR_STATE() +#undef x +}; + +struct bch_folio_sector { + /* Uncompressed, fully allocated replicas (or on disk reservation): */ + unsigned nr_replicas:4; + + /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */ + unsigned replicas_reserved:4; + + /* i_sectors: */ + enum bch_folio_sector_state state:8; +}; + +struct bch_folio { + spinlock_t lock; + atomic_t write_count; + /* + * Is the sector state up to date with the btree? 
+ * (Not the data itself) + */ + bool uptodate; + struct bch_folio_sector s[]; +}; + +/* Helper for when we need to add debug instrumentation: */ +static inline void bch2_folio_sector_set(struct folio *folio, + struct bch_folio *s, + unsigned i, unsigned n) +{ + s->s[i].state = n; +} + +/* file offset (to folio offset) to bch_folio_sector index */ +static inline int folio_pos_to_s(struct folio *folio, loff_t pos) +{ + u64 f_offset = pos - folio_pos(folio); + + BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio)); + return f_offset >> SECTOR_SHIFT; +} + +/* for newly allocated folios: */ +static inline void __bch2_folio_release(struct folio *folio) +{ + kfree(folio_detach_private(folio)); +} + +static inline void bch2_folio_release(struct folio *folio) +{ + EBUG_ON(!folio_test_locked(folio)); + __bch2_folio_release(folio); +} + +static inline struct bch_folio *__bch2_folio(struct folio *folio) +{ + return folio_has_private(folio) + ? (struct bch_folio *) folio_get_private(folio) + : NULL; +} + +static inline struct bch_folio *bch2_folio(struct folio *folio) +{ + EBUG_ON(!folio_test_locked(folio)); + + return __bch2_folio(folio); +} + +struct bch_folio *__bch2_folio_create(struct folio *, gfp_t); +struct bch_folio *bch2_folio_create(struct folio *, gfp_t); + +struct bch2_folio_reservation { + struct disk_reservation disk; + struct quota_res quota; +}; + +static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) +{ + /* XXX: this should not be open coded */ + return inode->ei_inode.bi_data_replicas + ? inode->ei_inode.bi_data_replicas - 1 + : c->opts.data_replicas; +} + +static inline void bch2_folio_reservation_init(struct bch_fs *c, + struct bch_inode_info *inode, + struct bch2_folio_reservation *res) +{ + memset(res, 0, sizeof(*res)); + + res->disk.nr_replicas = inode_nr_replicas(c, inode); +} + +int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned); +void bch2_bio_page_state_set(struct bio *, struct bkey_s_c); + +void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64); +void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64); + +int bch2_get_folio_disk_reservation(struct bch_fs *, + struct bch_inode_info *, + struct folio *, bool); + +void bch2_folio_reservation_put(struct bch_fs *, + struct bch_inode_info *, + struct bch2_folio_reservation *); +int bch2_folio_reservation_get(struct bch_fs *, + struct bch_inode_info *, + struct folio *, + struct bch2_folio_reservation *, + unsigned, unsigned); + +void bch2_set_folio_dirty(struct bch_fs *, + struct bch_inode_info *, + struct folio *, + struct bch2_folio_reservation *, + unsigned, unsigned); + +vm_fault_t bch2_page_fault(struct vm_fault *); +vm_fault_t bch2_page_mkwrite(struct vm_fault *); +void bch2_invalidate_folio(struct folio *, size_t, size_t); +bool bch2_release_folio(struct folio *, gfp_t); + +loff_t bch2_seek_pagecache_data(struct inode *, loff_t, loff_t, unsigned, bool); +loff_t bch2_seek_pagecache_hole(struct inode *, loff_t, loff_t, unsigned, bool); +int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool); + +#endif /* _BCACHEFS_FS_IO_PAGECACHE_H */ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 40bfd0b25d9d..11a4919f30cd 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -3,7 +3,6 @@ #include "bcachefs.h" #include "alloc_foreground.h" -#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -12,6 +11,8 @@ #include "extent_update.h" #include "fs.h" #include "fs-io.h" 
+#include "fs-io-buffered.h" +#include "fs-io-pagecache.h" #include "fsck.h" #include "inode.h" #include "journal.h" @@ -31,2742 +32,135 @@ #include #include #include -#include #include -static int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool); - -struct folio_vec { - struct folio *fv_folio; - size_t fv_offset; - size_t fv_len; -}; - -static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv) -{ - - struct folio *folio = page_folio(bv.bv_page); - size_t offset = (folio_page_idx(folio, bv.bv_page) << PAGE_SHIFT) + - bv.bv_offset; - size_t len = min_t(size_t, folio_size(folio) - offset, bv.bv_len); - - return (struct folio_vec) { - .fv_folio = folio, - .fv_offset = offset, - .fv_len = len, - }; -} - -static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio, - struct bvec_iter iter) -{ - return biovec_to_foliovec(bio_iter_iovec(bio, iter)); -} - -#define __bio_for_each_folio(bvl, bio, iter, start) \ - for (iter = (start); \ - (iter).bi_size && \ - ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \ - bio_advance_iter_single((bio), &(iter), (bvl).fv_len)) - -/** - * bio_for_each_folio - iterate over folios within a bio - * - * Like other non-_all versions, this iterates over what bio->bi_iter currently - * points to. This version is for drivers, where the bio may have previously - * been split or cloned. - */ -#define bio_for_each_folio(bvl, bio, iter) \ - __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter) - -/* - * Use u64 for the end pos and sector helpers because if the folio covers the - * max supported range of the mapping, the start offset of the next folio - * overflows loff_t. This breaks much of the range based processing in the - * buffered write path. - */ -static inline u64 folio_end_pos(struct folio *folio) -{ - return folio_pos(folio) + folio_size(folio); -} - -static inline size_t folio_sectors(struct folio *folio) -{ - return PAGE_SECTORS << folio_order(folio); -} - -static inline loff_t folio_sector(struct folio *folio) -{ - return folio_pos(folio) >> 9; -} - -static inline u64 folio_end_sector(struct folio *folio) -{ - return folio_end_pos(folio) >> 9; -} - -typedef DARRAY(struct folio *) folios; - -static int filemap_get_contig_folios_d(struct address_space *mapping, - loff_t start, u64 end, - int fgp_flags, gfp_t gfp, - folios *folios) -{ - struct folio *f; - u64 pos = start; - int ret = 0; - - while (pos < end) { - if ((u64) pos >= (u64) start + (1ULL << 20)) - fgp_flags &= ~FGP_CREAT; - - ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL); - if (ret) - break; - - f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp); - if (IS_ERR_OR_NULL(f)) - break; - - BUG_ON(folios->nr && folio_pos(f) != pos); - - pos = folio_end_pos(f); - darray_push(folios, f); - } - - if (!folios->nr && !ret && (fgp_flags & FGP_CREAT)) - ret = -ENOMEM; - - return folios->nr ? 
0 : ret; -} - -struct nocow_flush { - struct closure *cl; - struct bch_dev *ca; - struct bio bio; -}; - -static void nocow_flush_endio(struct bio *_bio) -{ - - struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); - - closure_put(bio->cl); - percpu_ref_put(&bio->ca->io_ref); - bio_put(&bio->bio); -} - -static void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, - struct bch_inode_info *inode, - struct closure *cl) -{ - struct nocow_flush *bio; - struct bch_dev *ca; - struct bch_devs_mask devs; - unsigned dev; - - dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); - if (dev == BCH_SB_MEMBERS_MAX) - return; - - devs = inode->ei_devs_need_flush; - memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); - - for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { - rcu_read_lock(); - ca = rcu_dereference(c->devs[dev]); - if (ca && !percpu_ref_tryget(&ca->io_ref)) - ca = NULL; - rcu_read_unlock(); - - if (!ca) - continue; - - bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, - REQ_OP_FLUSH, - GFP_KERNEL, - &c->nocow_flush_bioset), - struct nocow_flush, bio); - bio->cl = cl; - bio->ca = ca; - bio->bio.bi_end_io = nocow_flush_endio; - closure_bio_submit(&bio->bio, cl); - } -} - -static int bch2_inode_flush_nocow_writes(struct bch_fs *c, - struct bch_inode_info *inode) -{ - struct closure cl; - - closure_init_stack(&cl); - bch2_inode_flush_nocow_writes_async(c, inode, &cl); - closure_sync(&cl); - - return 0; -} - -static inline bool bio_full(struct bio *bio, unsigned len) -{ - if (bio->bi_vcnt >= bio->bi_max_vecs) - return true; - if (bio->bi_iter.bi_size > UINT_MAX - len) - return true; - return false; -} - -static inline struct address_space *faults_disabled_mapping(void) -{ - return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); -} - -static inline void set_fdm_dropped_locks(void) -{ - current->faults_disabled_mapping = - (void *) (((unsigned long) current->faults_disabled_mapping)|1); -} - -static inline bool fdm_dropped_locks(void) -{ - return ((unsigned long) current->faults_disabled_mapping) & 1; -} - -struct quota_res { - u64 sectors; -}; - -struct bch_writepage_io { - struct bch_inode_info *inode; - - /* must be last: */ - struct bch_write_op op; -}; - -struct dio_write { - struct kiocb *req; - struct address_space *mapping; - struct bch_inode_info *inode; - struct mm_struct *mm; - unsigned loop:1, - extending:1, - sync:1, - flush:1, - free_iov:1; - struct quota_res quota_res; - u64 written; - - struct iov_iter iter; - struct iovec inline_vecs[2]; - - /* must be last: */ - struct bch_write_op op; -}; - -struct dio_read { - struct closure cl; - struct kiocb *req; - long ret; - bool should_dirty; - struct bch_read_bio rbio; -}; - -/* pagecache_block must be held */ -static noinline int write_invalidate_inode_pages_range(struct address_space *mapping, - loff_t start, loff_t end) -{ - int ret; - - /* - * XXX: the way this is currently implemented, we can spin if a process - * is continually redirtying a specific page - */ - do { - if (!mapping->nrpages) - return 0; - - ret = filemap_write_and_wait_range(mapping, start, end); - if (ret) - break; - - if (!mapping->nrpages) - return 0; - - ret = invalidate_inode_pages2_range(mapping, - start >> PAGE_SHIFT, - end >> PAGE_SHIFT); - } while (ret == -EBUSY); - - return ret; -} - -/* quotas */ - -#ifdef CONFIG_BCACHEFS_QUOTA - -static void __bch2_quota_reservation_put(struct bch_fs *c, - struct bch_inode_info *inode, - struct quota_res *res) -{ - BUG_ON(res->sectors 
> inode->ei_quota_reserved); - - bch2_quota_acct(c, inode->ei_qid, Q_SPC, - -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); - inode->ei_quota_reserved -= res->sectors; - res->sectors = 0; -} - -static void bch2_quota_reservation_put(struct bch_fs *c, - struct bch_inode_info *inode, - struct quota_res *res) -{ - if (res->sectors) { - mutex_lock(&inode->ei_quota_lock); - __bch2_quota_reservation_put(c, inode, res); - mutex_unlock(&inode->ei_quota_lock); - } -} - -static int bch2_quota_reservation_add(struct bch_fs *c, - struct bch_inode_info *inode, - struct quota_res *res, - u64 sectors, - bool check_enospc) -{ - int ret; - - if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) - return 0; - - mutex_lock(&inode->ei_quota_lock); - ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, - check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); - if (likely(!ret)) { - inode->ei_quota_reserved += sectors; - res->sectors += sectors; - } - mutex_unlock(&inode->ei_quota_lock); - - return ret; -} - -#else - -static void __bch2_quota_reservation_put(struct bch_fs *c, - struct bch_inode_info *inode, - struct quota_res *res) {} - -static void bch2_quota_reservation_put(struct bch_fs *c, - struct bch_inode_info *inode, - struct quota_res *res) {} - -static int bch2_quota_reservation_add(struct bch_fs *c, - struct bch_inode_info *inode, - struct quota_res *res, - unsigned sectors, - bool check_enospc) -{ - return 0; -} - -#endif - -/* i_size updates: */ - -struct inode_new_size { - loff_t new_size; - u64 now; - unsigned fields; -}; - -static int inode_set_size(struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct inode_new_size *s = p; - - bi->bi_size = s->new_size; - if (s->fields & ATTR_ATIME) - bi->bi_atime = s->now; - if (s->fields & ATTR_MTIME) - bi->bi_mtime = s->now; - if (s->fields & ATTR_CTIME) - bi->bi_ctime = s->now; - - return 0; -} - -int __must_check bch2_write_inode_size(struct bch_fs *c, - struct bch_inode_info *inode, - loff_t new_size, unsigned fields) -{ - struct inode_new_size s = { - .new_size = new_size, - .now = bch2_current_time(c), - .fields = fields, - }; - - return bch2_write_inode(c, inode, inode_set_size, &s, fields); -} - -static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, - struct quota_res *quota_res, s64 sectors) -{ - bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, - "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, sectors, - inode->ei_inode.bi_sectors); - inode->v.i_blocks += sectors; - -#ifdef CONFIG_BCACHEFS_QUOTA - if (quota_res && - !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) && - sectors > 0) { - BUG_ON(sectors > quota_res->sectors); - BUG_ON(sectors > inode->ei_quota_reserved); - - quota_res->sectors -= sectors; - inode->ei_quota_reserved -= sectors; - } else { - bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); - } -#endif -} - -static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, - struct quota_res *quota_res, s64 sectors) -{ - if (sectors) { - mutex_lock(&inode->ei_quota_lock); - __i_sectors_acct(c, inode, quota_res, sectors); - mutex_unlock(&inode->ei_quota_lock); - } -} - -/* page state: */ - -/* stored in page->private: */ - -#define BCH_FOLIO_SECTOR_STATE() \ - x(unallocated) \ - x(reserved) \ - x(dirty) \ - x(dirty_reserved) \ - x(allocated) - -enum bch_folio_sector_state { -#define x(n) SECTOR_##n, - BCH_FOLIO_SECTOR_STATE() -#undef x -}; - -static const char * const 
bch2_folio_sector_states[] = { -#define x(n) #n, - BCH_FOLIO_SECTOR_STATE() -#undef x - NULL -}; - -static inline enum bch_folio_sector_state -folio_sector_dirty(enum bch_folio_sector_state state) -{ - switch (state) { - case SECTOR_unallocated: - return SECTOR_dirty; - case SECTOR_reserved: - return SECTOR_dirty_reserved; - default: - return state; - } -} - -static inline enum bch_folio_sector_state -folio_sector_undirty(enum bch_folio_sector_state state) -{ - switch (state) { - case SECTOR_dirty: - return SECTOR_unallocated; - case SECTOR_dirty_reserved: - return SECTOR_reserved; - default: - return state; - } -} - -static inline enum bch_folio_sector_state -folio_sector_reserve(enum bch_folio_sector_state state) -{ - switch (state) { - case SECTOR_unallocated: - return SECTOR_reserved; - case SECTOR_dirty: - return SECTOR_dirty_reserved; - default: - return state; - } -} - -struct bch_folio_sector { - /* Uncompressed, fully allocated replicas (or on disk reservation): */ - unsigned nr_replicas:4; - - /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */ - unsigned replicas_reserved:4; - - /* i_sectors: */ - enum bch_folio_sector_state state:8; -}; - -struct bch_folio { - spinlock_t lock; - atomic_t write_count; - /* - * Is the sector state up to date with the btree? - * (Not the data itself) - */ - bool uptodate; - struct bch_folio_sector s[]; -}; - -static inline void folio_sector_set(struct folio *folio, - struct bch_folio *s, - unsigned i, unsigned n) -{ - s->s[i].state = n; -} - -/* file offset (to folio offset) to bch_folio_sector index */ -static inline int folio_pos_to_s(struct folio *folio, loff_t pos) -{ - u64 f_offset = pos - folio_pos(folio); - BUG_ON(pos < folio_pos(folio) || pos >= folio_end_pos(folio)); - return f_offset >> SECTOR_SHIFT; -} - -static inline struct bch_folio *__bch2_folio(struct folio *folio) -{ - return folio_has_private(folio) - ? 
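/*
 * Illustrative sketch: the BCH_FOLIO_SECTOR_STATE() x-macro above
 * generates the enum and the name table from a single list, and the
 * folio_sector_dirty()/undirty()/reserve() helpers form a small state
 * machine over it. The same pattern, standalone (hypothetical names):
 */
#include <stdio.h>

#define SECTOR_STATES()		\
	x(unallocated)		\
	x(reserved)		\
	x(dirty)		\
	x(dirty_reserved)	\
	x(allocated)

enum sector_state {
#define x(n) S_##n,
	SECTOR_STATES()
#undef x
};

static const char * const sector_state_names[] = {
#define x(n) #n,
	SECTOR_STATES()
#undef x
	NULL
};

/* dirtying moves unallocated->dirty, reserved->dirty_reserved: */
static enum sector_state sector_dirty(enum sector_state s)
{
	switch (s) {
	case S_unallocated:	return S_dirty;
	case S_reserved:	return S_dirty_reserved;
	default:		return s;
	}
}

int main(void)
{
	enum sector_state s = sector_dirty(S_reserved);

	printf("%s\n", sector_state_names[s]);	/* "dirty_reserved" */
	return 0;
}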
(struct bch_folio *) folio_get_private(folio) - : NULL; -} - -static inline struct bch_folio *bch2_folio(struct folio *folio) -{ - EBUG_ON(!folio_test_locked(folio)); - - return __bch2_folio(folio); -} - -/* for newly allocated folios: */ -static void __bch2_folio_release(struct folio *folio) -{ - kfree(folio_detach_private(folio)); -} - -static void bch2_folio_release(struct folio *folio) -{ - EBUG_ON(!folio_test_locked(folio)); - __bch2_folio_release(folio); -} - -/* for newly allocated folios: */ -static struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) -{ - struct bch_folio *s; - - s = kzalloc(sizeof(*s) + - sizeof(struct bch_folio_sector) * - folio_sectors(folio), gfp); - if (!s) - return NULL; - - spin_lock_init(&s->lock); - folio_attach_private(folio, s); - return s; -} - -static struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp) -{ - return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp); -} - -static unsigned bkey_to_sector_state(struct bkey_s_c k) -{ - if (bkey_extent_is_reservation(k)) - return SECTOR_reserved; - if (bkey_extent_is_allocation(k.k)) - return SECTOR_allocated; - return SECTOR_unallocated; -} - -static void __bch2_folio_set(struct folio *folio, - unsigned pg_offset, unsigned pg_len, - unsigned nr_ptrs, unsigned state) -{ - struct bch_folio *s = bch2_folio(folio); - unsigned i, sectors = folio_sectors(folio); - - BUG_ON(pg_offset >= sectors); - BUG_ON(pg_offset + pg_len > sectors); - - spin_lock(&s->lock); - - for (i = pg_offset; i < pg_offset + pg_len; i++) { - s->s[i].nr_replicas = nr_ptrs; - folio_sector_set(folio, s, i, state); - } - - if (i == sectors) - s->uptodate = true; - - spin_unlock(&s->lock); -} - -/* - * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the - * extents btree: - */ -static int bch2_folio_set(struct bch_fs *c, subvol_inum inum, - struct folio **folios, unsigned nr_folios) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bch_folio *s; - u64 offset = folio_sector(folios[0]); - unsigned folio_idx; - u32 snapshot; - bool need_set = false; - int ret; - - for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) { - s = bch2_folio_create(folios[folio_idx], GFP_KERNEL); - if (!s) - return -ENOMEM; - - need_set |= !s->uptodate; - } - - if (!need_set) - return 0; - - folio_idx = 0; - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); - if (ret) - goto err; - - for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, - SPOS(inum.inum, offset, snapshot), - BTREE_ITER_SLOTS, k, ret) { - unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); - unsigned state = bkey_to_sector_state(k); - - while (folio_idx < nr_folios) { - struct folio *folio = folios[folio_idx]; - u64 folio_start = folio_sector(folio); - u64 folio_end = folio_end_sector(folio); - unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - folio_start; - unsigned folio_len = min(k.k->p.offset, folio_end) - folio_offset - folio_start; - - BUG_ON(k.k->p.offset < folio_start); - BUG_ON(bkey_start_offset(k.k) > folio_end); - - if (!bch2_folio(folio)->uptodate) - __bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state); - - if (k.k->p.offset < folio_end) - break; - folio_idx++; - } - - if (folio_idx == nr_folios) - break; - } - - offset = iter.pos.offset; - bch2_trans_iter_exit(&trans, &iter); -err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - 
bch2_trans_exit(&trans); - - return ret; -} - -static void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k) -{ - struct bvec_iter iter; - struct folio_vec fv; - unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v - ? 0 : bch2_bkey_nr_ptrs_fully_allocated(k); - unsigned state = bkey_to_sector_state(k); - - bio_for_each_folio(fv, bio, iter) - __bch2_folio_set(fv.fv_folio, - fv.fv_offset >> 9, - fv.fv_len >> 9, - nr_ptrs, state); -} - -static void mark_pagecache_unallocated(struct bch_inode_info *inode, - u64 start, u64 end) -{ - pgoff_t index = start >> PAGE_SECTORS_SHIFT; - pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; - struct folio_batch fbatch; - unsigned i, j; - - if (end <= start) - return; - - folio_batch_init(&fbatch); - - while (filemap_get_folios(inode->v.i_mapping, - &index, end_index, &fbatch)) { - for (i = 0; i < folio_batch_count(&fbatch); i++) { - struct folio *folio = fbatch.folios[i]; - u64 folio_start = folio_sector(folio); - u64 folio_end = folio_end_sector(folio); - unsigned folio_offset = max(start, folio_start) - folio_start; - unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; - struct bch_folio *s; - - BUG_ON(end <= folio_start); - - folio_lock(folio); - s = bch2_folio(folio); - - if (s) { - spin_lock(&s->lock); - for (j = folio_offset; j < folio_offset + folio_len; j++) - s->s[j].nr_replicas = 0; - spin_unlock(&s->lock); - } - - folio_unlock(folio); - } - folio_batch_release(&fbatch); - cond_resched(); - } -} - -static void mark_pagecache_reserved(struct bch_inode_info *inode, - u64 start, u64 end) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - pgoff_t index = start >> PAGE_SECTORS_SHIFT; - pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT; - struct folio_batch fbatch; - s64 i_sectors_delta = 0; - unsigned i, j; - - if (end <= start) - return; - - folio_batch_init(&fbatch); - - while (filemap_get_folios(inode->v.i_mapping, - &index, end_index, &fbatch)) { - for (i = 0; i < folio_batch_count(&fbatch); i++) { - struct folio *folio = fbatch.folios[i]; - u64 folio_start = folio_sector(folio); - u64 folio_end = folio_end_sector(folio); - unsigned folio_offset = max(start, folio_start) - folio_start; - unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; - struct bch_folio *s; - - BUG_ON(end <= folio_start); - - folio_lock(folio); - s = bch2_folio(folio); - - if (s) { - spin_lock(&s->lock); - for (j = folio_offset; j < folio_offset + folio_len; j++) { - i_sectors_delta -= s->s[j].state == SECTOR_dirty; - folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); - } - spin_unlock(&s->lock); - } - - folio_unlock(folio); - } - folio_batch_release(&fbatch); - cond_resched(); - } - - i_sectors_acct(c, inode, NULL, i_sectors_delta); -} - -static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) -{ - /* XXX: this should not be open coded */ - return inode->ei_inode.bi_data_replicas - ? 
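/*
 * Illustrative sketch (not bcachefs API): bch2_folio_set() and the
 * mark_pagecache_*() helpers above all clamp an extent's sector range
 * to a single folio before touching per-sector state. The interval
 * arithmetic in isolation:
 */
#include <assert.h>
#include <stdint.h>

/* intersect extent [start, end) with folio [f_start, f_end), returned
 * as offset + length relative to the folio: */
static void clamp_to_folio(uint64_t start, uint64_t end,
			   uint64_t f_start, uint64_t f_end,
			   unsigned *offset, unsigned *len)
{
	uint64_t lo = start > f_start ? start : f_start;
	uint64_t hi = end   < f_end   ? end   : f_end;

	assert(lo < hi);	/* caller guarantees the ranges overlap */
	*offset = lo - f_start;
	*len    = hi - lo;
}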
inode->ei_inode.bi_data_replicas - 1 - : c->opts.data_replicas; -} - -static inline unsigned sectors_to_reserve(struct bch_folio_sector *s, - unsigned nr_replicas) -{ - return max(0, (int) nr_replicas - - s->nr_replicas - - s->replicas_reserved); -} - -static int bch2_get_folio_disk_reservation(struct bch_fs *c, - struct bch_inode_info *inode, - struct folio *folio, bool check_enospc) -{ - struct bch_folio *s = bch2_folio_create(folio, 0); - unsigned nr_replicas = inode_nr_replicas(c, inode); - struct disk_reservation disk_res = { 0 }; - unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0; - int ret; - - if (!s) - return -ENOMEM; - - for (i = 0; i < sectors; i++) - disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas); - - if (!disk_res_sectors) - return 0; - - ret = bch2_disk_reservation_get(c, &disk_res, - disk_res_sectors, 1, - !check_enospc - ? BCH_DISK_RESERVATION_NOFAIL - : 0); - if (unlikely(ret)) - return ret; - - for (i = 0; i < sectors; i++) - s->s[i].replicas_reserved += - sectors_to_reserve(&s->s[i], nr_replicas); - - return 0; -} - -struct bch2_folio_reservation { - struct disk_reservation disk; - struct quota_res quota; -}; - -static void bch2_folio_reservation_init(struct bch_fs *c, - struct bch_inode_info *inode, - struct bch2_folio_reservation *res) -{ - memset(res, 0, sizeof(*res)); - - res->disk.nr_replicas = inode_nr_replicas(c, inode); -} - -static void bch2_folio_reservation_put(struct bch_fs *c, - struct bch_inode_info *inode, - struct bch2_folio_reservation *res) -{ - bch2_disk_reservation_put(c, &res->disk); - bch2_quota_reservation_put(c, inode, &res->quota); -} - -static int bch2_folio_reservation_get(struct bch_fs *c, - struct bch_inode_info *inode, - struct folio *folio, - struct bch2_folio_reservation *res, - unsigned offset, unsigned len) -{ - struct bch_folio *s = bch2_folio_create(folio, 0); - unsigned i, disk_sectors = 0, quota_sectors = 0; - int ret; - - if (!s) - return -ENOMEM; - - BUG_ON(!s->uptodate); - - for (i = round_down(offset, block_bytes(c)) >> 9; - i < round_up(offset + len, block_bytes(c)) >> 9; - i++) { - disk_sectors += sectors_to_reserve(&s->s[i], - res->disk.nr_replicas); - quota_sectors += s->s[i].state == SECTOR_unallocated; - } - - if (disk_sectors) { - ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0); - if (unlikely(ret)) - return ret; - } - - if (quota_sectors) { - ret = bch2_quota_reservation_add(c, inode, &res->quota, - quota_sectors, true); - if (unlikely(ret)) { - struct disk_reservation tmp = { - .sectors = disk_sectors - }; - - bch2_disk_reservation_put(c, &tmp); - res->disk.sectors -= disk_sectors; - return ret; - } - } - - return 0; -} - -static void bch2_clear_folio_bits(struct folio *folio) -{ - struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_folio *s = bch2_folio(folio); - struct disk_reservation disk_res = { 0 }; - int i, sectors = folio_sectors(folio), dirty_sectors = 0; - - if (!s) - return; - - EBUG_ON(!folio_test_locked(folio)); - EBUG_ON(folio_test_writeback(folio)); - - for (i = 0; i < sectors; i++) { - disk_res.sectors += s->s[i].replicas_reserved; - s->s[i].replicas_reserved = 0; - - dirty_sectors -= s->s[i].state == SECTOR_dirty; - folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state)); - } - - bch2_disk_reservation_put(c, &disk_res); - - i_sectors_acct(c, inode, NULL, dirty_sectors); - - bch2_folio_release(folio); -} - -static void bch2_set_folio_dirty(struct bch_fs *c, - struct 
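/*
 * Illustrative sketch (hypothetical names): inode_nr_replicas() above
 * decodes a biased per-inode option -- the stored value is the setting
 * plus one, so zero can mean "unset, fall back to the filesystem
 * default" -- and sectors_to_reserve() then tops existing allocation
 * up to that target:
 */
static unsigned biased_opt_or_default(unsigned stored, unsigned fs_default)
{
	return stored ? stored - 1 : fs_default;
}

static unsigned top_up(unsigned target, unsigned allocated, unsigned reserved)
{
	int need = (int) target - (int) allocated - (int) reserved;

	return need > 0 ? need : 0;	/* same max(0, ...) shape as above */
}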
bch_inode_info *inode, - struct folio *folio, - struct bch2_folio_reservation *res, - unsigned offset, unsigned len) -{ - struct bch_folio *s = bch2_folio(folio); - unsigned i, dirty_sectors = 0; - - WARN_ON((u64) folio_pos(folio) + offset + len > - round_up((u64) i_size_read(&inode->v), block_bytes(c))); - - BUG_ON(!s->uptodate); - - spin_lock(&s->lock); - - for (i = round_down(offset, block_bytes(c)) >> 9; - i < round_up(offset + len, block_bytes(c)) >> 9; - i++) { - unsigned sectors = sectors_to_reserve(&s->s[i], - res->disk.nr_replicas); - - /* - * This can happen if we race with the error path in - * bch2_writepage_io_done(): - */ - sectors = min_t(unsigned, sectors, res->disk.sectors); - - s->s[i].replicas_reserved += sectors; - res->disk.sectors -= sectors; - - dirty_sectors += s->s[i].state == SECTOR_unallocated; - - folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); - } - - spin_unlock(&s->lock); - - i_sectors_acct(c, inode, &res->quota, dirty_sectors); - - if (!folio_test_dirty(folio)) - filemap_dirty_folio(inode->v.i_mapping, folio); -} - -vm_fault_t bch2_page_fault(struct vm_fault *vmf) -{ - struct file *file = vmf->vma->vm_file; - struct address_space *mapping = file->f_mapping; - struct address_space *fdm = faults_disabled_mapping(); - struct bch_inode_info *inode = file_bch_inode(file); - vm_fault_t ret; - - if (fdm == mapping) - return VM_FAULT_SIGBUS; - - /* Lock ordering: */ - if (fdm > mapping) { - struct bch_inode_info *fdm_host = to_bch_ei(fdm->host); - - if (bch2_pagecache_add_tryget(inode)) - goto got_lock; - - bch2_pagecache_block_put(fdm_host); - - bch2_pagecache_add_get(inode); - bch2_pagecache_add_put(inode); - - bch2_pagecache_block_get(fdm_host); - - /* Signal that lock has been dropped: */ - set_fdm_dropped_locks(); - return VM_FAULT_SIGBUS; - } - - bch2_pagecache_add_get(inode); -got_lock: - ret = filemap_fault(vmf); - bch2_pagecache_add_put(inode); - - return ret; -} - -vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) -{ - struct folio *folio = page_folio(vmf->page); - struct file *file = vmf->vma->vm_file; - struct bch_inode_info *inode = file_bch_inode(file); - struct address_space *mapping = file->f_mapping; - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch2_folio_reservation res; - unsigned len; - loff_t isize; - vm_fault_t ret; - - bch2_folio_reservation_init(c, inode, &res); - - sb_start_pagefault(inode->v.i_sb); - file_update_time(file); - - /* - * Not strictly necessary, but helps avoid dio writes livelocking in - * write_invalidate_inode_pages_range() - can drop this if/when we get - * a write_invalidate_inode_pages_range() that works without dropping - * page lock before invalidating page - */ - bch2_pagecache_add_get(inode); - - folio_lock(folio); - isize = i_size_read(&inode->v); - - if (folio->mapping != mapping || folio_pos(folio) >= isize) { - folio_unlock(folio); - ret = VM_FAULT_NOPAGE; - goto out; - } - - len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); - - if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: - bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { - folio_unlock(folio); - ret = VM_FAULT_SIGBUS; - goto out; - } - - bch2_set_folio_dirty(c, inode, folio, &res, 0, len); - bch2_folio_reservation_put(c, inode, &res); - - folio_wait_stable(folio); - ret = VM_FAULT_LOCKED; -out: - bch2_pagecache_add_put(inode); - sb_end_pagefault(inode->v.i_sb); - - return ret; -} - -void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length) -{ - if (offset || length < 
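/*
 * Illustrative sketch (plain pthreads, not the kernel locks used
 * above, and skipping the drop-and-signal retry): bch2_page_fault()
 * avoids ABBA deadlock between two inodes' pagecache locks by
 * comparing mapping addresses -- the side holding the "wrong" lock
 * first must back off and reacquire in address order. The ordering
 * rule by itself:
 */
#include <pthread.h>

static void lock_two_ordered(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);
		return;
	}
	if (a > b) {		/* impose one global (address) order */
		pthread_mutex_t *t = a;
		a = b;
		b = t;
	}
	pthread_mutex_lock(a);
	pthread_mutex_lock(b);
}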
folio_size(folio)) - return; - - bch2_clear_folio_bits(folio); -} - -bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask) -{ - if (folio_test_dirty(folio) || folio_test_writeback(folio)) - return false; - - bch2_clear_folio_bits(folio); - return true; -} - -/* readpage(s): */ - -static void bch2_readpages_end_io(struct bio *bio) -{ - struct folio_iter fi; - - bio_for_each_folio_all(fi, bio) { - if (!bio->bi_status) { - folio_mark_uptodate(fi.folio); - } else { - folio_clear_uptodate(fi.folio); - folio_set_error(fi.folio); - } - folio_unlock(fi.folio); - } - - bio_put(bio); -} - -struct readpages_iter { - struct address_space *mapping; - unsigned idx; - folios folios; -}; - -static int readpages_iter_init(struct readpages_iter *iter, - struct readahead_control *ractl) -{ - struct folio **fi; - int ret; - - memset(iter, 0, sizeof(*iter)); - - iter->mapping = ractl->mapping; - - ret = filemap_get_contig_folios_d(iter->mapping, - ractl->_index << PAGE_SHIFT, - (ractl->_index + ractl->_nr_pages) << PAGE_SHIFT, - 0, mapping_gfp_mask(iter->mapping), - &iter->folios); - if (ret) - return ret; - - darray_for_each(iter->folios, fi) { - ractl->_nr_pages -= 1U << folio_order(*fi); - __bch2_folio_create(*fi, __GFP_NOFAIL|GFP_KERNEL); - folio_put(*fi); - folio_put(*fi); - } - - return 0; -} - -static inline struct folio *readpage_iter_peek(struct readpages_iter *iter) -{ - if (iter->idx >= iter->folios.nr) - return NULL; - return iter->folios.data[iter->idx]; -} - -static inline void readpage_iter_advance(struct readpages_iter *iter) -{ - iter->idx++; -} - -static bool extent_partial_reads_expensive(struct bkey_s_c k) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - struct bch_extent_crc_unpacked crc; - const union bch_extent_entry *i; - - bkey_for_each_crc(k.k, ptrs, crc, i) - if (crc.csum_type || crc.compression_type) - return true; - return false; -} - -static int readpage_bio_extend(struct btree_trans *trans, - struct readpages_iter *iter, - struct bio *bio, - unsigned sectors_this_extent, - bool get_more) -{ - /* Don't hold btree locks while allocating memory: */ - bch2_trans_unlock(trans); - - while (bio_sectors(bio) < sectors_this_extent && - bio->bi_vcnt < bio->bi_max_vecs) { - struct folio *folio = readpage_iter_peek(iter); - int ret; - - if (folio) { - readpage_iter_advance(iter); - } else { - pgoff_t folio_offset = bio_end_sector(bio) >> PAGE_SECTORS_SHIFT; - - if (!get_more) - break; - - folio = xa_load(&iter->mapping->i_pages, folio_offset); - if (folio && !xa_is_value(folio)) - break; - - folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); - if (!folio) - break; - - if (!__bch2_folio_create(folio, GFP_KERNEL)) { - folio_put(folio); - break; - } - - ret = filemap_add_folio(iter->mapping, folio, folio_offset, GFP_KERNEL); - if (ret) { - __bch2_folio_release(folio); - folio_put(folio); - break; - } - - folio_put(folio); - } - - BUG_ON(folio_sector(folio) != bio_end_sector(bio)); - - BUG_ON(!bio_add_folio(bio, folio, folio_size(folio), 0)); - } - - return bch2_trans_relock(trans); -} - -static void bchfs_read(struct btree_trans *trans, - struct bch_read_bio *rbio, - subvol_inum inum, - struct readpages_iter *readpages_iter) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_buf sk; - int flags = BCH_READ_RETRY_IF_STALE| - BCH_READ_MAY_PROMOTE; - u32 snapshot; - int ret = 0; - - rbio->c = c; - rbio->start_time = local_clock(); - rbio->subvol = inum.subvol; - - bch2_bkey_buf_init(&sk); -retry: - bch2_trans_begin(trans); - iter = (struct 
btree_iter) { NULL }; - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); - if (ret) - goto err; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot), - BTREE_ITER_SLOTS); - while (1) { - struct bkey_s_c k; - unsigned bytes, sectors, offset_into_extent; - enum btree_id data_btree = BTREE_ID_extents; - - /* - * read_extent -> io_time_reset may cause a transaction restart - * without returning an error, we need to check for that here: - */ - ret = bch2_trans_relock(trans); - if (ret) - break; - - bch2_btree_iter_set_pos(&iter, - POS(inum.inum, rbio->bio.bi_iter.bi_sector)); - - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - break; - - offset_into_extent = iter.pos.offset - - bkey_start_offset(k.k); - sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&sk, c, k); - - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &sk); - if (ret) - break; - - k = bkey_i_to_s_c(sk.k); - - sectors = min(sectors, k.k->size - offset_into_extent); - - if (readpages_iter) { - ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors, - extent_partial_reads_expensive(k)); - if (ret) - break; - } - - bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; - swap(rbio->bio.bi_iter.bi_size, bytes); - - if (rbio->bio.bi_iter.bi_size == bytes) - flags |= BCH_READ_LAST_FRAGMENT; - - bch2_bio_page_state_set(&rbio->bio, k); - - bch2_read_extent(trans, rbio, iter.pos, - data_btree, k, offset_into_extent, flags); - - if (flags & BCH_READ_LAST_FRAGMENT) - break; - - swap(rbio->bio.bi_iter.bi_size, bytes); - bio_advance(&rbio->bio, bytes); - - ret = btree_trans_too_many_iters(trans); - if (ret) - break; - } -err: - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - if (ret) { - bch_err_inum_offset_ratelimited(c, - iter.pos.inode, - iter.pos.offset << 9, - "read error %i from btree lookup", ret); - rbio->bio.bi_status = BLK_STS_IOERR; - bio_endio(&rbio->bio); - } - - bch2_bkey_buf_exit(&sk, c); -} - -void bch2_readahead(struct readahead_control *ractl) -{ - struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_io_opts opts; - struct btree_trans trans; - struct folio *folio; - struct readpages_iter readpages_iter; - int ret; - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - - ret = readpages_iter_init(&readpages_iter, ractl); - BUG_ON(ret); - - bch2_trans_init(&trans, c, 0, 0); - - bch2_pagecache_add_get(inode); - - while ((folio = readpage_iter_peek(&readpages_iter))) { - unsigned n = min_t(unsigned, - readpages_iter.folios.nr - - readpages_iter.idx, - BIO_MAX_VECS); - struct bch_read_bio *rbio = - rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, - GFP_KERNEL, &c->bio_read), - opts); - - readpage_iter_advance(&readpages_iter); - - rbio->bio.bi_iter.bi_sector = folio_sector(folio); - rbio->bio.bi_end_io = bch2_readpages_end_io; - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - - bchfs_read(&trans, rbio, inode_inum(inode), - &readpages_iter); - bch2_trans_unlock(&trans); - } - - bch2_pagecache_add_put(inode); - - bch2_trans_exit(&trans); - darray_exit(&readpages_iter.folios); -} - -static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, - subvol_inum inum, struct folio *folio) -{ - struct btree_trans trans; - - bch2_folio_create(folio, __GFP_NOFAIL); - - rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; - 
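/*
 * Illustrative sketch (hypothetical types): the bchfs_read() loop above
 * submits one large bio a fragment at a time by temporarily shrinking
 * bi_iter.bi_size to the current extent, submitting, then restoring the
 * remainder and advancing. The control flow in miniature:
 */
#include <stdio.h>

struct buf { unsigned pos, size; };	/* stand-in for a bio */

static void submit(struct buf *b)
{
	printf("fragment: %u bytes at %u\n", b->size, b->pos);
}

static void read_fragmented(struct buf *b, const unsigned *extent_bytes,
			    unsigned nr_extents)
{
	for (unsigned i = 0; i < nr_extents; i++) {
		unsigned remaining = b->size;
		unsigned bytes = extent_bytes[i] < remaining
			? extent_bytes[i] : remaining;

		b->size = bytes;		/* shrink to this fragment */
		submit(b);

		if (bytes == remaining)		/* LAST_FRAGMENT analogue */
			return;

		b->size = remaining - bytes;	/* restore and advance */
		b->pos += bytes;
	}
}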
rbio->bio.bi_iter.bi_sector = folio_sector(folio); - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - - bch2_trans_init(&trans, c, 0, 0); - bchfs_read(&trans, rbio, inum, NULL); - bch2_trans_exit(&trans); -} - -static void bch2_read_single_folio_end_io(struct bio *bio) -{ - complete(bio->bi_private); -} - -static int bch2_read_single_folio(struct folio *folio, - struct address_space *mapping) -{ - struct bch_inode_info *inode = to_bch_ei(mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_read_bio *rbio; - struct bch_io_opts opts; - int ret; - DECLARE_COMPLETION_ONSTACK(done); - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - - rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), - opts); - rbio->bio.bi_private = &done; - rbio->bio.bi_end_io = bch2_read_single_folio_end_io; - - __bchfs_readfolio(c, rbio, inode_inum(inode), folio); - wait_for_completion(&done); - - ret = blk_status_to_errno(rbio->bio.bi_status); - bio_put(&rbio->bio); - - if (ret < 0) - return ret; - - folio_mark_uptodate(folio); - return 0; -} - -int bch2_read_folio(struct file *file, struct folio *folio) -{ - int ret; - - ret = bch2_read_single_folio(folio, folio->mapping); - folio_unlock(folio); - return bch2_err_class(ret); -} - -/* writepages: */ - -struct bch_writepage_state { - struct bch_writepage_io *io; - struct bch_io_opts opts; - struct bch_folio_sector *tmp; - unsigned tmp_sectors; -}; - -static inline struct bch_writepage_state bch_writepage_state_init(struct bch_fs *c, - struct bch_inode_info *inode) -{ - struct bch_writepage_state ret = { 0 }; - - bch2_inode_opts_get(&ret.opts, c, &inode->ei_inode); - return ret; -} - -static void bch2_writepage_io_done(struct bch_write_op *op) -{ - struct bch_writepage_io *io = - container_of(op, struct bch_writepage_io, op); - struct bch_fs *c = io->op.c; - struct bio *bio = &io->op.wbio.bio; - struct folio_iter fi; - unsigned i; - - if (io->op.error) { - set_bit(EI_INODE_ERROR, &io->inode->ei_flags); - - bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; - - folio_set_error(fi.folio); - mapping_set_error(fi.folio->mapping, -EIO); - - s = __bch2_folio(fi.folio); - spin_lock(&s->lock); - for (i = 0; i < folio_sectors(fi.folio); i++) - s->s[i].nr_replicas = 0; - spin_unlock(&s->lock); - } - } - - if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) { - bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; - - s = __bch2_folio(fi.folio); - spin_lock(&s->lock); - for (i = 0; i < folio_sectors(fi.folio); i++) - s->s[i].nr_replicas = 0; - spin_unlock(&s->lock); - } - } - - /* - * racing with fallocate can cause us to add fewer sectors than - * expected - but we shouldn't add more sectors than expected: - */ - WARN_ON_ONCE(io->op.i_sectors_delta > 0); - - /* - * (error (due to going RO) halfway through a page can screw that up - * slightly) - * XXX wtf? 
- BUG_ON(io->op.op.i_sectors_delta >= PAGE_SECTORS); - */ - - /* - * PageWriteback is effectively our ref on the inode - fixup i_blocks - * before calling end_page_writeback: - */ - i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); - - bio_for_each_folio_all(fi, bio) { - struct bch_folio *s = __bch2_folio(fi.folio); - - if (atomic_dec_and_test(&s->write_count)) - folio_end_writeback(fi.folio); - } - - bio_put(&io->op.wbio.bio); -} - -static void bch2_writepage_do_io(struct bch_writepage_state *w) -{ - struct bch_writepage_io *io = w->io; - - w->io = NULL; - closure_call(&io->op.cl, bch2_write, NULL, NULL); -} - -/* - * Get a bch_writepage_io and add @page to it - appending to an existing one if - * possible, else allocating a new one: - */ -static void bch2_writepage_io_alloc(struct bch_fs *c, - struct writeback_control *wbc, - struct bch_writepage_state *w, - struct bch_inode_info *inode, - u64 sector, - unsigned nr_replicas) -{ - struct bch_write_op *op; - - w->io = container_of(bio_alloc_bioset(NULL, BIO_MAX_VECS, - REQ_OP_WRITE, - GFP_KERNEL, - &c->writepage_bioset), - struct bch_writepage_io, op.wbio.bio); - - w->io->inode = inode; - op = &w->io->op; - bch2_write_op_init(op, c, w->opts); - op->target = w->opts.foreground_target; - op->nr_replicas = nr_replicas; - op->res.nr_replicas = nr_replicas; - op->write_point = writepoint_hashed(inode->ei_last_dirtied); - op->subvol = inode->ei_subvol; - op->pos = POS(inode->v.i_ino, sector); - op->end_io = bch2_writepage_io_done; - op->devs_need_flush = &inode->ei_devs_need_flush; - op->wbio.bio.bi_iter.bi_sector = sector; - op->wbio.bio.bi_opf = wbc_to_write_flags(wbc); -} - -static int __bch2_writepage(struct folio *folio, - struct writeback_control *wbc, - void *data) -{ - struct bch_inode_info *inode = to_bch_ei(folio->mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_writepage_state *w = data; - struct bch_folio *s; - unsigned i, offset, f_sectors, nr_replicas_this_write = U32_MAX; - loff_t i_size = i_size_read(&inode->v); - int ret; - - EBUG_ON(!folio_test_uptodate(folio)); - - /* Is the folio fully inside i_size? */ - if (folio_end_pos(folio) <= i_size) - goto do_io; - - /* Is the folio fully outside i_size? (truncate in progress) */ - if (folio_pos(folio) >= i_size) { - folio_unlock(folio); - return 0; - } - - /* - * The folio straddles i_size. It must be zeroed out on each and every - * writepage invocation because it may be mmapped. "A file is mapped - * in multiples of the folio size. For a file that is not a multiple of - * the folio size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." 
- */ - folio_zero_segment(folio, - i_size - folio_pos(folio), - folio_size(folio)); -do_io: - f_sectors = folio_sectors(folio); - s = bch2_folio(folio); - - if (f_sectors > w->tmp_sectors) { - kfree(w->tmp); - w->tmp = kzalloc(sizeof(struct bch_folio_sector) * - f_sectors, __GFP_NOFAIL); - w->tmp_sectors = f_sectors; - } - - /* - * Things get really hairy with errors during writeback: - */ - ret = bch2_get_folio_disk_reservation(c, inode, folio, false); - BUG_ON(ret); - - /* Before unlocking the page, get copy of reservations: */ - spin_lock(&s->lock); - memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); - - for (i = 0; i < f_sectors; i++) { - if (s->s[i].state < SECTOR_dirty) - continue; - - nr_replicas_this_write = - min_t(unsigned, nr_replicas_this_write, - s->s[i].nr_replicas + - s->s[i].replicas_reserved); - } - - for (i = 0; i < f_sectors; i++) { - if (s->s[i].state < SECTOR_dirty) - continue; - - s->s[i].nr_replicas = w->opts.compression - ? 0 : nr_replicas_this_write; - - s->s[i].replicas_reserved = 0; - folio_sector_set(folio, s, i, SECTOR_allocated); - } - spin_unlock(&s->lock); - - BUG_ON(atomic_read(&s->write_count)); - atomic_set(&s->write_count, 1); - - BUG_ON(folio_test_writeback(folio)); - folio_start_writeback(folio); - - folio_unlock(folio); - - offset = 0; - while (1) { - unsigned sectors = 0, dirty_sectors = 0, reserved_sectors = 0; - u64 sector; - - while (offset < f_sectors && - w->tmp[offset].state < SECTOR_dirty) - offset++; - - if (offset == f_sectors) - break; - - while (offset + sectors < f_sectors && - w->tmp[offset + sectors].state >= SECTOR_dirty) { - reserved_sectors += w->tmp[offset + sectors].replicas_reserved; - dirty_sectors += w->tmp[offset + sectors].state == SECTOR_dirty; - sectors++; - } - BUG_ON(!sectors); - - sector = folio_sector(folio) + offset; - - if (w->io && - (w->io->op.res.nr_replicas != nr_replicas_this_write || - bio_full(&w->io->op.wbio.bio, sectors << 9) || - w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >= - (BIO_MAX_VECS * PAGE_SIZE) || - bio_end_sector(&w->io->op.wbio.bio) != sector)) - bch2_writepage_do_io(w); - - if (!w->io) - bch2_writepage_io_alloc(c, wbc, w, inode, sector, - nr_replicas_this_write); - - atomic_inc(&s->write_count); - - BUG_ON(inode != w->io->inode); - BUG_ON(!bio_add_folio(&w->io->op.wbio.bio, folio, - sectors << 9, offset << 9)); - - /* Check for writing past i_size: */ - WARN_ONCE((bio_end_sector(&w->io->op.wbio.bio) << 9) > - round_up(i_size, block_bytes(c)) && - !test_bit(BCH_FS_EMERGENCY_RO, &c->flags), - "writing past i_size: %llu > %llu (unrounded %llu)\n", - bio_end_sector(&w->io->op.wbio.bio) << 9, - round_up(i_size, block_bytes(c)), - i_size); - - w->io->op.res.sectors += reserved_sectors; - w->io->op.i_sectors_delta -= dirty_sectors; - w->io->op.new_i_size = i_size; - - offset += sectors; - } - - if (atomic_dec_and_test(&s->write_count)) - folio_end_writeback(folio); - - return 0; -} - -int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - struct bch_fs *c = mapping->host->i_sb->s_fs_info; - struct bch_writepage_state w = - bch_writepage_state_init(c, to_bch_ei(mapping->host)); - struct blk_plug plug; - int ret; - - blk_start_plug(&plug); - ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w); - if (w.io) - bch2_writepage_do_io(&w); - blk_finish_plug(&plug); - kfree(w.tmp); - return bch2_err_class(ret); -} - -/* buffered writes: */ - -int bch2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, - struct 
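/*
 * Illustrative sketch (not bcachefs API): for a folio straddling
 * i_size, __bch2_writepage() above zeroes from i_size to the end of
 * the folio on every writepage call, because mmap stores may have
 * dirtied that tail. The offset math alone:
 */
#include <stdint.h>
#include <string.h>

static void zero_folio_tail(char *folio, uint64_t folio_pos,
			    unsigned folio_size, uint64_t i_size)
{
	if (i_size <= folio_pos || i_size >= folio_pos + folio_size)
		return;		/* fully outside or fully inside i_size */

	memset(folio + (i_size - folio_pos), 0,
	       folio_size - (i_size - folio_pos));
}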
page **pagep, void **fsdata) -{ - struct bch_inode_info *inode = to_bch_ei(mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch2_folio_reservation *res; - struct folio *folio; - unsigned offset; - int ret = -ENOMEM; - - res = kmalloc(sizeof(*res), GFP_KERNEL); - if (!res) - return -ENOMEM; - - bch2_folio_reservation_init(c, inode, res); - *fsdata = res; - - bch2_pagecache_add_get(inode); - - folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, - FGP_LOCK|FGP_WRITE|FGP_CREAT|FGP_STABLE, - mapping_gfp_mask(mapping)); - if (IS_ERR_OR_NULL(folio)) - goto err_unlock; - - if (folio_test_uptodate(folio)) - goto out; - - offset = pos - folio_pos(folio); - len = min_t(size_t, len, folio_end_pos(folio) - pos); - - /* If we're writing entire folio, don't need to read it in first: */ - if (!offset && len == folio_size(folio)) - goto out; - - if (!offset && pos + len >= inode->v.i_size) { - folio_zero_segment(folio, len, folio_size(folio)); - flush_dcache_folio(folio); - goto out; - } - - if (folio_pos(folio) >= inode->v.i_size) { - folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio)); - flush_dcache_folio(folio); - goto out; - } -readpage: - ret = bch2_read_single_folio(folio, mapping); - if (ret) - goto err; -out: - ret = bch2_folio_set(c, inode_inum(inode), &folio, 1); - if (ret) - goto err; - - ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len); - if (ret) { - if (!folio_test_uptodate(folio)) { - /* - * If the folio hasn't been read in, we won't know if we - * actually need a reservation - we don't actually need - * to read here, we just need to check if the folio is - * fully backed by uncompressed data: - */ - goto readpage; - } - - goto err; - } - - *pagep = &folio->page; - return 0; -err: - folio_unlock(folio); - folio_put(folio); - *pagep = NULL; -err_unlock: - bch2_pagecache_add_put(inode); - kfree(res); - *fsdata = NULL; - return bch2_err_class(ret); -} - -int bch2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct bch_inode_info *inode = to_bch_ei(mapping->host); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch2_folio_reservation *res = fsdata; - struct folio *folio = page_folio(page); - unsigned offset = pos - folio_pos(folio); - - lockdep_assert_held(&inode->v.i_rwsem); - BUG_ON(offset + copied > folio_size(folio)); - - if (unlikely(copied < len && !folio_test_uptodate(folio))) { - /* - * The folio needs to be read in, but that would destroy - * our partial write - simplest thing is to just force - * userspace to redo the write: - */ - folio_zero_range(folio, 0, folio_size(folio)); - flush_dcache_folio(folio); - copied = 0; - } - - spin_lock(&inode->v.i_lock); - if (pos + copied > inode->v.i_size) - i_size_write(&inode->v, pos + copied); - spin_unlock(&inode->v.i_lock); - - if (copied) { - if (!folio_test_uptodate(folio)) - folio_mark_uptodate(folio); - - bch2_set_folio_dirty(c, inode, folio, res, offset, copied); - - inode->ei_last_dirtied = (unsigned long) current; - } - - folio_unlock(folio); - folio_put(folio); - bch2_pagecache_add_put(inode); - - bch2_folio_reservation_put(c, inode, res); - kfree(res); - - return copied; -} - -static noinline void folios_trunc(folios *folios, struct folio **fi) -{ - while (folios->data + folios->nr > fi) { - struct folio *f = darray_pop(folios); - - folio_unlock(f); - folio_put(f); - } -} - -static int __bch2_buffered_write(struct bch_inode_info *inode, - struct address_space 
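/*
 * Illustrative sketch (hypothetical names): bch2_write_begin() above
 * picks between four ways of preparing a non-resident folio -- use it
 * as-is, zero the tail past the write, zero around the write, or read
 * it in -- in this order:
 */
enum prep { PREP_NONE, PREP_ZERO_TAIL, PREP_ZERO_AROUND, PREP_READ };

static enum prep write_begin_prep(int uptodate,
				  unsigned offset, unsigned len,
				  unsigned folio_size,
				  long long folio_pos, long long pos,
				  long long i_size)
{
	if (uptodate)
		return PREP_NONE;
	if (!offset && len == folio_size)
		return PREP_NONE;	/* fully overwritten anyway */
	if (!offset && pos + len >= i_size)
		return PREP_ZERO_TAIL;	/* nothing valid past the write */
	if (folio_pos >= i_size)
		return PREP_ZERO_AROUND; /* folio entirely past EOF */
	return PREP_READ;
}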
*mapping, - struct iov_iter *iter, - loff_t pos, unsigned len) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch2_folio_reservation res; - folios folios; - struct folio **fi, *f; - unsigned copied = 0, f_offset; - u64 end = pos + len, f_pos; - loff_t last_folio_pos = inode->v.i_size; - int ret = 0; - - BUG_ON(!len); - - bch2_folio_reservation_init(c, inode, &res); - darray_init(&folios); - - ret = filemap_get_contig_folios_d(mapping, pos, end, - FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, - mapping_gfp_mask(mapping), - &folios); - if (ret) - goto out; - - BUG_ON(!folios.nr); - - f = darray_first(folios); - if (pos != folio_pos(f) && !folio_test_uptodate(f)) { - ret = bch2_read_single_folio(f, mapping); - if (ret) - goto out; - } - - f = darray_last(folios); - end = min(end, folio_end_pos(f)); - last_folio_pos = folio_pos(f); - if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { - if (end >= inode->v.i_size) { - folio_zero_range(f, 0, folio_size(f)); - } else { - ret = bch2_read_single_folio(f, mapping); - if (ret) - goto out; - } - } - - ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr); - if (ret) - goto out; - - f_pos = pos; - f_offset = pos - folio_pos(darray_first(folios)); - darray_for_each(folios, fi) { - struct folio *f = *fi; - u64 f_len = min(end, folio_end_pos(f)) - f_pos; - - /* - * XXX: per POSIX and fstests generic/275, on -ENOSPC we're - * supposed to write as much as we have disk space for. - * - * On failure here we should still write out a partial page if - * we aren't completely out of disk space - we don't do that - * yet: - */ - ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); - if (unlikely(ret)) { - folios_trunc(&folios, fi); - if (!folios.nr) - goto out; - - end = min(end, folio_end_pos(darray_last(folios))); - break; - } - - f_pos = folio_end_pos(f); - f_offset = 0; - } - - if (mapping_writably_mapped(mapping)) - darray_for_each(folios, fi) - flush_dcache_folio(*fi); - - f_pos = pos; - f_offset = pos - folio_pos(darray_first(folios)); - darray_for_each(folios, fi) { - struct folio *f = *fi; - u64 f_len = min(end, folio_end_pos(f)) - f_pos; - unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); - - if (!f_copied) { - folios_trunc(&folios, fi); - break; - } - - if (!folio_test_uptodate(f) && - f_copied != folio_size(f) && - pos + copied + f_copied < inode->v.i_size) { - folio_zero_range(f, 0, folio_size(f)); - folios_trunc(&folios, fi); - break; - } - - flush_dcache_folio(f); - copied += f_copied; - - if (f_copied != f_len) { - folios_trunc(&folios, fi + 1); - break; - } - - f_pos = folio_end_pos(f); - f_offset = 0; - } - - if (!copied) - goto out; - - end = pos + copied; - - spin_lock(&inode->v.i_lock); - if (end > inode->v.i_size) - i_size_write(&inode->v, end); - spin_unlock(&inode->v.i_lock); - - f_pos = pos; - f_offset = pos - folio_pos(darray_first(folios)); - darray_for_each(folios, fi) { - struct folio *f = *fi; - u64 f_len = min(end, folio_end_pos(f)) - f_pos; - - if (!folio_test_uptodate(f)) - folio_mark_uptodate(f); - - bch2_set_folio_dirty(c, inode, f, &res, f_offset, f_len); - - f_pos = folio_end_pos(f); - f_offset = 0; - } - - inode->ei_last_dirtied = (unsigned long) current; -out: - darray_for_each(folios, fi) { - folio_unlock(*fi); - folio_put(*fi); - } - - /* - * If the last folio added to the mapping starts beyond current EOF, we - * performed a short write but left around at least one post-EOF folio. - * Clean up the mapping before we return. 
- */ - if (last_folio_pos >= inode->v.i_size) - truncate_pagecache(&inode->v, inode->v.i_size); - - darray_exit(&folios); - bch2_folio_reservation_put(c, inode, &res); - - return copied ?: ret; -} - -static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct bch_inode_info *inode = file_bch_inode(file); - loff_t pos = iocb->ki_pos; - ssize_t written = 0; - int ret = 0; - - bch2_pagecache_add_get(inode); - - do { - unsigned offset = pos & (PAGE_SIZE - 1); - unsigned bytes = iov_iter_count(iter); -again: - /* - * Bring in the user page that we will copy from _first_. - * Otherwise there's a nasty deadlock on copying from the - * same page as we're writing to, without it being marked - * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. - */ - if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { - bytes = min_t(unsigned long, iov_iter_count(iter), - PAGE_SIZE - offset); - - if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { - ret = -EFAULT; - break; - } - } - - if (unlikely(fatal_signal_pending(current))) { - ret = -EINTR; - break; - } - - ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes); - if (unlikely(ret < 0)) - break; - - cond_resched(); - - if (unlikely(ret == 0)) { - /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. - */ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(iter)); - goto again; - } - pos += ret; - written += ret; - ret = 0; - - balance_dirty_pages_ratelimited(mapping); - } while (iov_iter_count(iter)); - - bch2_pagecache_add_put(inode); - - return written ? 
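/*
 * Illustrative sketch: when the atomic usercopy above makes no
 * progress, the loop retries with at most one page and at most one
 * iovec segment -- otherwise an iov that can never be copied in a
 * single atomic pass would livelock. The retry sizing, distilled
 * (PAGE_SIZE hardcoded to 4096 for illustration):
 */
static unsigned long retry_bytes(unsigned long page_offset,
				 unsigned long single_seg_len)
{
	unsigned long n = 4096 - page_offset;	/* PAGE_SIZE - offset */

	return n < single_seg_len ? n : single_seg_len;
}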
written : ret; -} - -/* O_DIRECT reads */ - -static void bio_check_or_release(struct bio *bio, bool check_dirty) -{ - if (check_dirty) { - bio_check_pages_dirty(bio); - } else { - bio_release_pages(bio, false); - bio_put(bio); - } -} - -static void bch2_dio_read_complete(struct closure *cl) -{ - struct dio_read *dio = container_of(cl, struct dio_read, cl); - - dio->req->ki_complete(dio->req, dio->ret); - bio_check_or_release(&dio->rbio.bio, dio->should_dirty); -} - -static void bch2_direct_IO_read_endio(struct bio *bio) -{ - struct dio_read *dio = bio->bi_private; - - if (bio->bi_status) - dio->ret = blk_status_to_errno(bio->bi_status); - - closure_put(&dio->cl); -} - -static void bch2_direct_IO_read_split_endio(struct bio *bio) -{ - struct dio_read *dio = bio->bi_private; - bool should_dirty = dio->should_dirty; - - bch2_direct_IO_read_endio(bio); - bio_check_or_release(bio, should_dirty); -} - -static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) -{ - struct file *file = req->ki_filp; - struct bch_inode_info *inode = file_bch_inode(file); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct bch_io_opts opts; - struct dio_read *dio; - struct bio *bio; - loff_t offset = req->ki_pos; - bool sync = is_sync_kiocb(req); - size_t shorten; - ssize_t ret; - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - - if ((offset|iter->count) & (block_bytes(c) - 1)) - return -EINVAL; - - ret = min_t(loff_t, iter->count, - max_t(loff_t, 0, i_size_read(&inode->v) - offset)); - - if (!ret) - return ret; - - shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); - iter->count -= shorten; - - bio = bio_alloc_bioset(NULL, - bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), - REQ_OP_READ, - GFP_KERNEL, - &c->dio_read_bioset); - - bio->bi_end_io = bch2_direct_IO_read_endio; - - dio = container_of(bio, struct dio_read, rbio.bio); - closure_init(&dio->cl, NULL); - - /* - * this is a _really_ horrible hack just to avoid an atomic sub at the - * end: - */ - if (!sync) { - set_closure_fn(&dio->cl, bch2_dio_read_complete, NULL); - atomic_set(&dio->cl.remaining, - CLOSURE_REMAINING_INITIALIZER - - CLOSURE_RUNNING + - CLOSURE_DESTRUCTOR); - } else { - atomic_set(&dio->cl.remaining, - CLOSURE_REMAINING_INITIALIZER + 1); - } - - dio->req = req; - dio->ret = ret; - /* - * This is one of the sketchier things I've encountered: we have to skip - * the dirtying of requests that are internal from the kernel (i.e. from - * loopback), because we'll deadlock on page_lock. 
- */ - dio->should_dirty = iter_is_iovec(iter); - - goto start; - while (iter->count) { - bio = bio_alloc_bioset(NULL, - bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), - REQ_OP_READ, - GFP_KERNEL, - &c->bio_read); - bio->bi_end_io = bch2_direct_IO_read_split_endio; -start: - bio->bi_opf = REQ_OP_READ|REQ_SYNC; - bio->bi_iter.bi_sector = offset >> 9; - bio->bi_private = dio; - - ret = bio_iov_iter_get_pages(bio, iter); - if (ret < 0) { - /* XXX: fault inject this path */ - bio->bi_status = BLK_STS_RESOURCE; - bio_endio(bio); - break; - } - - offset += bio->bi_iter.bi_size; - - if (dio->should_dirty) - bio_set_pages_dirty(bio); - - if (iter->count) - closure_get(&dio->cl); - - bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); - } - - iter->count += shorten; - - if (sync) { - closure_sync(&dio->cl); - closure_debug_destroy(&dio->cl); - ret = dio->ret; - bio_check_or_release(&dio->rbio.bio, dio->should_dirty); - return ret; - } else { - return -EIOCBQUEUED; - } -} - -ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct bch_inode_info *inode = file_bch_inode(file); - struct address_space *mapping = file->f_mapping; - size_t count = iov_iter_count(iter); - ssize_t ret; - - if (!count) - return 0; /* skip atime */ - - if (iocb->ki_flags & IOCB_DIRECT) { - struct blk_plug plug; - - if (unlikely(mapping->nrpages)) { - ret = filemap_write_and_wait_range(mapping, - iocb->ki_pos, - iocb->ki_pos + count - 1); - if (ret < 0) - goto out; - } - - file_accessed(file); - - blk_start_plug(&plug); - ret = bch2_direct_IO_read(iocb, iter); - blk_finish_plug(&plug); - - if (ret >= 0) - iocb->ki_pos += ret; - } else { - bch2_pagecache_add_get(inode); - ret = generic_file_read_iter(iocb, iter); - bch2_pagecache_add_put(inode); - } -out: - return bch2_err_class(ret); -} - -/* O_DIRECT writes */ - -static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, - u64 offset, u64 size, - unsigned nr_replicas, bool compressed) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - u64 end = offset + size; - u32 snapshot; - bool ret = true; - int err; - - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - - err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); - if (err) - goto err; - - for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, - SPOS(inum.inum, offset, snapshot), - BTREE_ITER_SLOTS, k, err) { - if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) - break; - - if (k.k->p.snapshot != snapshot || - nr_replicas > bch2_bkey_replicas(c, k) || - (!compressed && bch2_bkey_sectors_compressed(k))) { - ret = false; - break; - } - } - - offset = iter.pos.offset; - bch2_trans_iter_exit(&trans, &iter); -err: - if (bch2_err_matches(err, BCH_ERR_transaction_restart)) - goto retry; - bch2_trans_exit(&trans); - - return err ? 
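/*
 * Illustrative sketch (plain C11 atomics, not the closure machinery
 * used above): the dio_read path pre-biases its refcount so the final
 * closure_put() runs the completion without an extra atomic sub at the
 * end of submission. The underlying idiom:
 */
#include <stdatomic.h>
#include <stdio.h>

struct req { atomic_int remaining; };

static void complete(struct req *r)
{
	(void) r;
	printf("done\n");
}

static void put(struct req *r)
{
	if (atomic_fetch_sub(&r->remaining, 1) == 1)
		complete(r);
}

static void submit_all(struct req *r, int nr_bios)
{
	/* bias: start at 1 so completion can't fire mid-submission */
	atomic_store(&r->remaining, 1);

	for (int i = 0; i < nr_bios; i++) {
		atomic_fetch_add(&r->remaining, 1);
		/* ...submit bio i; its endio handler calls put(r)... */
	}
	put(r);		/* drop the bias; last put runs complete() */
}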
false : ret; -} - -static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio) -{ - struct bch_fs *c = dio->op.c; - struct bch_inode_info *inode = dio->inode; - struct bio *bio = &dio->op.wbio.bio; - - return bch2_check_range_allocated(c, inode_inum(inode), - dio->op.pos.offset, bio_sectors(bio), - dio->op.opts.data_replicas, - dio->op.opts.compression != 0); -} - -static void bch2_dio_write_loop_async(struct bch_write_op *); -static __always_inline long bch2_dio_write_done(struct dio_write *dio); - -/* - * We're going to return -EIOCBQUEUED, but we haven't finished consuming the - * iov_iter yet, so we need to stash a copy of the iovec: it might be on the - * caller's stack, we're not guaranteed that it will live for the duration of - * the IO: - */ -static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) -{ - struct iovec *iov = dio->inline_vecs; - - /* - * iov_iter has a single embedded iovec - nothing to do: - */ - if (iter_is_ubuf(&dio->iter)) - return 0; - - /* - * We don't currently handle non-iovec iov_iters here - return an error, - * and we'll fall back to doing the IO synchronously: - */ - if (!iter_is_iovec(&dio->iter)) - return -1; - - if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { - iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), - GFP_KERNEL); - if (unlikely(!iov)) - return -ENOMEM; - - dio->free_iov = true; - } - - memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); - dio->iter.__iov = iov; - return 0; -} - -static void bch2_dio_write_flush_done(struct closure *cl) -{ - struct dio_write *dio = container_of(cl, struct dio_write, op.cl); - struct bch_fs *c = dio->op.c; - - closure_debug_destroy(cl); - - dio->op.error = bch2_journal_error(&c->journal); - - bch2_dio_write_done(dio); -} +struct nocow_flush { + struct closure *cl; + struct bch_dev *ca; + struct bio bio; +}; -static noinline void bch2_dio_write_flush(struct dio_write *dio) +static void nocow_flush_endio(struct bio *_bio) { - struct bch_fs *c = dio->op.c; - struct bch_inode_unpacked inode; - int ret; - - dio->flush = 0; - closure_init(&dio->op.cl, NULL); - - if (!dio->op.error) { - ret = bch2_inode_find_by_inum(c, inode_inum(dio->inode), &inode); - if (ret) { - dio->op.error = ret; - } else { - bch2_journal_flush_seq_async(&c->journal, inode.bi_journal_seq, &dio->op.cl); - bch2_inode_flush_nocow_writes_async(c, dio->inode, &dio->op.cl); - } - } + struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); - if (dio->sync) { - closure_sync(&dio->op.cl); - closure_debug_destroy(&dio->op.cl); - } else { - continue_at(&dio->op.cl, bch2_dio_write_flush_done, NULL); - } + closure_put(bio->cl); + percpu_ref_put(&bio->ca->io_ref); + bio_put(&bio->bio); } -static __always_inline long bch2_dio_write_done(struct dio_write *dio) +void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, + struct bch_inode_info *inode, + struct closure *cl) { - struct kiocb *req = dio->req; - struct bch_inode_info *inode = dio->inode; - bool sync = dio->sync; - long ret; - - if (unlikely(dio->flush)) { - bch2_dio_write_flush(dio); - if (!sync) - return -EIOCBQUEUED; - } - - bch2_pagecache_block_put(inode); + struct nocow_flush *bio; + struct bch_dev *ca; + struct bch_devs_mask devs; + unsigned dev; - if (dio->free_iov) - kfree(dio->iter.__iov); + dev = find_first_bit(inode->ei_devs_need_flush.d, BCH_SB_MEMBERS_MAX); + if (dev == BCH_SB_MEMBERS_MAX) + return; - ret = dio->op.error ?: ((long) dio->written << 9); - bio_put(&dio->op.wbio.bio); + devs = inode->ei_devs_need_flush; + 
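/*
 * Illustrative POSIX userspace sketch (hypothetical names):
 * bch2_dio_write_copy_iov() above deep-copies the iovec before
 * returning -EIOCBQUEUED, because the caller's array may live on its
 * stack and won't survive the async IO. Minimal shape, including the
 * small-inline-array fast path:
 */
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

static struct iovec *stash_iov(const struct iovec *iov, unsigned nr,
			       struct iovec *inline_vecs,
			       unsigned nr_inline)
{
	struct iovec *copy = nr <= nr_inline
		? inline_vecs			/* no allocation needed */
		: calloc(nr, sizeof(*copy));

	if (copy)
		memcpy(copy, iov, nr * sizeof(*copy));
	return copy;				/* NULL on allocation failure */
}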
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); - /* inode->i_dio_count is our ref on inode and thus bch_fs */ - inode_dio_end(&inode->v); + for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { + rcu_read_lock(); + ca = rcu_dereference(c->devs[dev]); + if (ca && !percpu_ref_tryget(&ca->io_ref)) + ca = NULL; + rcu_read_unlock(); - if (ret < 0) - ret = bch2_err_class(ret); + if (!ca) + continue; - if (!sync) { - req->ki_complete(req, ret); - ret = -EIOCBQUEUED; + bio = container_of(bio_alloc_bioset(ca->disk_sb.bdev, 0, + REQ_OP_FLUSH, + GFP_KERNEL, + &c->nocow_flush_bioset), + struct nocow_flush, bio); + bio->cl = cl; + bio->ca = ca; + bio->bio.bi_end_io = nocow_flush_endio; + closure_bio_submit(&bio->bio, cl); } - return ret; } -static __always_inline void bch2_dio_write_end(struct dio_write *dio) +static int bch2_inode_flush_nocow_writes(struct bch_fs *c, + struct bch_inode_info *inode) { - struct bch_fs *c = dio->op.c; - struct kiocb *req = dio->req; - struct bch_inode_info *inode = dio->inode; - struct bio *bio = &dio->op.wbio.bio; - - req->ki_pos += (u64) dio->op.written << 9; - dio->written += dio->op.written; - - if (dio->extending) { - spin_lock(&inode->v.i_lock); - if (req->ki_pos > inode->v.i_size) - i_size_write(&inode->v, req->ki_pos); - spin_unlock(&inode->v.i_lock); - } - - if (dio->op.i_sectors_delta || dio->quota_res.sectors) { - mutex_lock(&inode->ei_quota_lock); - __i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); - __bch2_quota_reservation_put(c, inode, &dio->quota_res); - mutex_unlock(&inode->ei_quota_lock); - } + struct closure cl; - bio_release_pages(bio, false); + closure_init_stack(&cl); + bch2_inode_flush_nocow_writes_async(c, inode, &cl); + closure_sync(&cl); - if (unlikely(dio->op.error)) - set_bit(EI_INODE_ERROR, &inode->ei_flags); + return 0; } -static __always_inline long bch2_dio_write_loop(struct dio_write *dio) -{ - struct bch_fs *c = dio->op.c; - struct kiocb *req = dio->req; - struct address_space *mapping = dio->mapping; - struct bch_inode_info *inode = dio->inode; - struct bch_io_opts opts; - struct bio *bio = &dio->op.wbio.bio; - unsigned unaligned, iter_count; - bool sync = dio->sync, dropped_locks; - long ret; - - bch2_inode_opts_get(&opts, c, &inode->ei_inode); - - while (1) { - iter_count = dio->iter.count; - - EBUG_ON(current->faults_disabled_mapping); - current->faults_disabled_mapping = mapping; - - ret = bio_iov_iter_get_pages(bio, &dio->iter); - - dropped_locks = fdm_dropped_locks(); - - current->faults_disabled_mapping = NULL; - - /* - * If the fault handler returned an error but also signalled - * that it dropped & retook ei_pagecache_lock, we just need to - * re-shoot down the page cache and retry: - */ - if (dropped_locks && ret) - ret = 0; - - if (unlikely(ret < 0)) - goto err; - - if (unlikely(dropped_locks)) { - ret = write_invalidate_inode_pages_range(mapping, - req->ki_pos, - req->ki_pos + iter_count - 1); - if (unlikely(ret)) - goto err; - - if (!bio->bi_iter.bi_size) - continue; - } - - unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1); - bio->bi_iter.bi_size -= unaligned; - iov_iter_revert(&dio->iter, unaligned); - - if (!bio->bi_iter.bi_size) { - /* - * bio_iov_iter_get_pages was only able to get < - * blocksize worth of pages: - */ - ret = -EFAULT; - goto err; - } - - bch2_write_op_init(&dio->op, c, opts); - dio->op.end_io = sync - ? 
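/*
 * Illustrative sketch: the dio write loop above trims the bio to a
 * block boundary and reverts the iterator by the remainder. The mask
 * trick, valid because block sizes are powers of two:
 */
static unsigned trim_to_block(unsigned *size, unsigned block_bytes)
{
	unsigned unaligned = *size & (block_bytes - 1);

	*size -= unaligned;	/* now a multiple of block_bytes */
	return unaligned;	/* caller reverts the iterator by this */
}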
NULL - : bch2_dio_write_loop_async; - dio->op.target = dio->op.opts.foreground_target; - dio->op.write_point = writepoint_hashed((unsigned long) current); - dio->op.nr_replicas = dio->op.opts.data_replicas; - dio->op.subvol = inode->ei_subvol; - dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9); - dio->op.devs_need_flush = &inode->ei_devs_need_flush; - - if (sync) - dio->op.flags |= BCH_WRITE_SYNC; - dio->op.flags |= BCH_WRITE_CHECK_ENOSPC; - - ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, - bio_sectors(bio), true); - if (unlikely(ret)) - goto err; - - ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio), - dio->op.opts.data_replicas, 0); - if (unlikely(ret) && - !bch2_dio_write_check_allocated(dio)) - goto err; - - task_io_account_write(bio->bi_iter.bi_size); - - if (unlikely(dio->iter.count) && - !dio->sync && - !dio->loop && - bch2_dio_write_copy_iov(dio)) - dio->sync = sync = true; - - dio->loop = true; - closure_call(&dio->op.cl, bch2_write, NULL, NULL); - - if (!sync) - return -EIOCBQUEUED; - - bch2_dio_write_end(dio); - - if (likely(!dio->iter.count) || dio->op.error) - break; - - bio_reset(bio, NULL, REQ_OP_WRITE); - } -out: - return bch2_dio_write_done(dio); -err: - dio->op.error = ret; - - bio_release_pages(bio, false); +/* i_size updates: */ - bch2_quota_reservation_put(c, inode, &dio->quota_res); - goto out; -} +struct inode_new_size { + loff_t new_size; + u64 now; + unsigned fields; +}; -static noinline __cold void bch2_dio_write_continue(struct dio_write *dio) +static int inode_set_size(struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) { - struct mm_struct *mm = dio->mm; + struct inode_new_size *s = p; - bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE); + bi->bi_size = s->new_size; + if (s->fields & ATTR_ATIME) + bi->bi_atime = s->now; + if (s->fields & ATTR_MTIME) + bi->bi_mtime = s->now; + if (s->fields & ATTR_CTIME) + bi->bi_ctime = s->now; - if (mm) - kthread_use_mm(mm); - bch2_dio_write_loop(dio); - if (mm) - kthread_unuse_mm(mm); + return 0; } -static void bch2_dio_write_loop_async(struct bch_write_op *op) +int __must_check bch2_write_inode_size(struct bch_fs *c, + struct bch_inode_info *inode, + loff_t new_size, unsigned fields) { - struct dio_write *dio = container_of(op, struct dio_write, op); - - bch2_dio_write_end(dio); + struct inode_new_size s = { + .new_size = new_size, + .now = bch2_current_time(c), + .fields = fields, + }; - if (likely(!dio->iter.count) || dio->op.error) - bch2_dio_write_done(dio); - else - bch2_dio_write_continue(dio); + return bch2_write_inode(c, inode, inode_set_size, &s, fields); } -static noinline -ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) +void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, + struct quota_res *quota_res, s64 sectors) { - struct file *file = req->ki_filp; - struct address_space *mapping = file->f_mapping; - struct bch_inode_info *inode = file_bch_inode(file); - struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct dio_write *dio; - struct bio *bio; - bool locked = true, extending; - ssize_t ret; - - prefetch(&c->opts); - prefetch((void *) &c->opts + 64); - prefetch(&inode->ei_inode); - prefetch((void *) &inode->ei_inode + 64); - - inode_lock(&inode->v); - - ret = generic_write_checks(req, iter); - if (unlikely(ret <= 0)) - goto err; - - ret = file_remove_privs(file); - if (unlikely(ret)) - goto err; - - ret = file_update_time(file); - if (unlikely(ret)) - goto err; - - if (unlikely((req->ki_pos|iter->count) & 
(block_bytes(c) - 1))) - goto err; - - inode_dio_begin(&inode->v); - bch2_pagecache_block_get(inode); - - extending = req->ki_pos + iter->count > inode->v.i_size; - if (!extending) { - inode_unlock(&inode->v); - locked = false; - } - - bio = bio_alloc_bioset(NULL, - bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), - REQ_OP_WRITE, - GFP_KERNEL, - &c->dio_write_bioset); - dio = container_of(bio, struct dio_write, op.wbio.bio); - dio->req = req; - dio->mapping = mapping; - dio->inode = inode; - dio->mm = current->mm; - dio->loop = false; - dio->extending = extending; - dio->sync = is_sync_kiocb(req) || extending; - dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; - dio->free_iov = false; - dio->quota_res.sectors = 0; - dio->written = 0; - dio->iter = *iter; - dio->op.c = c; - - if (unlikely(mapping->nrpages)) { - ret = write_invalidate_inode_pages_range(mapping, - req->ki_pos, - req->ki_pos + iter->count - 1); - if (unlikely(ret)) - goto err_put_bio; - } - - ret = bch2_dio_write_loop(dio); -err: - if (locked) - inode_unlock(&inode->v); - return ret; -err_put_bio: - bch2_pagecache_block_put(inode); - bio_put(bio); - inode_dio_end(&inode->v); - goto err; -} + bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, + "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, sectors, + inode->ei_inode.bi_sectors); + inode->v.i_blocks += sectors; -ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - struct bch_inode_info *inode = file_bch_inode(file); - ssize_t ret; +#ifdef CONFIG_BCACHEFS_QUOTA + if (quota_res && + !test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags) && + sectors > 0) { + BUG_ON(sectors > quota_res->sectors); + BUG_ON(sectors > inode->ei_quota_reserved); - if (iocb->ki_flags & IOCB_DIRECT) { - ret = bch2_direct_write(iocb, from); - goto out; + quota_res->sectors -= sectors; + inode->ei_quota_reserved -= sectors; + } else { + bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); } - - inode_lock(&inode->v); - - ret = generic_write_checks(iocb, from); - if (ret <= 0) - goto unlock; - - ret = file_remove_privs(file); - if (ret) - goto unlock; - - ret = file_update_time(file); - if (ret) - goto unlock; - - ret = bch2_buffered_write(iocb, from); - if (likely(ret > 0)) - iocb->ki_pos += ret; -unlock: - inode_unlock(&inode->v); - - if (ret > 0) - ret = generic_write_sync(iocb, ret); -out: - return bch2_err_class(ret); +#endif } /* fsync: */ @@ -2908,10 +302,10 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode, s->s[i].nr_replicas = 0; i_sectors_delta -= s->s[i].state == SECTOR_dirty; - folio_sector_set(folio, s, i, SECTOR_unallocated); + bch2_folio_sector_set(folio, s, i, SECTOR_unallocated); } - i_sectors_acct(c, inode, NULL, i_sectors_delta); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); /* * Caller needs to know whether this folio will be written out by @@ -3102,7 +496,7 @@ int bch2_truncate(struct mnt_idmap *idmap, ret = bch2_fpunch(c, inode_inum(inode), round_up(iattr->ia_size, block_bytes(c)) >> 9, U64_MAX, &i_sectors_delta); - i_sectors_acct(c, inode, NULL, i_sectors_delta); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && !bch2_journal_error(&c->journal), c, @@ -3156,7 +550,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len ret = bch2_fpunch(c, inode_inum(inode), block_start >> 9, block_end >> 9, 
&i_sectors_delta); - i_sectors_acct(c, inode, NULL, i_sectors_delta); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); } mutex_lock(&inode->ei_update_lock); @@ -3207,7 +601,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, new_size = inode->v.i_size + shift; - ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); + ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); if (ret) return ret; @@ -3223,7 +617,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ret = bch2_fpunch(c, inode_inum(inode), offset >> 9, (offset + len) >> 9, &i_sectors_delta); - i_sectors_acct(c, inode, NULL, i_sectors_delta); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); if (ret) return ret; @@ -3444,10 +838,10 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, if (ret) goto bkey_err; - i_sectors_acct(c, inode, "a_res, i_sectors_delta); + bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); drop_locks_do(&trans, - (mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); + (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); bkey_err: bch2_quota_reservation_put(c, inode, "a_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -3460,7 +854,7 @@ bkey_err: bch2_fpunch_at(&trans, &iter, inode_inum(inode), end_sector, &i_sectors_delta); - i_sectors_acct(c, inode, "a_res, i_sectors_delta); + bch2_i_sectors_acct(c, inode, "a_res, i_sectors_delta); bch2_quota_reservation_put(c, inode, "a_res); } @@ -3654,7 +1048,7 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, aligned_len = round_up((u64) len, block_bytes(c)); - ret = write_invalidate_inode_pages_range(dst->v.i_mapping, + ret = bch2_write_invalidate_inode_pages_range(dst->v.i_mapping, pos_dst, pos_dst + len - 1); if (ret) goto err; @@ -3666,7 +1060,7 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, file_update_time(file_dst); - mark_pagecache_unallocated(src, pos_src >> 9, + bch2_mark_pagecache_unallocated(src, pos_src >> 9, (pos_src + aligned_len) >> 9); ret = bch2_remap_range(c, @@ -3682,7 +1076,7 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, */ ret = min((u64) ret << 9, (u64) len); - i_sectors_acct(c, dst, "a_res, i_sectors_delta); + bch2_i_sectors_acct(c, dst, "a_res, i_sectors_delta); spin_lock(&dst->v.i_lock); if (pos_dst + ret > dst->v.i_size) @@ -3701,68 +1095,6 @@ err: /* fseek: */ -static int folio_data_offset(struct folio *folio, loff_t pos, - unsigned min_replicas) -{ - struct bch_folio *s = bch2_folio(folio); - unsigned i, sectors = folio_sectors(folio); - - if (s) - for (i = folio_pos_to_s(folio, pos); i < sectors; i++) - if (s->s[i].state >= SECTOR_dirty && - s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas) - return i << SECTOR_SHIFT; - - return -1; -} - -static loff_t bch2_seek_pagecache_data(struct inode *vinode, - loff_t start_offset, - loff_t end_offset, - unsigned min_replicas, - bool nonblock) -{ - struct folio_batch fbatch; - pgoff_t start_index = start_offset >> PAGE_SHIFT; - pgoff_t end_index = end_offset >> PAGE_SHIFT; - pgoff_t index = start_index; - unsigned i; - loff_t ret; - int offset; - - folio_batch_init(&fbatch); - - while (filemap_get_folios(vinode->i_mapping, - &index, end_index, &fbatch)) { - for (i = 0; i < folio_batch_count(&fbatch); i++) { - struct folio *folio = fbatch.folios[i]; - - if (!nonblock) { - folio_lock(folio); - } else if (!folio_trylock(folio)) { - folio_batch_release(&fbatch); - 
return -EAGAIN; - } - - offset = folio_data_offset(folio, - max(folio_pos(folio), start_offset), - min_replicas); - if (offset >= 0) { - ret = clamp(folio_pos(folio) + offset, - start_offset, end_offset); - folio_unlock(folio); - folio_batch_release(&fbatch); - return ret; - } - folio_unlock(folio); - } - folio_batch_release(&fbatch); - cond_resched(); - } - - return end_offset; -} - static loff_t bch2_seek_data(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); @@ -3816,85 +1148,6 @@ err: return vfs_setpos(file, next_data, MAX_LFS_FILESIZE); } -static int folio_hole_offset(struct address_space *mapping, loff_t *offset, - unsigned min_replicas, bool nonblock) -{ - struct folio *folio; - struct bch_folio *s; - unsigned i, sectors; - bool ret = true; - - folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT, - FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0); - if (folio == ERR_PTR(-EAGAIN)) - return -EAGAIN; - if (IS_ERR_OR_NULL(folio)) - return true; - - s = bch2_folio(folio); - if (!s) - goto unlock; - - sectors = folio_sectors(folio); - for (i = folio_pos_to_s(folio, *offset); i < sectors; i++) - if (s->s[i].state < SECTOR_dirty || - s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) { - *offset = max(*offset, - folio_pos(folio) + (i << SECTOR_SHIFT)); - goto unlock; - } - - *offset = folio_end_pos(folio); - ret = false; -unlock: - folio_unlock(folio); - folio_put(folio); - return ret; -} - -static loff_t bch2_seek_pagecache_hole(struct inode *vinode, - loff_t start_offset, - loff_t end_offset, - unsigned min_replicas, - bool nonblock) -{ - struct address_space *mapping = vinode->i_mapping; - loff_t offset = start_offset; - - while (offset < end_offset && - !folio_hole_offset(mapping, &offset, min_replicas, nonblock)) - ; - - return min(offset, end_offset); -} - -static int bch2_clamp_data_hole(struct inode *inode, - u64 *hole_start, - u64 *hole_end, - unsigned min_replicas, - bool nonblock) -{ - loff_t ret; - - ret = bch2_seek_pagecache_hole(inode, - *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; - if (ret < 0) - return ret; - - *hole_start = ret; - - if (*hole_start == *hole_end) - return 0; - - ret = bch2_seek_pagecache_data(inode, - *hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9; - if (ret < 0) - return ret; - - *hole_end = ret; - return 0; -} - static loff_t bch2_seek_hole(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); @@ -3979,28 +1232,10 @@ loff_t bch2_llseek(struct file *file, loff_t offset, int whence) void bch2_fs_fsio_exit(struct bch_fs *c) { bioset_exit(&c->nocow_flush_bioset); - bioset_exit(&c->dio_write_bioset); - bioset_exit(&c->dio_read_bioset); - bioset_exit(&c->writepage_bioset); } int bch2_fs_fsio_init(struct bch_fs *c) { - if (bioset_init(&c->writepage_bioset, - 4, offsetof(struct bch_writepage_io, op.wbio.bio), - BIOSET_NEED_BVECS)) - return -BCH_ERR_ENOMEM_writepage_bioset_init; - - if (bioset_init(&c->dio_read_bioset, - 4, offsetof(struct dio_read, rbio.bio), - BIOSET_NEED_BVECS)) - return -BCH_ERR_ENOMEM_dio_read_bioset_init; - - if (bioset_init(&c->dio_write_bioset, - 4, offsetof(struct dio_write, op.wbio.bio), - BIOSET_NEED_BVECS)) - return -BCH_ERR_ENOMEM_dio_write_bioset_init; - if (bioset_init(&c->nocow_flush_bioset, 1, offsetof(struct nocow_flush, bio), 0)) return -BCH_ERR_ENOMEM_nocow_flush_bioset_init; diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index af905331542d..bb5b709fa8cf 100644 --- a/fs/bcachefs/fs-io.h +++ 
b/fs/bcachefs/fs-io.h @@ -5,28 +5,163 @@ #ifndef NO_BCACHEFS_FS #include "buckets.h" +#include "fs.h" #include "io_types.h" +#include "quota.h" #include -struct quota_res; +struct folio_vec { + struct folio *fv_folio; + size_t fv_offset; + size_t fv_len; +}; + +static inline struct folio_vec biovec_to_foliovec(struct bio_vec bv) +{ + + struct folio *folio = page_folio(bv.bv_page); + size_t offset = (folio_page_idx(folio, bv.bv_page) << PAGE_SHIFT) + + bv.bv_offset; + size_t len = min_t(size_t, folio_size(folio) - offset, bv.bv_len); + + return (struct folio_vec) { + .fv_folio = folio, + .fv_offset = offset, + .fv_len = len, + }; +} + +static inline struct folio_vec bio_iter_iovec_folio(struct bio *bio, + struct bvec_iter iter) +{ + return biovec_to_foliovec(bio_iter_iovec(bio, iter)); +} + +#define __bio_for_each_folio(bvl, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bio_iter_iovec_folio((bio), (iter))), 1); \ + bio_advance_iter_single((bio), &(iter), (bvl).fv_len)) + +/** + * bio_for_each_folio - iterate over folios within a bio + * + * Like other non-_all versions, this iterates over what bio->bi_iter currently + * points to. This version is for drivers, where the bio may have previously + * been split or cloned. + */ +#define bio_for_each_folio(bvl, bio, iter) \ + __bio_for_each_folio(bvl, bio, iter, (bio)->bi_iter) + +struct quota_res { + u64 sectors; +}; + +#ifdef CONFIG_BCACHEFS_QUOTA + +static inline void __bch2_quota_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res) +{ + BUG_ON(res->sectors > inode->ei_quota_reserved); + + bch2_quota_acct(c, inode->ei_qid, Q_SPC, + -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); + inode->ei_quota_reserved -= res->sectors; + res->sectors = 0; +} + +static inline void bch2_quota_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res) +{ + if (res->sectors) { + mutex_lock(&inode->ei_quota_lock); + __bch2_quota_reservation_put(c, inode, res); + mutex_unlock(&inode->ei_quota_lock); + } +} + +static inline int bch2_quota_reservation_add(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res, + u64 sectors, + bool check_enospc) +{ + int ret; + + if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) + return 0; + + mutex_lock(&inode->ei_quota_lock); + ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, + check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); + if (likely(!ret)) { + inode->ei_quota_reserved += sectors; + res->sectors += sectors; + } + mutex_unlock(&inode->ei_quota_lock); + + return ret; +} -int __must_check bch2_write_inode_size(struct bch_fs *, - struct bch_inode_info *, - loff_t, unsigned); +#else + +static inline void __bch2_quota_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res) {} -int bch2_read_folio(struct file *, struct folio *); +static inline void bch2_quota_reservation_put(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res) {} -int bch2_writepages(struct address_space *, struct writeback_control *); -void bch2_readahead(struct readahead_control *); +static inline int bch2_quota_reservation_add(struct bch_fs *c, + struct bch_inode_info *inode, + struct quota_res *res, + unsigned sectors, + bool check_enospc) +{ + return 0; +} -int bch2_write_begin(struct file *, struct address_space *, loff_t, - unsigned, struct page **, void **); -int bch2_write_end(struct file *, struct address_space *, loff_t, - unsigned, unsigned, struct page *, void *); +#endif -ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *); -ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *); +void __bch2_i_sectors_acct(struct bch_fs *, struct bch_inode_info *, + struct quota_res *, s64); + +static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, + struct quota_res *quota_res, s64 sectors) +{ + if (sectors) { + mutex_lock(&inode->ei_quota_lock); + __bch2_i_sectors_acct(c, inode, quota_res, sectors); + mutex_unlock(&inode->ei_quota_lock); + } +} + +static inline struct address_space *faults_disabled_mapping(void) +{ + return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); +} + +static inline void set_fdm_dropped_locks(void) +{ + current->faults_disabled_mapping = + (void *) (((unsigned long) current->faults_disabled_mapping)|1); +} + +static inline bool fdm_dropped_locks(void) +{ + return ((unsigned long) current->faults_disabled_mapping) & 1; +} + +void bch2_inode_flush_nocow_writes_async(struct bch_fs *, + struct bch_inode_info *, struct closure *); + +int __must_check bch2_write_inode_size(struct bch_fs *, + struct bch_inode_info *, + loff_t, unsigned); int bch2_fsync(struct file *, loff_t, loff_t, int); @@ -39,11 +174,6 @@ loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, loff_t bch2_llseek(struct file *, loff_t, int); -vm_fault_t bch2_page_fault(struct vm_fault *); -vm_fault_t bch2_page_mkwrite(struct vm_fault *); -void bch2_invalidate_folio(struct folio *, size_t, size_t); -bool bch2_release_folio(struct folio *, gfp_t); - void bch2_fs_fsio_exit(struct bch_fs *); int bch2_fs_fsio_init(struct bch_fs *); #else diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 695b8bc55590..aa7ec5dc9ff1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -14,6 +14,9 @@ #include "fs-common.h" #include "fs-io.h" #include "fs-ioctl.h" +#include "fs-io-buffered.h" +#include "fs-io-direct.h" +#include "fs-io-pagecache.h" #include "fsck.h" #include "inode.h" #include "io.h" diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7ec22631cdd3..de7bc0192c3d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -30,6 +30,8 @@ #include "error.h" #include "fs.h" #include "fs-io.h" +#include "fs-io-buffered.h" +#include "fs-io-direct.h" #include "fsck.h" #include "inode.h" #include "io.h" @@ -469,6 +471,8 @@ static void __bch2_fs_free(struct bch_fs *c) 
bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); bch2_fs_quota_exit(c); + bch2_fs_fs_io_direct_exit(c); + bch2_fs_fs_io_buffered_exit(c); bch2_fs_fsio_exit(c); bch2_fs_ec_exit(c); bch2_fs_encryption_exit(c); @@ -842,7 +846,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_encryption_init(c) ?: bch2_fs_compress_init(c) ?: bch2_fs_ec_init(c) ?: - bch2_fs_fsio_init(c); + bch2_fs_fsio_init(c) ?: + bch2_fs_fs_io_buffered_init(c); + bch2_fs_fs_io_direct_init(c); if (ret) goto err; -- cgit v1.2.3 From 791236b85c2dfd3bc6b857431658efb49de83343 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Sat, 12 Aug 2023 15:47:45 +0100 Subject: bcachefs: Add btree_trans* to inode_set_fn This will be used when we need to re-hash a directory tree when setting flags. It is not possible to have concurrent btree_trans on a thread. Signed-off-by: Joshua Ashton Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 12 ++++++++---- fs/bcachefs/fs-ioctl.c | 11 +++++++---- fs/bcachefs/fs.c | 5 +++-- fs/bcachefs/fs.h | 3 ++- fs/bcachefs/xattr.c | 3 ++- 5 files changed, 22 insertions(+), 12 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 11a4919f30cd..ceab12fb8a8f 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -109,7 +109,8 @@ struct inode_new_size { unsigned fields; }; -static int inode_set_size(struct bch_inode_info *inode, +static int inode_set_size(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { @@ -389,7 +390,8 @@ static int bch2_extend(struct mnt_idmap *idmap, return bch2_setattr_nonsize(idmap, inode, iattr); } -static int bch2_truncate_finish_fn(struct bch_inode_info *inode, +static int bch2_truncate_finish_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { @@ -397,7 +399,8 @@ static int bch2_truncate_finish_fn(struct bch_inode_info *inode, return 0; } -static int bch2_truncate_start_fn(struct bch_inode_info *inode, +static int bch2_truncate_start_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { u64 *new_i_size = p; @@ -518,7 +521,8 @@ err: /* fallocate: */ -static int inode_update_times_fn(struct bch_inode_info *inode, +static int inode_update_times_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { struct bch_fs *c = inode->v.i_sb->s_fs_info; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index dfa1bf73c854..141bcced031e 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -31,7 +31,8 @@ struct flags_set { bool projinherit; }; -static int bch2_inode_flags_set(struct bch_inode_info *inode, +static int bch2_inode_flags_set(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { @@ -124,7 +125,8 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, return copy_to_user(arg, &fa, sizeof(fa)); } -static int fssetxattr_inode_update_fn(struct bch_inode_info *inode, +static int fssetxattr_inode_update_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { @@ -135,7 +137,7 @@ static int fssetxattr_inode_update_fn(struct bch_inode_info *inode, bi->bi_project = s->projid; } - return bch2_inode_flags_set(inode, bi, p); + return bch2_inode_flags_set(trans, inode, bi, p); } static int bch2_ioc_fssetxattr(struct bch_fs *c, @@ -192,7 +194,8 @@ err: return ret; } -static 
int bch2_reinherit_attrs_fn(struct bch_inode_info *inode, +static int bch2_reinherit_attrs_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index aa7ec5dc9ff1..113518ebd095 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -92,7 +92,7 @@ retry: ret = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT) ?: - (set ? set(inode, &inode_u, p) : 0) ?: + (set ? set(&trans, inode, &inode_u, p) : 0) ?: bch2_inode_write(&trans, &iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); @@ -1414,7 +1414,8 @@ static void bch2_destroy_inode(struct inode *vinode) call_rcu(&vinode->i_rcu, bch2_i_callback); } -static int inode_update_times_fn(struct bch_inode_info *inode, +static int inode_update_times_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h index 6170d214d648..10e11119ded2 100644 --- a/fs/bcachefs/fs.h +++ b/fs/bcachefs/fs.h @@ -174,7 +174,8 @@ static inline int bch2_set_projid(struct bch_fs *c, struct inode *bch2_vfs_inode_get(struct bch_fs *, subvol_inum); /* returns 0 if we want to do the update, or error is passed up */ -typedef int (*inode_set_fn)(struct bch_inode_info *, +typedef int (*inode_set_fn)(struct btree_trans *, + struct bch_inode_info *, struct bch_inode_unpacked *, void *); void bch2_inode_update_after_write(struct btree_trans *, diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 70f78006daf2..6f6b3caf0607 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -494,7 +494,8 @@ struct inode_opt_set { bool defined; }; -static int inode_opt_set_fn(struct bch_inode_info *inode, +static int inode_opt_set_fn(struct btree_trans *trans, + struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { -- cgit v1.2.3 From 01a7e74fe14179cba90bf3f52ad3188a1d6819d2 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Sat, 12 Aug 2023 22:26:29 +0100 Subject: bcachefs: Introduce bch2_dirent_get_name A nice cleanup that avoids a bunch of open-coding name/string usage around dirent usage. Will be used by casefolding impl in future commits. 
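For illustration, the shape of the cleanup (simplified from the dirent_cmp_key() conversion in the hunks below): instead of pairing a raw name pointer with a separately computed length, callers now get both in a single struct qstr:

	/* before: length and pointer tracked separately */
	int len = bch2_dirent_name_bytes(l);
	return len - r->len ?: memcmp(l.v->d_name, r->name, len);

	/* after: one struct qstr carries name and length */
	const struct qstr l_name = bch2_dirent_get_name(l);
	return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len);

This also keeps the strnlen()-based length clamp in one place, inside bch2_dirent_get_name(), so name comparisons never read past the end of the bkey value.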
Signed-off-by: Joshua Ashton Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 52 ++++++++++++++++++++++++++++++---------------------- fs/bcachefs/dirent.h | 2 +- fs/bcachefs/fs.c | 8 +++++--- 3 files changed, 36 insertions(+), 26 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 065ea59ee9fa..a87c4e5f089d 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,7 +13,7 @@ #include -unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) +static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { unsigned len = bkey_val_bytes(d.k) - offsetof(struct bch_dirent, d_name); @@ -21,6 +21,11 @@ unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) return strnlen(d.v->d_name, len); } +struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d) +{ + return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); +} + static u64 bch2_dirent_hash(const struct bch_hash_info *info, const struct qstr *name) { @@ -41,7 +46,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr name = QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); + struct qstr name = bch2_dirent_get_name(d); return bch2_dirent_hash(info, &name); } @@ -49,20 +54,20 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); - int len = bch2_dirent_name_bytes(l); - const struct qstr *r = _r; + const struct qstr l_name = bch2_dirent_get_name(l); + const struct qstr *r_name = _r; - return len - r->len ?: memcmp(l.v->d_name, r->name, len); + return l_name.len - r_name->len ?: memcmp(l_name.name, r_name->name, l_name.len); } static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); - int l_len = bch2_dirent_name_bytes(l); - int r_len = bch2_dirent_name_bytes(r); + const struct qstr l_name = bch2_dirent_get_name(l); + const struct qstr r_name = bch2_dirent_get_name(r); - return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); + return l_name.len - r_name.len ?: memcmp(l_name.name, r_name.name, l_name.len); } static bool dirent_is_visible(subvol_inum inum, struct bkey_s_c k) @@ -89,37 +94,36 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - unsigned len; + struct qstr d_name = bch2_dirent_get_name(d); - len = bch2_dirent_name_bytes(d); - if (!len) { + if (!d_name.len) { prt_printf(err, "empty name"); return -BCH_ERR_invalid_bkey; } - if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) { + if (bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len)) { prt_printf(err, "value too big (%zu > %u)", - bkey_val_u64s(k.k), dirent_val_u64s(len)); + bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); return -BCH_ERR_invalid_bkey; } - if (len > BCH_NAME_MAX) { + if (d_name.len > BCH_NAME_MAX) { prt_printf(err, "dirent name too big (%u > %u)", - len, BCH_NAME_MAX); + d_name.len, BCH_NAME_MAX); return -BCH_ERR_invalid_bkey; } - if (len == 1 && !memcmp(d.v->d_name, ".", 1)) { + if (d_name.len == 1 && !memcmp(d_name.name, ".", 1)) { prt_printf(err, "invalid name"); return -BCH_ERR_invalid_bkey; } - if (len == 2 && !memcmp(d.v->d_name, "..", 2)) { + 
if (d_name.len == 2 && !memcmp(d_name.name, "..", 2)) { prt_printf(err, "invalid name"); return -BCH_ERR_invalid_bkey; } - if (memchr(d.v->d_name, '/', len)) { + if (memchr(d_name.name, '/', d_name.len)) { prt_printf(err, "invalid name"); return -BCH_ERR_invalid_bkey; } @@ -137,10 +141,11 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + struct qstr d_name = bch2_dirent_get_name(d); prt_printf(out, "%.*s -> %llu type %s", - bch2_dirent_name_bytes(d), - d.v->d_name, + d_name.len, + d_name.name, d.v->d_type != DT_SUBVOL ? le64_to_cpu(d.v->d_inum) : le32_to_cpu(d.v->d_child_subvol), @@ -507,6 +512,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) subvol_inum target; u32 snapshot; struct bkey_buf sk; + struct qstr name; int ret; bch2_bkey_buf_init(&sk); @@ -537,9 +543,11 @@ retry: dirent = bkey_i_to_s_c_dirent(sk.k); bch2_trans_unlock(&trans); + name = bch2_dirent_get_name(dirent); + ctx->pos = dirent.k->p.offset; - if (!dir_emit(ctx, dirent.v->d_name, - bch2_dirent_name_bytes(dirent), + if (!dir_emit(ctx, name.name, + name.len, target.inum, vfs_d_type(dirent.v->d_type))) break; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index b42f4a13bc55..e9fa1df38232 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -24,7 +24,7 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; -unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent); +struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); static inline unsigned dirent_val_u64s(unsigned len) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 113518ebd095..0e1b31707d80 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1237,7 +1237,8 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child struct bch_inode_unpacked inode_u; subvol_inum target; u32 snapshot; - unsigned name_len; + struct qstr dirent_name; + unsigned name_len = 0; int ret; if (!S_ISDIR(dir->v.i_mode)) @@ -1314,9 +1315,10 @@ retry: ret = -ENOENT; goto err; found: - name_len = min_t(unsigned, bch2_dirent_name_bytes(d), NAME_MAX); + dirent_name = bch2_dirent_get_name(d); - memcpy(name, d.v->d_name, name_len); + name_len = min_t(unsigned, dirent_name.len, NAME_MAX); + memcpy(name, dirent_name.name, name_len); name[name_len] = '\0'; err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) -- cgit v1.2.3 From 8e877caaad818595ecb6754355cea2058fd9848e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 16 Aug 2023 16:54:33 -0400 Subject: bcachefs: Split out snapshot.c subvolume.c has gotten a bit large, this splits out a separate file just for managing snapshot trees - BTREE_ID_snapshots. 
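For context, the central structure moving here: snapshot.c owns the in-memory snapshot_table, where each node stores a parent pointer plus a three-entry skiplist, and snapshot IDs strictly increase toward the root (a parent always has a larger ID than its children). Ancestor queries can therefore jump several levels at a time instead of walking one parent per step. A simplified, illustrative sketch of that walk - the real __bch2_snapshot_is_ancestor() below additionally finishes the last IS_ANCESTOR_BITMAP levels with a per-node bitmap test under RCU:

	/* illustrative sketch only, not the exact in-tree helper: */
	static bool snapshot_is_ancestor(struct snapshot_table *t, u32 id, u32 ancestor)
	{
		while (id && id < ancestor) {
			const struct snapshot_t *s = __snapshot_t(t, id);

			/*
			 * skip[] is kept sorted ascending; jump as far
			 * rootward as possible without passing the target:
			 */
			if (s->skip[2] <= ancestor)
				id = s->skip[2];
			else if (s->skip[1] <= ancestor)
				id = s->skip[1];
			else if (s->skip[0] <= ancestor)
				id = s->skip[0];
			else
				id = s->parent;
		}

		return id == ancestor;
	}

The depth and skip[] fields are also what check_snapshot() repairs further down for filesystems that predate bcachefs_metadata_version_snapshot_skiplists.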
Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/bcachefs_format.h | 5 + fs/bcachefs/bkey_methods.c | 1 + fs/bcachefs/btree_iter.c | 2 +- fs/bcachefs/btree_trans_commit.c | 2 +- fs/bcachefs/btree_update.c | 2 +- fs/bcachefs/fs.c | 1 + fs/bcachefs/fsck.c | 2 +- fs/bcachefs/inode.c | 1 + fs/bcachefs/quota.c | 2 +- fs/bcachefs/recovery.c | 1 + fs/bcachefs/snapshot.c | 1358 +++++++++++++++++++++++++++++++++++++ fs/bcachefs/snapshot.h | 256 +++++++ fs/bcachefs/subvolume.c | 1360 +------------------------------------- fs/bcachefs/subvolume.h | 223 ------- fs/bcachefs/super.c | 1 + fs/bcachefs/tests.c | 2 +- fs/bcachefs/util.h | 5 + 18 files changed, 1655 insertions(+), 1570 deletions(-) create mode 100644 fs/bcachefs/snapshot.c create mode 100644 fs/bcachefs/snapshot.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 4b7f384f703f..b4fa88dfd484 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -69,6 +69,7 @@ bcachefs-y := \ sb-members.o \ siphash.o \ six.o \ + snapshot.o \ subvolume.o \ super.o \ super-io.o \ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 23bae622309c..20e96daf9ca1 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1124,6 +1124,11 @@ struct bch_subvolume { __le32 flags; __le32 snapshot; __le64 inode; + /* + * Snapshot subvolumes form a tree, separate from the snapshot nodes + * tree - if this subvolume is a snapshot, this is the ID of the + * subvolume it was created from: + */ __le32 parent; __le32 pad; bch_le128 otime; diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 90557f4c156d..6547142db428 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -13,6 +13,7 @@ #include "lru.h" #include "quota.h" #include "reflink.h" +#include "snapshot.h" #include "subvolume.h" #include "xattr.h" diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index d22412dc5b46..98cf52c5e132 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -14,7 +14,7 @@ #include "extents.h" #include "journal.h" #include "replicas.h" -#include "subvolume.h" +#include "snapshot.h" #include "trace.h" #include diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 78a09aa050c7..83cc7f64c57c 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -14,7 +14,7 @@ #include "journal.h" #include "journal_reclaim.h" #include "replicas.h" -#include "subvolume.h" +#include "snapshot.h" #include diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 612fba60be14..a7fa20727d4b 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -11,7 +11,7 @@ #include "error.h" #include "extents.h" #include "keylist.h" -#include "subvolume.h" +#include "snapshot.h" #include "trace.h" static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0e1b31707d80..d2f93a8af4ac 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -23,6 +23,7 @@ #include "journal.h" #include "keylist.h" #include "quota.h" +#include "snapshot.h" #include "super.h" #include "xattr.h" diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0b4ddf650a97..9524bd621b2c 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -12,7 +12,7 @@ #include "inode.h" #include "keylist.h" #include "recovery.h" -#include "subvolume.h" +#include "snapshot.h" #include "super.h" 
#include "xattr.h" diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 46c4012facb0..8114b6e4f202 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -11,6 +11,7 @@ #include "extent_update.h" #include "inode.h" #include "str_hash.h" +#include "snapshot.h" #include "subvolume.h" #include "varint.h" diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index 4f0654ff816f..ca99772aedc6 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -5,7 +5,7 @@ #include "error.h" #include "inode.h" #include "quota.h" -#include "subvolume.h" +#include "snapshot.h" #include "super-io.h" static const char * const bch2_quota_types[] = { diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 33a68a335be6..30efb3c90560 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -25,6 +25,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "snapshot.h" #include "subvolume.h" #include "super-io.h" diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c new file mode 100644 index 000000000000..ad7991ad87a9 --- /dev/null +++ b/fs/bcachefs/snapshot.c @@ -0,0 +1,1358 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "btree_key_cache.h" +#include "btree_update.h" +#include "errcode.h" +#include "error.h" +#include "fs.h" +#include "snapshot.h" + +#include + +/* + * Snapshot trees: + * + * Keys in BTREE_ID_snapshot_trees identify a whole tree of snapshot nodes; they + * exist to provide a stable identifier for the whole lifetime of a snapshot + * tree. + */ + +void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k); + + prt_printf(out, "subvol %u root snapshot %u", + le32_to_cpu(t.v->master_subvol), + le32_to_cpu(t.v->root_snapshot)); +} + +int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, + enum bkey_invalid_flags flags, + struct printbuf *err) +{ + if (bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1))) { + prt_printf(err, "bad pos"); + return -BCH_ERR_invalid_bkey; + } + + return 0; +} + +int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, + struct bch_snapshot_tree *s) +{ + int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), + BTREE_ITER_WITH_UPDATES, snapshot_tree, s); + + if (bch2_err_matches(ret, ENOENT)) + ret = -BCH_ERR_ENOENT_snapshot_tree; + return ret; +} + +struct bkey_i_snapshot_tree * +__bch2_snapshot_tree_create(struct btree_trans *trans) +{ + struct btree_iter iter; + int ret = bch2_bkey_get_empty_slot(trans, &iter, + BTREE_ID_snapshot_trees, POS(0, U32_MAX)); + struct bkey_i_snapshot_tree *s_t; + + if (ret == -BCH_ERR_ENOSPC_btree_slot) + ret = -BCH_ERR_ENOSPC_snapshot_tree; + if (ret) + return ERR_PTR(ret); + + s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree); + ret = PTR_ERR_OR_ZERO(s_t); + bch2_trans_iter_exit(trans, &iter); + return ret ? 
ERR_PTR(ret) : s_t; +} + +static int bch2_snapshot_tree_create(struct btree_trans *trans, + u32 root_id, u32 subvol_id, u32 *tree_id) +{ + struct bkey_i_snapshot_tree *n_tree = + __bch2_snapshot_tree_create(trans); + + if (IS_ERR(n_tree)) + return PTR_ERR(n_tree); + + n_tree->v.master_subvol = cpu_to_le32(subvol_id); + n_tree->v.root_snapshot = cpu_to_le32(root_id); + *tree_id = n_tree->k.p.offset; + return 0; +} + +/* Snapshot nodes: */ + +static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) +{ + const struct snapshot_t *s = __snapshot_t(t, id); + + if (s->skip[2] <= ancestor) + return s->skip[2]; + if (s->skip[1] <= ancestor) + return s->skip[1]; + if (s->skip[0] <= ancestor) + return s->skip[0]; + return s->parent; +} + +bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) +{ + struct snapshot_table *t; + bool ret; + + EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); + + rcu_read_lock(); + t = rcu_dereference(c->snapshots); + + while (id && id < ancestor - IS_ANCESTOR_BITMAP) + id = get_ancestor_below(t, id, ancestor); + + ret = id && id < ancestor + ? test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor) + : id == ancestor; + rcu_read_unlock(); + + return ret; +} + +static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) +{ + struct snapshot_table *t; + + rcu_read_lock(); + t = rcu_dereference(c->snapshots); + + while (id && id < ancestor) + id = __snapshot_t(t, id)->parent; + rcu_read_unlock(); + + return id == ancestor; +} + +struct snapshot_t_free_rcu { + struct rcu_head rcu; + struct snapshot_table *t; +}; + +static void snapshot_t_free_rcu(struct rcu_head *rcu) +{ + struct snapshot_t_free_rcu *free_rcu = + container_of(rcu, struct snapshot_t_free_rcu, rcu); + + kvfree(free_rcu->t); + kfree(free_rcu); +} + +static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) +{ + size_t idx = U32_MAX - id; + size_t new_size; + struct snapshot_table *new, *old; + + new_size = max(16UL, roundup_pow_of_two(idx + 1)); + + new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); + if (!new) + return NULL; + + old = rcu_dereference_protected(c->snapshots, true); + if (old) + memcpy(new->s, + rcu_dereference_protected(c->snapshots, true)->s, + sizeof(new->s[0]) * c->snapshot_table_size); + + rcu_assign_pointer(c->snapshots, new); + c->snapshot_table_size = new_size; + if (old) { + struct snapshot_t_free_rcu *rcu = + kmalloc(sizeof(*rcu), GFP_KERNEL|__GFP_NOFAIL); + + rcu->t = old; + call_rcu(&rcu->rcu, snapshot_t_free_rcu); + } + + return &rcu_dereference_protected(c->snapshots, true)->s[idx]; +} + +static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) +{ + size_t idx = U32_MAX - id; + + lockdep_assert_held(&c->snapshot_table_lock); + + if (likely(idx < c->snapshot_table_size)) + return &rcu_dereference_protected(c->snapshots, true)->s[idx]; + + return __snapshot_t_mut(c, id); +} + +void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); + + prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u", + BCH_SNAPSHOT_SUBVOL(s.v), + BCH_SNAPSHOT_DELETED(s.v), + le32_to_cpu(s.v->parent), + le32_to_cpu(s.v->children[0]), + le32_to_cpu(s.v->children[1]), + le32_to_cpu(s.v->subvol), + le32_to_cpu(s.v->tree)); + + if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth)) + prt_printf(out, " depth %u skiplist %u %u %u", + 
le32_to_cpu(s.v->depth), + le32_to_cpu(s.v->skip[0]), + le32_to_cpu(s.v->skip[1]), + le32_to_cpu(s.v->skip[2])); +} + +int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, + enum bkey_invalid_flags flags, + struct printbuf *err) +{ + struct bkey_s_c_snapshot s; + u32 i, id; + + if (bkey_gt(k.k->p, POS(0, U32_MAX)) || + bkey_lt(k.k->p, POS(0, 1))) { + prt_printf(err, "bad pos"); + return -BCH_ERR_invalid_bkey; + } + + s = bkey_s_c_to_snapshot(k); + + id = le32_to_cpu(s.v->parent); + if (id && id <= k.k->p.offset) { + prt_printf(err, "bad parent node (%u <= %llu)", + id, k.k->p.offset); + return -BCH_ERR_invalid_bkey; + } + + if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { + prt_printf(err, "children not normalized"); + return -BCH_ERR_invalid_bkey; + } + + if (s.v->children[0] && + s.v->children[0] == s.v->children[1]) { + prt_printf(err, "duplicate child nodes"); + return -BCH_ERR_invalid_bkey; + } + + for (i = 0; i < 2; i++) { + id = le32_to_cpu(s.v->children[i]); + + if (id >= k.k->p.offset) { + prt_printf(err, "bad child node (%u >= %llu)", + id, k.k->p.offset); + return -BCH_ERR_invalid_bkey; + } + } + + if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { + if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { + prt_printf(err, "skiplist not normalized"); + return -BCH_ERR_invalid_bkey; + } + + for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { + id = le32_to_cpu(s.v->skip[i]); + + if (!id != !s.v->parent || + (s.v->parent && + id <= k.k->p.offset)) { + prt_printf(err, "bad skiplist node %u)", id); + return -BCH_ERR_invalid_bkey; + } + } + } + + return 0; +} + +int bch2_mark_snapshot(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) +{ + struct bch_fs *c = trans->c; + struct snapshot_t *t; + u32 id = new.k->p.offset; + int ret = 0; + + mutex_lock(&c->snapshot_table_lock); + + t = snapshot_t_mut(c, id); + if (!t) { + ret = -BCH_ERR_ENOMEM_mark_snapshot; + goto err; + } + + if (new.k->type == KEY_TYPE_snapshot) { + struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); + u32 parent = id; + + t->parent = le32_to_cpu(s.v->parent); + t->children[0] = le32_to_cpu(s.v->children[0]); + t->children[1] = le32_to_cpu(s.v->children[1]); + t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? 
le32_to_cpu(s.v->subvol) : 0; + t->tree = le32_to_cpu(s.v->tree); + + if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { + t->depth = le32_to_cpu(s.v->depth); + t->skip[0] = le32_to_cpu(s.v->skip[0]); + t->skip[1] = le32_to_cpu(s.v->skip[1]); + t->skip[2] = le32_to_cpu(s.v->skip[2]); + } else { + t->depth = 0; + t->skip[0] = 0; + t->skip[1] = 0; + t->skip[2] = 0; + } + + while ((parent = bch2_snapshot_parent_early(c, parent)) && + parent - id - 1 < IS_ANCESTOR_BITMAP) + __set_bit(parent - id - 1, t->is_ancestor); + + if (BCH_SNAPSHOT_DELETED(s.v)) { + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); + c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots); + } + } else { + memset(t, 0, sizeof(*t)); + } +err: + mutex_unlock(&c->snapshot_table_lock); + return ret; +} + +int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, + struct bch_snapshot *s) +{ + return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), + BTREE_ITER_WITH_UPDATES, snapshot, s); +} + +int bch2_snapshot_live(struct btree_trans *trans, u32 id) +{ + struct bch_snapshot v; + int ret; + + if (!id) + return 0; + + ret = bch2_snapshot_lookup(trans, id, &v); + if (bch2_err_matches(ret, ENOENT)) + bch_err(trans->c, "snapshot node %u not found", id); + if (ret) + return ret; + + return !BCH_SNAPSHOT_DELETED(&v); +} + +/* + * If @k is a snapshot with just one live child, it's part of a linear chain, + * which we consider to be an equivalence class: and then after snapshot + * deletion cleanup, there should only be a single key at a given position in + * this equivalence class. + * + * This sets the equivalence class of @k to be the child's equivalence class, if + * it's part of such a linear chain: this correctly sets equivalence classes on + * startup if we run leaf to root (i.e. in natural key order). + */ +int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + unsigned i, nr_live = 0, live_idx = 0; + struct bkey_s_c_snapshot snap; + u32 id = k.k->p.offset, child[2]; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + snap = bkey_s_c_to_snapshot(k); + + child[0] = le32_to_cpu(snap.v->children[0]); + child[1] = le32_to_cpu(snap.v->children[1]); + + for (i = 0; i < 2; i++) { + int ret = bch2_snapshot_live(trans, child[i]); + + if (ret < 0) + return ret; + + if (ret) + live_idx = i; + nr_live += ret; + } + + mutex_lock(&c->snapshot_table_lock); + + snapshot_t_mut(c, id)->equiv = nr_live == 1 + ? 
snapshot_t_mut(c, child[live_idx])->equiv + : id; + + mutex_unlock(&c->snapshot_table_lock); + + return 0; +} + +/* fsck: */ + +static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) +{ + return snapshot_t(c, id)->children[child]; +} + +static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) +{ + return bch2_snapshot_child(c, id, 0); +} + +static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) +{ + return bch2_snapshot_child(c, id, 1); +} + +static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) +{ + u32 n, parent; + + n = bch2_snapshot_left_child(c, id); + if (n) + return n; + + while ((parent = bch2_snapshot_parent(c, id))) { + n = bch2_snapshot_right_child(c, parent); + if (n && n != id) + return n; + id = parent; + } + + return 0; +} + +static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) +{ + u32 id = snapshot_root; + u32 subvol = 0, s; + + while (id) { + s = snapshot_t(c, id)->subvol; + + if (s && (!subvol || s < subvol)) + subvol = s; + + id = bch2_snapshot_tree_next(c, id); + } + + return subvol; +} + +static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, + u32 snapshot_root, u32 *subvol_id) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_s_c_subvolume s; + bool found = false; + int ret; + + for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, + 0, k, ret) { + if (k.k->type != KEY_TYPE_subvolume) + continue; + + s = bkey_s_c_to_subvolume(k); + if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) + continue; + if (!BCH_SUBVOLUME_SNAP(s.v)) { + *subvol_id = s.k->p.offset; + found = true; + break; + } + } + + bch2_trans_iter_exit(trans, &iter); + + if (!ret && !found) { + struct bkey_i_subvolume *s; + + *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); + + s = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, *subvol_id), + 0, subvolume); + ret = PTR_ERR_OR_ZERO(s); + if (ret) + return ret; + + SET_BCH_SUBVOLUME_SNAP(&s->v, false); + } + + return ret; +} + +static int check_snapshot_tree(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c_snapshot_tree st; + struct bch_snapshot s; + struct bch_subvolume subvol; + struct printbuf buf = PRINTBUF; + u32 root_id; + int ret; + + if (k.k->type != KEY_TYPE_snapshot_tree) + return 0; + + st = bkey_s_c_to_snapshot_tree(k); + root_id = le32_to_cpu(st.v->root_snapshot); + + ret = bch2_snapshot_lookup(trans, root_id, &s); + if (ret && !bch2_err_matches(ret, ENOENT)) + goto err; + + if (fsck_err_on(ret || + root_id != bch2_snapshot_root(c, root_id) || + st.k->p.offset != le32_to_cpu(s.tree), + c, + "snapshot tree points to missing/incorrect snapshot:\n %s", + (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { + ret = bch2_btree_delete_at(trans, iter, 0); + goto err; + } + + ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), + false, 0, &subvol); + if (ret && !bch2_err_matches(ret, ENOENT)) + goto err; + + if (fsck_err_on(ret, c, + "snapshot tree points to missing subvolume:\n %s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || + fsck_err_on(!bch2_snapshot_is_ancestor_early(c, + le32_to_cpu(subvol.snapshot), + root_id), c, + "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || + 
fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c, + "snapshot tree points to snapshot subvolume:\n %s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { + struct bkey_i_snapshot_tree *u; + u32 subvol_id; + + ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); + if (ret) + goto err; + + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + u->v.master_subvol = cpu_to_le32(subvol_id); + st = snapshot_tree_i_to_s_c(u); + } +err: +fsck_err: + printbuf_exit(&buf); + return ret; +} + +/* + * For each snapshot_tree, make sure it points to the root of a snapshot tree + * and that snapshot entry points back to it, or delete it. + * + * And, make sure it points to a subvolume within that snapshot tree, or correct + * it to point to the oldest subvolume within that snapshot tree. + */ +int bch2_check_snapshot_trees(struct bch_fs *c) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + ret = bch2_trans_run(c, + for_each_btree_key_commit(&trans, iter, + BTREE_ID_snapshot_trees, POS_MIN, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + check_snapshot_tree(&trans, &iter, k))); + + if (ret) + bch_err(c, "error %i checking snapshot trees", ret); + return ret; +} + +/* + * Look up snapshot tree for @tree_id and find root, + * make sure @snap_id is a descendent: + */ +static int snapshot_tree_ptr_good(struct btree_trans *trans, + u32 snap_id, u32 tree_id) +{ + struct bch_snapshot_tree s_t; + int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); + + if (bch2_err_matches(ret, ENOENT)) + return 0; + if (ret) + return ret; + + return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); +} + +u32 bch2_snapshot_skiplist_get(struct bch_fs *c, u32 id) +{ + const struct snapshot_t *s; + + if (!id) + return 0; + + rcu_read_lock(); + s = snapshot_t(c, id); + if (s->parent) + id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); + rcu_read_unlock(); + + return id; +} + +static int snapshot_skiplist_good(struct btree_trans *trans, struct bch_snapshot s) +{ + struct bch_snapshot a; + unsigned i; + int ret; + + for (i = 0; i < 3; i++) { + if (!s.parent != !s.skip[i]) + return false; + + if (!s.parent) + continue; + + ret = bch2_snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a); + if (bch2_err_matches(ret, ENOENT)) + return false; + if (ret) + return ret; + + if (a.tree != s.tree) + return false; + } + + return true; +} + +/* + * snapshot_tree pointer was incorrect: look up root snapshot node, make sure + * its snapshot_tree pointer is correct (allocate new one if necessary), then + * update this node's pointer to root node's pointer: + */ +static int snapshot_tree_ptr_repair(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + struct bch_snapshot *s) +{ + struct bch_fs *c = trans->c; + struct btree_iter root_iter; + struct bch_snapshot_tree s_t; + struct bkey_s_c_snapshot root; + struct bkey_i_snapshot *u; + u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; + int ret; + + root = bch2_bkey_get_iter_typed(trans, &root_iter, + BTREE_ID_snapshots, POS(0, root_id), + BTREE_ITER_WITH_UPDATES, snapshot); + ret = bkey_err(root); + if (ret) + goto err; + + tree_id = le32_to_cpu(root.v->tree); + + ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); + if (ret && !bch2_err_matches(ret, ENOENT)) + return ret; + + if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { + u = 
bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u) ?: + bch2_snapshot_tree_create(trans, root_id, + bch2_snapshot_tree_oldest_subvol(c, root_id), + &tree_id); + if (ret) + goto err; + + u->v.tree = cpu_to_le32(tree_id); + if (k.k->p.offset == root_id) + *s = u->v; + } + + if (k.k->p.offset != root_id) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + u->v.tree = cpu_to_le32(tree_id); + *s = u->v; + } +err: + bch2_trans_iter_exit(trans, &root_iter); + return ret; +} + +static int check_snapshot(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bch_fs *c = trans->c; + struct bch_snapshot s; + struct bch_subvolume subvol; + struct bch_snapshot v; + struct bkey_i_snapshot *u; + u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); + u32 real_depth; + struct printbuf buf = PRINTBUF; + bool should_have_subvol; + u32 i, id; + int ret = 0; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + memset(&s, 0, sizeof(s)); + memcpy(&s, k.v, bkey_val_bytes(k.k)); + + id = le32_to_cpu(s.parent); + if (id) { + ret = bch2_snapshot_lookup(trans, id, &v); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot with nonexistent parent:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + if (ret) + goto err; + + if (le32_to_cpu(v.children[0]) != k.k->p.offset && + le32_to_cpu(v.children[1]) != k.k->p.offset) { + bch_err(c, "snapshot parent %u missing pointer to child %llu", + id, k.k->p.offset); + ret = -EINVAL; + goto err; + } + } + + for (i = 0; i < 2 && s.children[i]; i++) { + id = le32_to_cpu(s.children[i]); + + ret = bch2_snapshot_lookup(trans, id, &v); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot node %llu has nonexistent child %u", + k.k->p.offset, id); + if (ret) + goto err; + + if (le32_to_cpu(v.parent) != k.k->p.offset) { + bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", + id, le32_to_cpu(v.parent), k.k->p.offset); + ret = -EINVAL; + goto err; + } + } + + should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && + !BCH_SNAPSHOT_DELETED(&s); + + if (should_have_subvol) { + id = le32_to_cpu(s.subvol); + ret = bch2_subvolume_get(trans, id, 0, false, &subvol); + if (bch2_err_matches(ret, ENOENT)) + bch_err(c, "snapshot points to nonexistent subvolume:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + if (ret) + goto err; + + if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { + bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", + k.k->p.offset); + ret = -EINVAL; + goto err; + } + } else { + if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + u->v.subvol = 0; + s = u->v; + } + } + + ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); + if (ret < 0) + goto err; + + if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = snapshot_tree_ptr_repair(trans, iter, k, &s); + if (ret) + goto err; + } + ret = 0; + + real_depth = bch2_snapshot_depth(c, parent_id); + + if (le32_to_cpu(s.depth) != real_depth && + (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || + fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s", + real_depth, 
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + u->v.depth = cpu_to_le32(real_depth); + s = u->v; + } + + ret = snapshot_skiplist_good(trans, s); + if (ret < 0) + goto err; + + if (!ret && + (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || + fsck_err(c, "snapshot with bad skiplist field:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); + ret = PTR_ERR_OR_ZERO(u); + if (ret) + goto err; + + for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) + u->v.skip[i] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent_id)); + + bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); + s = u->v; + } + ret = 0; +err: +fsck_err: + printbuf_exit(&buf); + return ret; +} + +int bch2_check_snapshots(struct bch_fs *c) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + /* + * We iterate backwards as checking/fixing the depth field requires that + * the parent's depth already be correct: + */ + ret = bch2_trans_run(c, + for_each_btree_key_reverse_commit(&trans, iter, + BTREE_ID_snapshots, POS_MAX, + BTREE_ITER_PREFETCH, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + check_snapshot(&trans, &iter, k))); + if (ret) + bch_err_fn(c, ret); + return ret; +} + +/* + * Mark a snapshot as deleted, for future cleanup: + */ +int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) +{ + struct btree_iter iter; + struct bkey_i_snapshot *s; + int ret = 0; + + s = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshots, POS(0, id), + 0, snapshot); + ret = PTR_ERR_OR_ZERO(s); + if (unlikely(ret)) { + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), + trans->c, "missing snapshot %u", id); + return ret; + } + + /* already deleted? 
*/ + if (BCH_SNAPSHOT_DELETED(&s->v)) + goto err; + + SET_BCH_SNAPSHOT_DELETED(&s->v, true); + SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); + s->v.subvol = 0; +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; + struct btree_iter tree_iter = (struct btree_iter) { NULL }; + struct bkey_s_c_snapshot s; + u32 parent_id; + unsigned i; + int ret = 0; + + s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), + BTREE_ITER_INTENT, snapshot); + ret = bkey_err(s); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, + "missing snapshot %u", id); + + if (ret) + goto err; + + BUG_ON(!BCH_SNAPSHOT_DELETED(s.v)); + parent_id = le32_to_cpu(s.v->parent); + + if (parent_id) { + struct bkey_i_snapshot *parent; + + parent = bch2_bkey_get_mut_typed(trans, &p_iter, + BTREE_ID_snapshots, POS(0, parent_id), + 0, snapshot); + ret = PTR_ERR_OR_ZERO(parent); + if (unlikely(ret)) { + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, + "missing snapshot %u", parent_id); + goto err; + } + + for (i = 0; i < 2; i++) + if (le32_to_cpu(parent->v.children[i]) == id) + break; + + if (i == 2) + bch_err(c, "snapshot %u missing child pointer to %u", + parent_id, id); + else + parent->v.children[i] = 0; + + if (le32_to_cpu(parent->v.children[0]) < + le32_to_cpu(parent->v.children[1])) + swap(parent->v.children[0], + parent->v.children[1]); + } else { + /* + * We're deleting the root of a snapshot tree: update the + * snapshot_tree entry to point to the new root, or delete it if + * this is the last snapshot ID in this tree: + */ + struct bkey_i_snapshot_tree *s_t; + + BUG_ON(s.v->children[1]); + + s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, + BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), + 0, snapshot_tree); + ret = PTR_ERR_OR_ZERO(s_t); + if (ret) + goto err; + + if (s.v->children[0]) { + s_t->v.root_snapshot = s.v->children[0]; + } else { + s_t->k.type = KEY_TYPE_deleted; + set_bkey_val_u64s(&s_t->k, 0); + } + } + + ret = bch2_btree_delete_at(trans, &iter, 0); +err: + bch2_trans_iter_exit(trans, &tree_iter); + bch2_trans_iter_exit(trans, &p_iter); + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, + u32 *new_snapids, + u32 *snapshot_subvols, + unsigned nr_snapids) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_i_snapshot *n; + struct bkey_s_c k; + unsigned i, j; + u32 depth = bch2_snapshot_depth(c, parent); + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, + POS_MIN, BTREE_ITER_INTENT); + k = bch2_btree_iter_peek(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + for (i = 0; i < nr_snapids; i++) { + k = bch2_btree_iter_prev_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (!k.k || !k.k->p.offset) { + ret = -BCH_ERR_ENOSPC_snapshot_create; + goto err; + } + + n = bch2_bkey_alloc(trans, &iter, 0, snapshot); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + goto err; + + n->v.flags = 0; + n->v.parent = cpu_to_le32(parent); + n->v.subvol = cpu_to_le32(snapshot_subvols[i]); + n->v.tree = cpu_to_le32(tree); + n->v.depth = cpu_to_le32(depth); + + for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) + n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent)); + + bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); + SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); + + ret = 
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, + bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); + if (ret) + goto err; + + new_snapids[i] = iter.pos.offset; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +/* + * Create new snapshot IDs as children of an existing snapshot ID: + */ +static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent, + u32 *new_snapids, + u32 *snapshot_subvols, + unsigned nr_snapids) +{ + struct btree_iter iter; + struct bkey_i_snapshot *n_parent; + int ret = 0; + + n_parent = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_snapshots, POS(0, parent), + 0, snapshot); + ret = PTR_ERR_OR_ZERO(n_parent); + if (unlikely(ret)) { + if (bch2_err_matches(ret, ENOENT)) + bch_err(trans->c, "snapshot %u not found", parent); + return ret; + } + + if (n_parent->v.children[0] || n_parent->v.children[1]) { + bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); + ret = -EINVAL; + goto err; + } + + ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), + new_snapids, snapshot_subvols, nr_snapids); + if (ret) + goto err; + + n_parent->v.children[0] = cpu_to_le32(new_snapids[0]); + n_parent->v.children[1] = cpu_to_le32(new_snapids[1]); + n_parent->v.subvol = 0; + SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +/* + * Create a snapshot node that is the root of a new tree: + */ +static int bch2_snapshot_node_create_tree(struct btree_trans *trans, + u32 *new_snapids, + u32 *snapshot_subvols, + unsigned nr_snapids) +{ + struct bkey_i_snapshot_tree *n_tree; + int ret; + + n_tree = __bch2_snapshot_tree_create(trans); + ret = PTR_ERR_OR_ZERO(n_tree) ?: + create_snapids(trans, 0, n_tree->k.p.offset, + new_snapids, snapshot_subvols, nr_snapids); + if (ret) + return ret; + + n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]); + n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]); + return 0; +} + +int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, + u32 *new_snapids, + u32 *snapshot_subvols, + unsigned nr_snapids) +{ + BUG_ON((parent == 0) != (nr_snapids == 1)); + BUG_ON((parent != 0) != (nr_snapids == 2)); + + return parent + ? bch2_snapshot_node_create_children(trans, parent, + new_snapids, snapshot_subvols, nr_snapids) + : bch2_snapshot_node_create_tree(trans, + new_snapids, snapshot_subvols, nr_snapids); + +} + +/* + * If we have an unlinked inode in an internal snapshot node, and the inode + * really has been deleted in all child snapshots, how does this get cleaned up? + * + * first there is the problem of how keys that have been overwritten in all + * child snapshots get deleted (unimplemented?), but inodes may perhaps be + * special? 
+ * + * also: unlinked inode in internal snapshot appears to not be getting deleted + * correctly if inode doesn't exist in leaf snapshots + */ + +static int snapshot_delete_key(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + snapshot_id_list *deleted, + snapshot_id_list *equiv_seen, + struct bpos *last_pos) +{ + struct bch_fs *c = trans->c; + u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); + + if (!bkey_eq(k.k->p, *last_pos)) + equiv_seen->nr = 0; + *last_pos = k.k->p; + + if (snapshot_list_has_id(deleted, k.k->p.snapshot) || + snapshot_list_has_id(equiv_seen, equiv)) { + return bch2_btree_delete_at(trans, iter, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + } else { + return snapshot_list_add(c, equiv_seen, equiv); + } +} + +/* + * For a given snapshot, if it doesn't have a subvolume that points to it, and + * it doesn't have child snapshot nodes - it's now redundant and we can mark it + * as deleted. + */ +static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_s_c k) +{ + struct bkey_s_c_snapshot snap; + u32 children[2]; + int ret; + + if (k.k->type != KEY_TYPE_snapshot) + return 0; + + snap = bkey_s_c_to_snapshot(k); + if (BCH_SNAPSHOT_DELETED(snap.v) || + BCH_SNAPSHOT_SUBVOL(snap.v)) + return 0; + + children[0] = le32_to_cpu(snap.v->children[0]); + children[1] = le32_to_cpu(snap.v->children[1]); + + ret = bch2_snapshot_live(trans, children[0]) ?: + bch2_snapshot_live(trans, children[1]); + if (ret < 0) + return ret; + + if (!ret) + return bch2_snapshot_node_set_deleted(trans, k.k->p.offset); + return 0; +} + +int bch2_delete_dead_snapshots(struct bch_fs *c) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_s_c_snapshot snap; + snapshot_id_list deleted = { 0 }; + u32 i, id; + int ret = 0; + + if (!test_bit(BCH_FS_STARTED, &c->flags)) { + ret = bch2_fs_read_write_early(c); + if (ret) { + bch_err(c, "error deleting dead snapshots: error going rw: %s", bch2_err_str(ret)); + return ret; + } + } + + bch2_trans_init(&trans, c, 0, 0); + + /* + * For every snapshot node: If we have no live children and it's not + * pointed to by a subvolume, delete it: + */ + ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, + NULL, NULL, 0, + bch2_delete_redundant_snapshot(&trans, &iter, k)); + if (ret) { + bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret)); + goto err; + } + + ret = for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, + bch2_snapshot_set_equiv(&trans, k)); + if (ret) { + bch_err(c, "error in bch2_snapshot_set_equiv: %s", bch2_err_str(ret)); + goto err; + } + + for_each_btree_key(&trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, ret) { + if (k.k->type != KEY_TYPE_snapshot) + continue; + + snap = bkey_s_c_to_snapshot(k); + if (BCH_SNAPSHOT_DELETED(snap.v)) { + ret = snapshot_list_add(c, &deleted, k.k->p.offset); + if (ret) + break; + } + } + bch2_trans_iter_exit(&trans, &iter); + + if (ret) { + bch_err_msg(c, ret, "walking snapshots"); + goto err; + } + + for (id = 0; id < BTREE_ID_NR; id++) { + struct bpos last_pos = POS_MIN; + snapshot_id_list equiv_seen = { 0 }; + + if (!btree_type_has_snapshots(id)) + continue; + + ret = for_each_btree_key_commit(&trans, iter, + id, POS_MIN, + BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_NOFAIL, + snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)); + + darray_exit(&equiv_seen); + + if (ret) { + bch_err_msg(c, ret, 
"deleting keys from dying snapshots"); + goto err; + } + } + + for (i = 0; i < deleted.nr; i++) { + u32 node_to_delete = deleted.data[i]; + + ret = commit_do(&trans, NULL, NULL, 0, + bch2_snapshot_node_delete(&trans, node_to_delete)); + if (ret) { + bch_err_msg(c, ret, "deleting snapshot %u", node_to_delete); + goto err; + } + } + + clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); +err: + darray_exit(&deleted); + bch2_trans_exit(&trans); + if (ret) + bch_err_fn(c, ret); + return ret; +} + +void bch2_delete_dead_snapshots_work(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); + + if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) + bch2_delete_dead_snapshots(c); + bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); +} + +void bch2_delete_dead_snapshots_async(struct bch_fs *c) +{ + if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && + !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) + bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); +} + +int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, + struct btree_trans_commit_hook *h) +{ + struct bch_fs *c = trans->c; + + set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); + + if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots) + return 0; + + bch2_delete_dead_snapshots_async(c); + return 0; +} + +int bch2_snapshots_read(struct bch_fs *c) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + ret = bch2_trans_run(c, + for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, + POS_MIN, 0, k, + bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: + bch2_snapshot_set_equiv(&trans, k))); + if (ret) + bch_err_fn(c, ret); + return ret; +} + +void bch2_fs_snapshots_exit(struct bch_fs *c) +{ + kfree(rcu_dereference_protected(c->snapshots, true)); +} diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h new file mode 100644 index 000000000000..826bff2ff7be --- /dev/null +++ b/fs/bcachefs/snapshot.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SNAPSHOT_H +#define _BCACHEFS_SNAPSHOT_H + +enum bkey_invalid_flags; + +void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c, + enum bkey_invalid_flags, struct printbuf *); + +#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ + .key_invalid = bch2_snapshot_tree_invalid, \ + .val_to_text = bch2_snapshot_tree_to_text, \ + .min_val_size = 8, \ +}) + +struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); + +int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); + +void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c, + enum bkey_invalid_flags, struct printbuf *); +int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s_c, unsigned); + +#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ + .key_invalid = bch2_snapshot_invalid, \ + .val_to_text = bch2_snapshot_to_text, \ + .atomic_trigger = bch2_mark_snapshot, \ + .min_val_size = 24, \ +}) + +static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) +{ + return &t->s[U32_MAX - id]; +} + +static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) +{ + return __snapshot_t(rcu_dereference(c->snapshots), id); +} + +static inline 
u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = snapshot_t(c, id)->tree; + rcu_read_unlock(); + + return id; +} + +static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +{ + return snapshot_t(c, id)->parent; +} + +static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_parent_early(c, id); + rcu_read_unlock(); + + return id; +} + +static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) +{ +#ifdef CONFIG_BCACHEFS_DEBUG + u32 parent = snapshot_t(c, id)->parent; + + if (parent && + snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1) + panic("id %u depth=%u parent %u depth=%u\n", + id, snapshot_t(c, id)->depth, + parent, snapshot_t(c, parent)->depth); + + return parent; +#else + return snapshot_t(c, id)->parent; +#endif +} + +static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_parent(c, id); + rcu_read_unlock(); + + return id; +} + +static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) +{ + rcu_read_lock(); + while (n--) + id = __bch2_snapshot_parent(c, id); + rcu_read_unlock(); + + return id; +} + +u32 bch2_snapshot_skiplist_get(struct bch_fs *, u32); + +static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) +{ + u32 parent; + + rcu_read_lock(); + while ((parent = __bch2_snapshot_parent(c, id))) + id = parent; + rcu_read_unlock(); + + return id; +} + +static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) +{ + return snapshot_t(c, id)->equiv; +} + +static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +{ + rcu_read_lock(); + id = __bch2_snapshot_equiv(c, id); + rcu_read_unlock(); + + return id; +} + +static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) +{ + return id == bch2_snapshot_equiv(c, id); +} + +static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) +{ + const struct snapshot_t *s; + bool ret; + + rcu_read_lock(); + s = snapshot_t(c, id); + ret = s->children[0]; + rcu_read_unlock(); + + return ret; +} + +static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) +{ + return !bch2_snapshot_is_internal_node(c, id); +} + +static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) +{ + const struct snapshot_t *s; + u32 parent = __bch2_snapshot_parent(c, id); + + if (!parent) + return 0; + + s = snapshot_t(c, __bch2_snapshot_parent(c, id)); + if (id == s->children[0]) + return s->children[1]; + if (id == s->children[1]) + return s->children[0]; + return 0; +} + +static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) +{ + u32 depth; + + rcu_read_lock(); + depth = parent ? snapshot_t(c, parent)->depth + 1 : 0; + rcu_read_unlock(); + + return depth; +} + +bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); + +static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) +{ + return id == ancestor + ? 
true + : __bch2_snapshot_is_ancestor(c, id, ancestor); +} + +static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) +{ + const struct snapshot_t *t; + bool ret; + + rcu_read_lock(); + t = snapshot_t(c, id); + ret = (t->children[0]|t->children[1]) != 0; + rcu_read_unlock(); + + return ret; +} + +static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) +{ + u32 *i; + + darray_for_each(*s, i) + if (*i == id) + return true; + return false; +} + +static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id) +{ + u32 *i; + + darray_for_each(*s, i) + if (bch2_snapshot_is_ancestor(c, id, *i)) + return true; + return false; +} + +static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) +{ + int ret; + + BUG_ON(snapshot_list_has_id(s, id)); + ret = darray_push(s, id); + if (ret) + bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); + return ret; +} + +int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, + struct bch_snapshot *s); +int bch2_snapshot_get_subvol(struct btree_trans *, u32, + struct bch_subvolume *); +int bch2_snapshot_live(struct btree_trans *trans, u32 id); +int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k); + +/* only exported for tests: */ +int bch2_snapshot_node_create(struct btree_trans *, u32, + u32 *, u32 *, unsigned); + +int bch2_check_snapshot_trees(struct bch_fs *); +int bch2_check_snapshots(struct bch_fs *); + +int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); +int bch2_delete_dead_snapshots_hook(struct btree_trans *, + struct btree_trans_commit_hook *); +void bch2_delete_dead_snapshots_work(struct work_struct *); + +int bch2_snapshots_read(struct bch_fs *); +void bch2_fs_snapshots_exit(struct bch_fs *); + +#endif /* _BCACHEFS_SNAPSHOT_H */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 7de6fe0cdd43..0214a98deb4f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,885 +6,13 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "snapshot.h" #include "subvolume.h" #include static int bch2_subvolume_delete(struct btree_trans *, u32); -static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) -{ - const struct snapshot_t *s = __snapshot_t(t, id); - - if (s->skip[2] <= ancestor) - return s->skip[2]; - if (s->skip[1] <= ancestor) - return s->skip[1]; - if (s->skip[0] <= ancestor) - return s->skip[0]; - return s->parent; -} - -bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) -{ - struct snapshot_table *t; - bool ret; - - EBUG_ON(c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_snapshots); - - rcu_read_lock(); - t = rcu_dereference(c->snapshots); - - while (id && id < ancestor - IS_ANCESTOR_BITMAP) - id = get_ancestor_below(t, id, ancestor); - - ret = id && id < ancestor - ? test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor) - : id == ancestor; - rcu_read_unlock(); - - return ret; -} - -static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) -{ - struct snapshot_table *t; - - rcu_read_lock(); - t = rcu_dereference(c->snapshots); - - while (id && id < ancestor) - id = __snapshot_t(t, id)->parent; - rcu_read_unlock(); - - return id == ancestor; -} - -static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent) -{ - u32 depth; - - rcu_read_lock(); - depth = parent ? 
snapshot_t(c, parent)->depth + 1 : 0; - rcu_read_unlock(); - - return depth; -} - -struct snapshot_t_free_rcu { - struct rcu_head rcu; - struct snapshot_table *t; -}; - -static void snapshot_t_free_rcu(struct rcu_head *rcu) -{ - struct snapshot_t_free_rcu *free_rcu = - container_of(rcu, struct snapshot_t_free_rcu, rcu); - - kvfree(free_rcu->t); - kfree(free_rcu); -} - -static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id) -{ - size_t idx = U32_MAX - id; - size_t new_size; - struct snapshot_table *new, *old; - - new_size = max(16UL, roundup_pow_of_two(idx + 1)); - - new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL); - if (!new) - return NULL; - - old = rcu_dereference_protected(c->snapshots, true); - if (old) - memcpy(new->s, - rcu_dereference_protected(c->snapshots, true)->s, - sizeof(new->s[0]) * c->snapshot_table_size); - - rcu_assign_pointer(c->snapshots, new); - c->snapshot_table_size = new_size; - if (old) { - struct snapshot_t_free_rcu *rcu = - kmalloc(sizeof(*rcu), GFP_KERNEL|__GFP_NOFAIL); - - rcu->t = old; - call_rcu(&rcu->rcu, snapshot_t_free_rcu); - } - - return &rcu_dereference_protected(c->snapshots, true)->s[idx]; -} - -static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id) -{ - size_t idx = U32_MAX - id; - - lockdep_assert_held(&c->snapshot_table_lock); - - if (likely(idx < c->snapshot_table_size)) - return &rcu_dereference_protected(c->snapshots, true)->s[idx]; - - return __snapshot_t_mut(c, id); -} - -/* Snapshot tree: */ - -void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) -{ - struct bkey_s_c_snapshot_tree t = bkey_s_c_to_snapshot_tree(k); - - prt_printf(out, "subvol %u root snapshot %u", - le32_to_cpu(t.v->master_subvol), - le32_to_cpu(t.v->root_snapshot)); -} - -int bch2_snapshot_tree_invalid(const struct bch_fs *c, struct bkey_s_c k, - enum bkey_invalid_flags flags, - struct printbuf *err) -{ - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } - - return 0; -} - -int bch2_snapshot_tree_lookup(struct btree_trans *trans, u32 id, - struct bch_snapshot_tree *s) -{ - int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_snapshot_trees, POS(0, id), - BTREE_ITER_WITH_UPDATES, snapshot_tree, s); - - if (bch2_err_matches(ret, ENOENT)) - ret = -BCH_ERR_ENOENT_snapshot_tree; - return ret; -} - -static struct bkey_i_snapshot_tree * -__snapshot_tree_create(struct btree_trans *trans) -{ - struct btree_iter iter; - int ret = bch2_bkey_get_empty_slot(trans, &iter, - BTREE_ID_snapshot_trees, POS(0, U32_MAX)); - struct bkey_i_snapshot_tree *s_t; - - if (ret == -BCH_ERR_ENOSPC_btree_slot) - ret = -BCH_ERR_ENOSPC_snapshot_tree; - if (ret) - return ERR_PTR(ret); - - s_t = bch2_bkey_alloc(trans, &iter, 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(s_t); - bch2_trans_iter_exit(trans, &iter); - return ret ? 
ERR_PTR(ret) : s_t; -} - -static int snapshot_tree_create(struct btree_trans *trans, - u32 root_id, u32 subvol_id, u32 *tree_id) -{ - struct bkey_i_snapshot_tree *n_tree = - __snapshot_tree_create(trans); - - if (IS_ERR(n_tree)) - return PTR_ERR(n_tree); - - n_tree->v.master_subvol = cpu_to_le32(subvol_id); - n_tree->v.root_snapshot = cpu_to_le32(root_id); - *tree_id = n_tree->k.p.offset; - return 0; -} - -/* Snapshot nodes: */ - -void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) -{ - struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); - - prt_printf(out, "is_subvol %llu deleted %llu parent %10u children %10u %10u subvol %u tree %u", - BCH_SNAPSHOT_SUBVOL(s.v), - BCH_SNAPSHOT_DELETED(s.v), - le32_to_cpu(s.v->parent), - le32_to_cpu(s.v->children[0]), - le32_to_cpu(s.v->children[1]), - le32_to_cpu(s.v->subvol), - le32_to_cpu(s.v->tree)); - - if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, depth)) - prt_printf(out, " depth %u skiplist %u %u %u", - le32_to_cpu(s.v->depth), - le32_to_cpu(s.v->skip[0]), - le32_to_cpu(s.v->skip[1]), - le32_to_cpu(s.v->skip[2])); -} - -int bch2_snapshot_invalid(const struct bch_fs *c, struct bkey_s_c k, - enum bkey_invalid_flags flags, - struct printbuf *err) -{ - struct bkey_s_c_snapshot s; - u32 i, id; - - if (bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1))) { - prt_printf(err, "bad pos"); - return -BCH_ERR_invalid_bkey; - } - - s = bkey_s_c_to_snapshot(k); - - id = le32_to_cpu(s.v->parent); - if (id && id <= k.k->p.offset) { - prt_printf(err, "bad parent node (%u <= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } - - if (le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1])) { - prt_printf(err, "children not normalized"); - return -BCH_ERR_invalid_bkey; - } - - if (s.v->children[0] && - s.v->children[0] == s.v->children[1]) { - prt_printf(err, "duplicate child nodes"); - return -BCH_ERR_invalid_bkey; - } - - for (i = 0; i < 2; i++) { - id = le32_to_cpu(s.v->children[i]); - - if (id >= k.k->p.offset) { - prt_printf(err, "bad child node (%u >= %llu)", - id, k.k->p.offset); - return -BCH_ERR_invalid_bkey; - } - } - - if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { - if (le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2])) { - prt_printf(err, "skiplist not normalized"); - return -BCH_ERR_invalid_bkey; - } - - for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { - id = le32_to_cpu(s.v->skip[i]); - - if (!id != !s.v->parent || - (s.v->parent && - id <= k.k->p.offset)) { - prt_printf(err, "bad skiplist node %u)", id); - return -BCH_ERR_invalid_bkey; - } - } - } - - return 0; -} - -int bch2_mark_snapshot(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct snapshot_t *t; - u32 id = new.k->p.offset; - int ret = 0; - - mutex_lock(&c->snapshot_table_lock); - - t = snapshot_t_mut(c, id); - if (!t) { - ret = -BCH_ERR_ENOMEM_mark_snapshot; - goto err; - } - - if (new.k->type == KEY_TYPE_snapshot) { - struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); - u32 parent = id; - - t->parent = le32_to_cpu(s.v->parent); - t->children[0] = le32_to_cpu(s.v->children[0]); - t->children[1] = le32_to_cpu(s.v->children[1]); - t->subvol = BCH_SNAPSHOT_SUBVOL(s.v) ? 
le32_to_cpu(s.v->subvol) : 0; - t->tree = le32_to_cpu(s.v->tree); - - if (bkey_val_bytes(s.k) > offsetof(struct bch_snapshot, depth)) { - t->depth = le32_to_cpu(s.v->depth); - t->skip[0] = le32_to_cpu(s.v->skip[0]); - t->skip[1] = le32_to_cpu(s.v->skip[1]); - t->skip[2] = le32_to_cpu(s.v->skip[2]); - } else { - t->depth = 0; - t->skip[0] = 0; - t->skip[1] = 0; - t->skip[2] = 0; - } - - while ((parent = bch2_snapshot_parent_early(c, parent)) && - parent - id - 1 < IS_ANCESTOR_BITMAP) - __set_bit(parent - id - 1, t->is_ancestor); - - if (BCH_SNAPSHOT_DELETED(s.v)) { - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_delete_dead_snapshots); - } - } else { - memset(t, 0, sizeof(*t)); - } -err: - mutex_unlock(&c->snapshot_table_lock); - return ret; -} - -static int snapshot_lookup(struct btree_trans *trans, u32 id, - struct bch_snapshot *s) -{ - return bch2_bkey_get_val_typed(trans, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_WITH_UPDATES, snapshot, s); -} - -static int snapshot_live(struct btree_trans *trans, u32 id) -{ - struct bch_snapshot v; - int ret; - - if (!id) - return 0; - - ret = snapshot_lookup(trans, id, &v); - if (bch2_err_matches(ret, ENOENT)) - bch_err(trans->c, "snapshot node %u not found", id); - if (ret) - return ret; - - return !BCH_SNAPSHOT_DELETED(&v); -} - -static int bch2_snapshot_set_equiv(struct btree_trans *trans, struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - unsigned i, nr_live = 0, live_idx = 0; - struct bkey_s_c_snapshot snap; - u32 id = k.k->p.offset, child[2]; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - snap = bkey_s_c_to_snapshot(k); - - child[0] = le32_to_cpu(snap.v->children[0]); - child[1] = le32_to_cpu(snap.v->children[1]); - - for (i = 0; i < 2; i++) { - int ret = snapshot_live(trans, child[i]); - - if (ret < 0) - return ret; - - if (ret) - live_idx = i; - nr_live += ret; - } - - mutex_lock(&c->snapshot_table_lock); - - snapshot_t_mut(c, id)->equiv = nr_live == 1 - ? 
snapshot_t_mut(c, child[live_idx])->equiv - : id; - - mutex_unlock(&c->snapshot_table_lock); - - return 0; -} - -/* fsck: */ - -static u32 bch2_snapshot_child(struct bch_fs *c, u32 id, unsigned child) -{ - return snapshot_t(c, id)->children[child]; -} - -static u32 bch2_snapshot_left_child(struct bch_fs *c, u32 id) -{ - return bch2_snapshot_child(c, id, 0); -} - -static u32 bch2_snapshot_right_child(struct bch_fs *c, u32 id) -{ - return bch2_snapshot_child(c, id, 1); -} - -static u32 bch2_snapshot_tree_next(struct bch_fs *c, u32 id) -{ - u32 n, parent; - - n = bch2_snapshot_left_child(c, id); - if (n) - return n; - - while ((parent = bch2_snapshot_parent(c, id))) { - n = bch2_snapshot_right_child(c, parent); - if (n && n != id) - return n; - id = parent; - } - - return 0; -} - -static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) -{ - u32 id = snapshot_root; - u32 subvol = 0, s; - - while (id) { - s = snapshot_t(c, id)->subvol; - - if (s && (!subvol || s < subvol)) - subvol = s; - - id = bch2_snapshot_tree_next(c, id); - } - - return subvol; -} - -static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, - u32 snapshot_root, u32 *subvol_id) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_c_subvolume s; - bool found = false; - int ret; - - for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, - 0, k, ret) { - if (k.k->type != KEY_TYPE_subvolume) - continue; - - s = bkey_s_c_to_subvolume(k); - if (!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.v->snapshot), snapshot_root)) - continue; - if (!BCH_SUBVOLUME_SNAP(s.v)) { - *subvol_id = s.k->p.offset; - found = true; - break; - } - } - - bch2_trans_iter_exit(trans, &iter); - - if (!ret && !found) { - struct bkey_i_subvolume *s; - - *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); - - s = bch2_bkey_get_mut_typed(trans, &iter, - BTREE_ID_subvolumes, POS(0, *subvol_id), - 0, subvolume); - ret = PTR_ERR_OR_ZERO(s); - if (ret) - return ret; - - SET_BCH_SUBVOLUME_SNAP(&s->v, false); - } - - return ret; -} - -static int check_snapshot_tree(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - struct bkey_s_c_snapshot_tree st; - struct bch_snapshot s; - struct bch_subvolume subvol; - struct printbuf buf = PRINTBUF; - u32 root_id; - int ret; - - if (k.k->type != KEY_TYPE_snapshot_tree) - return 0; - - st = bkey_s_c_to_snapshot_tree(k); - root_id = le32_to_cpu(st.v->root_snapshot); - - ret = snapshot_lookup(trans, root_id, &s); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - - if (fsck_err_on(ret || - root_id != bch2_snapshot_root(c, root_id) || - st.k->p.offset != le32_to_cpu(s.tree), - c, - "snapshot tree points to missing/incorrect snapshot:\n %s", - (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { - ret = bch2_btree_delete_at(trans, iter, 0); - goto err; - } - - ret = bch2_subvolume_get(trans, le32_to_cpu(st.v->master_subvol), - false, 0, &subvol); - if (ret && !bch2_err_matches(ret, ENOENT)) - goto err; - - if (fsck_err_on(ret, c, - "snapshot tree points to missing subvolume:\n %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(!bch2_snapshot_is_ancestor_early(c, - le32_to_cpu(subvol.snapshot), - root_id), c, - "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - 
fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol), c, - "snapshot tree points to snapshot subvolume:\n %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { - struct bkey_i_snapshot_tree *u; - u32 subvol_id; - - ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); - if (ret) - goto err; - - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - u->v.master_subvol = cpu_to_le32(subvol_id); - st = snapshot_tree_i_to_s_c(u); - } -err: -fsck_err: - printbuf_exit(&buf); - return ret; -} - -/* - * For each snapshot_tree, make sure it points to the root of a snapshot tree - * and that snapshot entry points back to it, or delete it. - * - * And, make sure it points to a subvolume within that snapshot tree, or correct - * it to point to the oldest subvolume within that snapshot tree. - */ -int bch2_check_snapshot_trees(struct bch_fs *c) -{ - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, - BTREE_ID_snapshot_trees, POS_MIN, - BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_snapshot_tree(&trans, &iter, k))); - - if (ret) - bch_err(c, "error %i checking snapshot trees", ret); - return ret; -} - -/* - * Look up snapshot tree for @tree_id and find root, - * make sure @snap_id is a descendent: - */ -static int snapshot_tree_ptr_good(struct btree_trans *trans, - u32 snap_id, u32 tree_id) -{ - struct bch_snapshot_tree s_t; - int ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); - - if (bch2_err_matches(ret, ENOENT)) - return 0; - if (ret) - return ret; - - return bch2_snapshot_is_ancestor_early(trans->c, snap_id, le32_to_cpu(s_t.root_snapshot)); -} - -static u32 snapshot_skiplist_get(struct bch_fs *c, u32 id) -{ - const struct snapshot_t *s; - - if (!id) - return 0; - - rcu_read_lock(); - s = snapshot_t(c, id); - if (s->parent) - id = bch2_snapshot_nth_parent(c, id, get_random_u32_below(s->depth)); - rcu_read_unlock(); - - return id; -} - -static int snapshot_skiplist_good(struct btree_trans *trans, struct bch_snapshot s) -{ - struct bch_snapshot a; - unsigned i; - int ret; - - for (i = 0; i < 3; i++) { - if (!s.parent != !s.skip[i]) - return false; - - if (!s.parent) - continue; - - ret = snapshot_lookup(trans, le32_to_cpu(s.skip[i]), &a); - if (bch2_err_matches(ret, ENOENT)) - return false; - if (ret) - return ret; - - if (a.tree != s.tree) - return false; - } - - return true; -} - -/* - * snapshot_tree pointer was incorrect: look up root snapshot node, make sure - * its snapshot_tree pointer is correct (allocate new one if necessary), then - * update this node's pointer to root node's pointer: - */ -static int snapshot_tree_ptr_repair(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, - struct bch_snapshot *s) -{ - struct bch_fs *c = trans->c; - struct btree_iter root_iter; - struct bch_snapshot_tree s_t; - struct bkey_s_c_snapshot root; - struct bkey_i_snapshot *u; - u32 root_id = bch2_snapshot_root(c, k.k->p.offset), tree_id; - int ret; - - root = bch2_bkey_get_iter_typed(trans, &root_iter, - BTREE_ID_snapshots, POS(0, root_id), - BTREE_ITER_WITH_UPDATES, snapshot); - ret = bkey_err(root); - if (ret) - goto err; - - tree_id = le32_to_cpu(root.v->tree); - - ret = bch2_snapshot_tree_lookup(trans, tree_id, &s_t); - if (ret && !bch2_err_matches(ret, ENOENT)) - return ret; - - if (ret || le32_to_cpu(s_t.root_snapshot) != root_id) { - u = 
bch2_bkey_make_mut_typed(trans, &root_iter, &root.s_c, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u) ?: - snapshot_tree_create(trans, root_id, - bch2_snapshot_tree_oldest_subvol(c, root_id), - &tree_id); - if (ret) - goto err; - - u->v.tree = cpu_to_le32(tree_id); - if (k.k->p.offset == root_id) - *s = u->v; - } - - if (k.k->p.offset != root_id) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - u->v.tree = cpu_to_le32(tree_id); - *s = u->v; - } -err: - bch2_trans_iter_exit(trans, &root_iter); - return ret; -} - -static int cmp_le32(__le32 l, __le32 r) -{ - return cmp_int(le32_to_cpu(l), le32_to_cpu(r)); -} - -static int check_snapshot(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bch_fs *c = trans->c; - struct bch_snapshot s; - struct bch_subvolume subvol; - struct bch_snapshot v; - struct bkey_i_snapshot *u; - u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); - u32 real_depth; - struct printbuf buf = PRINTBUF; - bool should_have_subvol; - u32 i, id; - int ret = 0; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - memset(&s, 0, sizeof(s)); - memcpy(&s, k.v, bkey_val_bytes(k.k)); - - id = le32_to_cpu(s.parent); - if (id) { - ret = snapshot_lookup(trans, id, &v); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot with nonexistent parent:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (ret) - goto err; - - if (le32_to_cpu(v.children[0]) != k.k->p.offset && - le32_to_cpu(v.children[1]) != k.k->p.offset) { - bch_err(c, "snapshot parent %u missing pointer to child %llu", - id, k.k->p.offset); - ret = -EINVAL; - goto err; - } - } - - for (i = 0; i < 2 && s.children[i]; i++) { - id = le32_to_cpu(s.children[i]); - - ret = snapshot_lookup(trans, id, &v); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot node %llu has nonexistent child %u", - k.k->p.offset, id); - if (ret) - goto err; - - if (le32_to_cpu(v.parent) != k.k->p.offset) { - bch_err(c, "snapshot child %u has wrong parent (got %u should be %llu)", - id, le32_to_cpu(v.parent), k.k->p.offset); - ret = -EINVAL; - goto err; - } - } - - should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && - !BCH_SNAPSHOT_DELETED(&s); - - if (should_have_subvol) { - id = le32_to_cpu(s.subvol); - ret = bch2_subvolume_get(trans, id, 0, false, &subvol); - if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "snapshot points to nonexistent subvolume:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (ret) - goto err; - - if (BCH_SNAPSHOT_SUBVOL(&s) != (le32_to_cpu(subvol.snapshot) == k.k->p.offset)) { - bch_err(c, "snapshot node %llu has wrong BCH_SNAPSHOT_SUBVOL", - k.k->p.offset); - ret = -EINVAL; - goto err; - } - } else { - if (fsck_err_on(s.subvol, c, "snapshot should not point to subvol:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - u->v.subvol = 0; - s = u->v; - } - } - - ret = snapshot_tree_ptr_good(trans, k.k->p.offset, le32_to_cpu(s.tree)); - if (ret < 0) - goto err; - - if (fsck_err_on(!ret, c, "snapshot points to missing/incorrect tree:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = snapshot_tree_ptr_repair(trans, iter, k, &s); - if (ret) - goto err; - } - ret = 0; - - real_depth = bch2_snapshot_depth(c, parent_id); - - if (le32_to_cpu(s.depth) != real_depth && - (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - 
fsck_err(c, "snapshot with incorrect depth field, should be %u:\n %s", - real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - u->v.depth = cpu_to_le32(real_depth); - s = u->v; - } - - ret = snapshot_skiplist_good(trans, s); - if (ret < 0) - goto err; - - if (!ret && - (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, "snapshot with bad skiplist field:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { - u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); - ret = PTR_ERR_OR_ZERO(u); - if (ret) - goto err; - - for (i = 0; i < ARRAY_SIZE(u->v.skip); i++) - u->v.skip[i] = cpu_to_le32(snapshot_skiplist_get(c, parent_id)); - - bubble_sort(u->v.skip, ARRAY_SIZE(u->v.skip), cmp_le32); - s = u->v; - } - ret = 0; -err: -fsck_err: - printbuf_exit(&buf); - return ret; -} - -int bch2_check_snapshots(struct bch_fs *c) -{ - struct btree_iter iter; - struct bkey_s_c k; - int ret; - - /* - * We iterate backwards as checking/fixing the depth field requires that - * the parent's depth already be correct: - */ - ret = bch2_trans_run(c, - for_each_btree_key_reverse_commit(&trans, iter, - BTREE_ID_snapshots, POS_MAX, - BTREE_ITER_PREFETCH, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_snapshot(&trans, &iter, k))); - if (ret) - bch_err_fn(c, ret); - return ret; -} - static int check_subvol(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) @@ -900,7 +28,7 @@ static int check_subvol(struct btree_trans *trans, subvol = bkey_s_c_to_subvolume(k); snapid = le32_to_cpu(subvol.v->snapshot); - ret = snapshot_lookup(trans, snapid, &snapshot); + ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) bch_err(c, "subvolume %llu points to nonexistent snapshot %u", @@ -968,462 +96,6 @@ int bch2_check_subvols(struct bch_fs *c) return ret; } -void bch2_fs_snapshots_exit(struct bch_fs *c) -{ - kfree(rcu_dereference_protected(c->snapshots, true)); -} - -int bch2_snapshots_read(struct bch_fs *c) -{ - struct btree_iter iter; - struct bkey_s_c k; - int ret = 0; - - ret = bch2_trans_run(c, - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, - bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_snapshot_set_equiv(&trans, k))); - if (ret) - bch_err_fn(c, ret); - return ret; -} - -/* - * Mark a snapshot as deleted, for future cleanup: - */ -static int bch2_snapshot_node_set_deleted(struct btree_trans *trans, u32 id) -{ - struct btree_iter iter; - struct bkey_i_snapshot *s; - int ret = 0; - - s = bch2_bkey_get_mut_typed(trans, &iter, - BTREE_ID_snapshots, POS(0, id), - 0, snapshot); - ret = PTR_ERR_OR_ZERO(s); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), - trans->c, "missing snapshot %u", id); - return ret; - } - - /* already deleted? 
*/ - if (BCH_SNAPSHOT_DELETED(&s->v)) - goto err; - - SET_BCH_SNAPSHOT_DELETED(&s->v, true); - SET_BCH_SNAPSHOT_SUBVOL(&s->v, false); - s->v.subvol = 0; -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; - struct btree_iter tree_iter = (struct btree_iter) { NULL }; - struct bkey_s_c_snapshot s; - u32 parent_id; - unsigned i; - int ret = 0; - - s = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_snapshots, POS(0, id), - BTREE_ITER_INTENT, snapshot); - ret = bkey_err(s); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "missing snapshot %u", id); - - if (ret) - goto err; - - BUG_ON(!BCH_SNAPSHOT_DELETED(s.v)); - parent_id = le32_to_cpu(s.v->parent); - - if (parent_id) { - struct bkey_i_snapshot *parent; - - parent = bch2_bkey_get_mut_typed(trans, &p_iter, - BTREE_ID_snapshots, POS(0, parent_id), - 0, snapshot); - ret = PTR_ERR_OR_ZERO(parent); - if (unlikely(ret)) { - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c, - "missing snapshot %u", parent_id); - goto err; - } - - for (i = 0; i < 2; i++) - if (le32_to_cpu(parent->v.children[i]) == id) - break; - - if (i == 2) - bch_err(c, "snapshot %u missing child pointer to %u", - parent_id, id); - else - parent->v.children[i] = 0; - - if (le32_to_cpu(parent->v.children[0]) < - le32_to_cpu(parent->v.children[1])) - swap(parent->v.children[0], - parent->v.children[1]); - } else { - /* - * We're deleting the root of a snapshot tree: update the - * snapshot_tree entry to point to the new root, or delete it if - * this is the last snapshot ID in this tree: - */ - struct bkey_i_snapshot_tree *s_t; - - BUG_ON(s.v->children[1]); - - s_t = bch2_bkey_get_mut_typed(trans, &tree_iter, - BTREE_ID_snapshot_trees, POS(0, le32_to_cpu(s.v->tree)), - 0, snapshot_tree); - ret = PTR_ERR_OR_ZERO(s_t); - if (ret) - goto err; - - if (s.v->children[0]) { - s_t->v.root_snapshot = s.v->children[0]; - } else { - s_t->k.type = KEY_TYPE_deleted; - set_bkey_val_u64s(&s_t->k, 0); - } - } - - ret = bch2_btree_delete_at(trans, &iter, 0); -err: - bch2_trans_iter_exit(trans, &tree_iter); - bch2_trans_iter_exit(trans, &p_iter); - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, - u32 *new_snapids, - u32 *snapshot_subvols, - unsigned nr_snapids) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_i_snapshot *n; - struct bkey_s_c k; - unsigned i, j; - u32 depth = bch2_snapshot_depth(c, parent); - int ret; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots, - POS_MIN, BTREE_ITER_INTENT); - k = bch2_btree_iter_peek(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - for (i = 0; i < nr_snapids; i++) { - k = bch2_btree_iter_prev_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - if (!k.k || !k.k->p.offset) { - ret = -BCH_ERR_ENOSPC_snapshot_create; - goto err; - } - - n = bch2_bkey_alloc(trans, &iter, 0, snapshot); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - goto err; - - n->v.flags = 0; - n->v.parent = cpu_to_le32(parent); - n->v.subvol = cpu_to_le32(snapshot_subvols[i]); - n->v.tree = cpu_to_le32(tree); - n->v.depth = cpu_to_le32(depth); - - for (j = 0; j < ARRAY_SIZE(n->v.skip); j++) - n->v.skip[j] = cpu_to_le32(snapshot_skiplist_get(c, parent)); - - bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); - SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - - ret = 
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, - bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); - if (ret) - goto err; - - new_snapids[i] = iter.pos.offset; - } -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -/* - * Create new snapshot IDs as children of an existing snapshot ID: - */ -static int bch2_snapshot_node_create_children(struct btree_trans *trans, u32 parent, - u32 *new_snapids, - u32 *snapshot_subvols, - unsigned nr_snapids) -{ - struct btree_iter iter; - struct bkey_i_snapshot *n_parent; - int ret = 0; - - n_parent = bch2_bkey_get_mut_typed(trans, &iter, - BTREE_ID_snapshots, POS(0, parent), - 0, snapshot); - ret = PTR_ERR_OR_ZERO(n_parent); - if (unlikely(ret)) { - if (bch2_err_matches(ret, ENOENT)) - bch_err(trans->c, "snapshot %u not found", parent); - return ret; - } - - if (n_parent->v.children[0] || n_parent->v.children[1]) { - bch_err(trans->c, "Trying to add child snapshot nodes to parent that already has children"); - ret = -EINVAL; - goto err; - } - - ret = create_snapids(trans, parent, le32_to_cpu(n_parent->v.tree), - new_snapids, snapshot_subvols, nr_snapids); - if (ret) - goto err; - - n_parent->v.children[0] = cpu_to_le32(new_snapids[0]); - n_parent->v.children[1] = cpu_to_le32(new_snapids[1]); - n_parent->v.subvol = 0; - SET_BCH_SNAPSHOT_SUBVOL(&n_parent->v, false); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -/* - * Create a snapshot node that is the root of a new tree: - */ -static int bch2_snapshot_node_create_tree(struct btree_trans *trans, - u32 *new_snapids, - u32 *snapshot_subvols, - unsigned nr_snapids) -{ - struct bkey_i_snapshot_tree *n_tree; - int ret; - - n_tree = __snapshot_tree_create(trans); - ret = PTR_ERR_OR_ZERO(n_tree) ?: - create_snapids(trans, 0, n_tree->k.p.offset, - new_snapids, snapshot_subvols, nr_snapids); - if (ret) - return ret; - - n_tree->v.master_subvol = cpu_to_le32(snapshot_subvols[0]); - n_tree->v.root_snapshot = cpu_to_le32(new_snapids[0]); - return 0; -} - -int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, - u32 *new_snapids, - u32 *snapshot_subvols, - unsigned nr_snapids) -{ - BUG_ON((parent == 0) != (nr_snapids == 1)); - BUG_ON((parent != 0) != (nr_snapids == 2)); - - return parent - ? 
bch2_snapshot_node_create_children(trans, parent, - new_snapids, snapshot_subvols, nr_snapids) - : bch2_snapshot_node_create_tree(trans, - new_snapids, snapshot_subvols, nr_snapids); - -} - -static int snapshot_delete_key(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, - snapshot_id_list *deleted, - snapshot_id_list *equiv_seen, - struct bpos *last_pos) -{ - struct bch_fs *c = trans->c; - u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); - - if (!bkey_eq(k.k->p, *last_pos)) - equiv_seen->nr = 0; - *last_pos = k.k->p; - - if (snapshot_list_has_id(deleted, k.k->p.snapshot) || - snapshot_list_has_id(equiv_seen, equiv)) { - return bch2_btree_delete_at(trans, iter, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); - } else { - return snapshot_list_add(c, equiv_seen, equiv); - } -} - -static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_s_c k) -{ - struct bkey_s_c_snapshot snap; - u32 children[2]; - int ret; - - if (k.k->type != KEY_TYPE_snapshot) - return 0; - - snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v) || - BCH_SNAPSHOT_SUBVOL(snap.v)) - return 0; - - children[0] = le32_to_cpu(snap.v->children[0]); - children[1] = le32_to_cpu(snap.v->children[1]); - - ret = snapshot_live(trans, children[0]) ?: - snapshot_live(trans, children[1]); - if (ret < 0) - return ret; - - if (!ret) - return bch2_snapshot_node_set_deleted(trans, k.k->p.offset); - return 0; -} - -int bch2_delete_dead_snapshots(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_s_c_snapshot snap; - snapshot_id_list deleted = { 0 }; - u32 i, id; - int ret = 0; - - if (!test_bit(BCH_FS_STARTED, &c->flags)) { - ret = bch2_fs_read_write_early(c); - if (ret) { - bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret)); - return ret; - } - } - - bch2_trans_init(&trans, c, 0, 0); - - /* - * For every snapshot node: If we have no live children and it's not - * pointed to by a subvolume, delete it: - */ - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, - NULL, NULL, 0, - bch2_delete_redundant_snapshot(&trans, &iter, k)); - if (ret) { - bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret)); - goto err; - } - - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, - bch2_snapshot_set_equiv(&trans, k)); - if (ret) { - bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret)); - goto err; - } - - for_each_btree_key(&trans, iter, BTREE_ID_snapshots, - POS_MIN, 0, k, ret) { - if (k.k->type != KEY_TYPE_snapshot) - continue; - - snap = bkey_s_c_to_snapshot(k); - if (BCH_SNAPSHOT_DELETED(snap.v)) { - ret = snapshot_list_add(c, &deleted, k.k->p.offset); - if (ret) - break; - } - } - bch2_trans_iter_exit(&trans, &iter); - - if (ret) { - bch_err(c, "error walking snapshots: %s", bch2_err_str(ret)); - goto err; - } - - for (id = 0; id < BTREE_ID_NR; id++) { - struct bpos last_pos = POS_MIN; - snapshot_id_list equiv_seen = { 0 }; - - if (!btree_type_has_snapshots(id)) - continue; - - ret = for_each_btree_key_commit(&trans, iter, - id, POS_MIN, - BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_NOFAIL, - snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)); - - darray_exit(&equiv_seen); - - if (ret) { - bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret)); - goto err; - } - } - - for (i = 0; i < deleted.nr; i++) { - ret = commit_do(&trans, 
NULL, NULL, 0, - bch2_snapshot_node_delete(&trans, deleted.data[i])); - if (ret) { - bch_err(c, "error deleting snapshot %u: %s", - deleted.data[i], bch2_err_str(ret)); - goto err; - } - } - - clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); -err: - darray_exit(&deleted); - bch2_trans_exit(&trans); - if (ret) - bch_err_fn(c, ret); - return ret; -} - -static void bch2_delete_dead_snapshots_work(struct work_struct *work) -{ - struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work); - - if (test_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags)) - bch2_delete_dead_snapshots(c); - bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); -} - -void bch2_delete_dead_snapshots_async(struct bch_fs *c) -{ - if (bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots) && - !queue_work(c->write_ref_wq, &c->snapshot_delete_work)) - bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots); -} - -static int bch2_delete_dead_snapshots_hook(struct btree_trans *trans, - struct btree_trans_commit_hook *h) -{ - struct bch_fs *c = trans->c; - - set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags); - - if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_delete_dead_snapshots) - return 0; - - bch2_delete_dead_snapshots_async(c); - return 0; -} - /* Subvolumes: */ int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k, @@ -1478,26 +150,27 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, { struct bch_snapshot snap; - return snapshot_lookup(trans, snapshot, &snap) ?: + return bch2_snapshot_lookup(trans, snapshot, &snap) ?: bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); } -int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvol, +int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, u32 *snapid) { struct btree_iter iter; - struct bkey_s_c k; + struct bkey_s_c_subvolume subvol; int ret; - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_subvolumes, POS(0, subvol), - BTREE_ITER_CACHED| - BTREE_ITER_WITH_UPDATES); - ret = bkey_err(k) ?: k.k->type == KEY_TYPE_subvolume ? 0 : -BCH_ERR_ENOENT_subvolume; + subvol = bch2_bkey_get_iter_typed(trans, &iter, + BTREE_ID_subvolumes, POS(0, subvolid), + BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES, + subvolume); + ret = bkey_err(subvol); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing subvolume %u", subvolid); if (likely(!ret)) - *snapid = le32_to_cpu(bkey_s_c_to_subvolume(k).v->snapshot); - else if (bch2_err_matches(ret, ENOENT)) - bch2_fs_inconsistent(trans->c, "missing subvolume %u", subvol); + *snapid = le32_to_cpu(subvol.v->snapshot); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -1527,7 +200,12 @@ static int bch2_subvolume_reparent(struct btree_trans *trans, } /* - * Scan for subvolumes with parent @subvolid_to_delete, reparent: + * Separate from the snapshot tree in the snapshots btree, we record the tree + * structure of how snapshot subvolumes were created - the parent subvolume of + * each snapshot subvolume. 
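+ * + * (for example: if subvolume 3 was created as a snapshot of subvolume 2, we + * record 2 as 3's parent; the subvolume IDs here are purely illustrative, and + * this creation tree is distinct from the snapshot node tree)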
+ * + * When a subvolume is deleted, we scan for child subvolumes and reparent them, + * to avoid dangling references: */ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete) { diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 6905e91a9470..8d4c50f4cd05 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -7,225 +7,8 @@ enum bkey_invalid_flags; -void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(const struct bch_fs *, struct bkey_s_c, - enum bkey_invalid_flags, struct printbuf *); - -#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ - .val_to_text = bch2_snapshot_tree_to_text, \ - .min_val_size = 8, \ -}) - -int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); - -void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c, - enum bkey_invalid_flags, struct printbuf *); -int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); - -#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ - .val_to_text = bch2_snapshot_to_text, \ - .atomic_trigger = bch2_mark_snapshot, \ - .min_val_size = 24, \ -}) - -static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id) -{ - return &t->s[U32_MAX - id]; -} - -static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) -{ - return __snapshot_t(rcu_dereference(c->snapshots), id); -} - -static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id) -{ - rcu_read_lock(); - id = snapshot_t(c, id)->tree; - rcu_read_unlock(); - - return id; -} - -static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id) -{ - return snapshot_t(c, id)->parent; -} - -static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id) -{ - rcu_read_lock(); - id = __bch2_snapshot_parent_early(c, id); - rcu_read_unlock(); - - return id; -} - -static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id) -{ -#ifdef CONFIG_BCACHEFS_DEBUG - u32 parent = snapshot_t(c, id)->parent; - - if (parent && - snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1) - panic("id %u depth=%u parent %u depth=%u\n", - id, snapshot_t(c, id)->depth, - parent, snapshot_t(c, parent)->depth); - - return parent; -#else - return snapshot_t(c, id)->parent; -#endif -} - -static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) -{ - rcu_read_lock(); - id = __bch2_snapshot_parent(c, id); - rcu_read_unlock(); - - return id; -} - -static inline u32 bch2_snapshot_nth_parent(struct bch_fs *c, u32 id, u32 n) -{ - rcu_read_lock(); - while (n--) - id = __bch2_snapshot_parent(c, id); - rcu_read_unlock(); - - return id; -} - -static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id) -{ - u32 parent; - - rcu_read_lock(); - while ((parent = __bch2_snapshot_parent(c, id))) - id = parent; - rcu_read_unlock(); - - return id; -} - -static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id) -{ - return snapshot_t(c, id)->equiv; -} - -static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) -{ - rcu_read_lock(); - id = __bch2_snapshot_equiv(c, id); - rcu_read_unlock(); - - return id; -} - -static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) -{ - return id == bch2_snapshot_equiv(c, id); -} - -static inline bool 
bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id) -{ - const struct snapshot_t *s; - bool ret; - - rcu_read_lock(); - s = snapshot_t(c, id); - ret = s->children[0]; - rcu_read_unlock(); - - return ret; -} - -static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id) -{ - return !bch2_snapshot_is_internal_node(c, id); -} - -static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id) -{ - const struct snapshot_t *s; - u32 parent = __bch2_snapshot_parent(c, id); - - if (!parent) - return 0; - - s = snapshot_t(c, __bch2_snapshot_parent(c, id)); - if (id == s->children[0]) - return s->children[1]; - if (id == s->children[1]) - return s->children[0]; - return 0; -} - -bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32); - -static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) -{ - return id == ancestor - ? true - : __bch2_snapshot_is_ancestor(c, id, ancestor); -} - -static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) -{ - const struct snapshot_t *t; - bool ret; - - rcu_read_lock(); - t = snapshot_t(c, id); - ret = (t->children[0]|t->children[1]) != 0; - rcu_read_unlock(); - - return ret; -} - -static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) -{ - u32 *i; - - darray_for_each(*s, i) - if (*i == id) - return true; - return false; -} - -static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list *s, u32 id) -{ - u32 *i; - - darray_for_each(*s, i) - if (bch2_snapshot_is_ancestor(c, id, *i)) - return true; - return false; -} - -static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) -{ - int ret; - - BUG_ON(snapshot_list_has_id(s, id)); - ret = darray_push(s, id); - if (ret) - bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); - return ret; -} - -int bch2_check_snapshot_trees(struct bch_fs *); -int bch2_check_snapshots(struct bch_fs *); int bch2_check_subvols(struct bch_fs *); -void bch2_fs_snapshots_exit(struct bch_fs *); -int bch2_snapshots_read(struct bch_fs *); - int bch2_subvolume_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -238,14 +21,8 @@ void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c) int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); -int bch2_snapshot_get_subvol(struct btree_trans *, u32, - struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); -/* only exported for tests: */ -int bch2_snapshot_node_create(struct btree_trans *, u32, - u32 *, u32 *, unsigned); - int bch2_delete_dead_snapshots(struct bch_fs *); void bch2_delete_dead_snapshots_async(struct bch_fs *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index d9dbcd0bdbf5..8e2ec3b6c9b3 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -48,6 +48,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "snapshot.h" #include "subvolume.h" #include "super.h" #include "super-io.h" diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 1d4b0a583586..72389c7376d6 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -4,7 +4,7 @@ #include "bcachefs.h" #include "btree_update.h" #include "journal_reclaim.h" -#include "subvolume.h" +#include "snapshot.h" #include "tests.h" #include "linux/kthread.h" diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 3cec6171c58f..d34423352f60 100644 --- 
a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -842,6 +842,11 @@ static inline int u8_cmp(u8 l, u8 r) return cmp_int(l, r); } +static inline int cmp_le32(__le32 l, __le32 r) +{ + return cmp_int(le32_to_cpu(l), le32_to_cpu(r)); +} + #include #endif /* _BCACHEFS_UTIL_H */ -- cgit v1.2.3 From 7573041ab958a14407621ef9756be49548f937e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 18 Aug 2023 17:44:21 -0400 Subject: bcachefs: Fix bch2_mount error path In the bch2_mount() error path, we were calling deactivate_locked_super(), which calls ->kill_sb(), which in our case was calling bch2_fs_free() without __bch2_fs_stop(). This changes bch2_mount() to just call bch2_fs_stop() directly. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 8 +++++++- fs/bcachefs/super.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index d2f93a8af4ac..48431700b83e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1906,7 +1906,10 @@ out: return dget(sb->s_root); err_put_super: + sb->s_fs_info = NULL; + c->vfs_sb = NULL; deactivate_locked_super(sb); + bch2_fs_stop(c); return ERR_PTR(bch2_err_class(ret)); } @@ -1914,8 +1917,11 @@ static void bch2_kill_sb(struct super_block *sb) { struct bch_fs *c = sb->s_fs_info; + if (c) + c->vfs_sb = NULL; generic_shutdown_super(sb); - bch2_fs_free(c); + if (c) + bch2_fs_free(c); } static struct file_system_type bcache_fs_type = { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8e2ec3b6c9b3..60424865980d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -581,6 +581,8 @@ void bch2_fs_free(struct bch_fs *c) { unsigned i; + BUG_ON(!test_bit(BCH_FS_STOPPING, &c->flags)); + mutex_lock(&bch_fs_list_lock); list_del(&c->list); mutex_unlock(&bch_fs_list_lock); -- cgit v1.2.3 From e46c181af9e230c4c5dbc701fdadc295d6191eec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 11 Sep 2023 01:37:34 -0400 Subject: bcachefs: Convert more code to bch_err_msg() Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/btree_gc.c | 12 ++++----- fs/bcachefs/btree_update_interior.c | 5 ++-- fs/bcachefs/ec.c | 3 +-- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 50 ++++++++++++++++--------------------- fs/bcachefs/journal.c | 2 +- fs/bcachefs/journal_reclaim.c | 2 +- fs/bcachefs/migrate.c | 3 +-- fs/bcachefs/movinggc.c | 7 +++--- fs/bcachefs/rebalance.c | 2 +- fs/bcachefs/replicas.c | 2 +- fs/bcachefs/snapshot.c | 6 ++--- fs/bcachefs/subvolume.c | 5 ++-- fs/bcachefs/super.c | 43 ++++++++++++++++--------------- 15 files changed, 67 insertions(+), 79 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index e36426b52a4a..fcb3d53bb6f3 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1931,7 +1931,7 @@ bkey_err: bch2_trans_exit(&trans); if (ret < 0) { - bch_err(ca, "error initializing free space: %s", bch2_err_str(ret)); + bch_err_msg(ca, ret, "initializing free space"); return ret; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index dac2eb76c985..844ac0024683 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1483,7 +1483,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) bch2_alloc_write_key(&trans, &iter, k, metadata_only)); if (ret < 0) { - bch_err(c, "error writing alloc info: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); percpu_ref_put(&ca->ref); break; } @@ -1548,7 +1548,7 @@ 
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) bch2_trans_exit(&trans); if (ret) - bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); return ret; } @@ -1998,7 +1998,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_INSERT_NOFAIL, gc_btree_gens_key(&trans, &iter, k)); if (ret && !bch2_err_matches(ret, EROFS)) - bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); if (ret) goto err; } @@ -2011,7 +2011,7 @@ int bch2_gc_gens(struct bch_fs *c) BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter, k)); if (ret && !bch2_err_matches(ret, EROFS)) - bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); if (ret) goto err; @@ -2083,7 +2083,7 @@ static int bch2_gc_thread(void *arg) ret = bch2_gc_gens(c); #endif if (ret < 0) - bch_err(c, "btree gc failed: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); debug_check_no_locks_held(); } @@ -2113,7 +2113,7 @@ int bch2_gc_thread_start(struct bch_fs *c) p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name); if (IS_ERR(p)) { - bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p))); + bch_err_fn(c, PTR_ERR(p)); return PTR_ERR(p); } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 73c950d2788e..c5b571f8333c 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2057,7 +2057,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work) ret = bch2_trans_do(c, NULL, NULL, 0, async_btree_node_rewrite_trans(&trans, a)); if (ret) - bch_err(c, "%s: error %s", __func__, bch2_err_str(ret)); + bch_err_fn(c, ret); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); kfree(a); } @@ -2096,8 +2096,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) ret = bch2_fs_read_write_early(c); if (ret) { - bch_err(c, "%s: error going read-write: %s", - __func__, bch2_err_str(ret)); + bch_err_msg(c, ret, "going read-write"); kfree(a); return; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index f58e84a2bf88..67a5453a36d9 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1133,8 +1133,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) ret = ec_stripe_update_extents(c, &s->new_stripe); if (ret) { - bch_err(c, "error creating stripe: error updating pointers: %s", - bch2_err_str(ret)); + bch_err_msg(c, ret, "creating stripe: error updating pointers"); goto err; } err: diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 48431700b83e..08f810992a1b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1890,7 +1890,7 @@ got_sb: vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); if (ret) { - bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "mounting: error getting root inode"); goto err_put_super; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index ded9711e44dd..26e0a1ced68a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -80,7 +80,7 @@ static int __snapshot_lookup_subvol(struct btree_trans *trans, u32 snapshot, if (!ret) *subvol = le32_to_cpu(s.subvol); else if (bch2_err_matches(ret, ENOENT)) - bch_err(trans->c, "snapshot %u not fonud", snapshot); + bch_err(trans->c, "snapshot %u not found", snapshot); return ret; } @@ -127,8 +127,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, ret = bch2_inode_unpack(k, inode); err: if (ret && !bch2_err_matches(ret, 
BCH_ERR_transaction_restart)) - bch_err(trans->c, "error fetching inode %llu: %s", - inode_nr, bch2_err_str(ret)); + bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -154,8 +153,7 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr, *snapshot = iter.pos.snapshot; err: if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(trans->c, "error fetching inode %llu:%u: %s", - inode_nr, *snapshot, bch2_err_str(ret)); + bch_err_msg(trans->c, ret, "fetching inode %llu:%u", inode_nr, *snapshot); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -206,17 +204,16 @@ static int __write_inode(struct btree_trans *trans, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); } -static int write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +static int fsck_write_inode(struct btree_trans *trans, + struct bch_inode_unpacked *inode, + u32 snapshot) { int ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, __write_inode(trans, inode, snapshot)); if (ret) - bch_err(trans->c, "error in fsck: error updating inode: %s", - bch2_err_str(ret)); + bch_err_fn(trans->c, ret); return ret; } @@ -278,7 +275,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol, } if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); if (ret) return ret; @@ -301,7 +298,7 @@ create_lostfound: 0, 0, S_IFDIR|0700, 0, NULL, NULL, (subvol_inum) { }, 0); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error creating lost+found: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "creating lost+found"); return ret; } @@ -365,8 +362,7 @@ static int reattach_inode(struct btree_trans *trans, BTREE_INSERT_NOFAIL, __reattach_inode(trans, inode, inode_snapshot)); if (ret) { - bch_err(trans->c, "error reattaching inode %llu: %s", - inode->bi_inum, bch2_err_str(ret)); + bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum); return ret; } @@ -819,7 +815,7 @@ bad_hash: bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "hash_redo_key err %s", bch2_err_str(ret)); + bch_err_fn(c, ret); if (ret) return ret; ret = -BCH_ERR_transaction_restart_nested; @@ -883,7 +879,8 @@ static int check_inode(struct btree_trans *trans, ret = __write_inode(trans, &u, iter->pos.snapshot); if (ret) { - bch_err_msg(c, ret, "in fsck: error updating inode"); + if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) + bch_err_msg(c, ret, "in fsck updating inode"); return ret; } @@ -901,8 +898,7 @@ static int check_inode(struct btree_trans *trans, ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error in fsck: error while deleting inode: %s", - bch2_err_str(ret)); + bch_err_msg(c, ret, "in fsck deleting inode"); return ret; } @@ -925,8 +921,7 @@ static int check_inode(struct btree_trans *trans, POS(u.bi_inum, U64_MAX), 0, NULL); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error in fsck: error truncating inode: %s", - bch2_err_str(ret)); + bch_err_msg(c, ret, "in fsck truncating inode"); if (ret) return ret; @@ -951,8 +946,7 @@ static int check_inode(struct btree_trans *trans, sectors = bch2_count_inode_sectors(trans, 
u.bi_inum, iter->pos.snapshot); if (sectors < 0) { - bch_err(c, "error in fsck: error recounting inode sectors: %s", - bch2_err_str(sectors)); + bch_err_msg(c, sectors, "fsck recounting inode sectors"); return sectors; } @@ -971,13 +965,13 @@ static int check_inode(struct btree_trans *trans, if (do_update) { ret = __write_inode(trans, &u, iter->pos.snapshot); if (ret) { - bch_err_msg(c, ret, "in fsck: error updating inode"); + bch_err_msg(c, ret, "in fsck updating inode"); return ret; } } err: fsck_err: - if (ret) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err_fn(c, ret); return ret; } @@ -1078,7 +1072,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) w->last_pos.inode, i->snapshot, i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; - ret = write_inode(trans, &i->inode, i->snapshot); + ret = fsck_write_inode(trans, &i->inode, i->snapshot); if (ret) break; } @@ -1496,7 +1490,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; - ret = write_inode(trans, &i->inode, i->snapshot); + ret = fsck_write_inode(trans, &i->inode, i->snapshot); if (ret) break; } @@ -1923,7 +1917,7 @@ static int check_root_trans(struct btree_trans *trans) __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0)); if (ret) { - bch_err(c, "error writing root subvol: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "writing root subvol"); goto err; } @@ -1942,7 +1936,7 @@ static int check_root_trans(struct btree_trans *trans) ret = __write_inode(trans, &root_inode, snapshot); if (ret) - bch_err(c, "error writing root inode: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "writing root inode"); } err: fsck_err: diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 055920c26da6..4b9295a15837 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -834,7 +834,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, ca->mi.bucket_size)); if (ret) { bch2_open_bucket_put(c, ob[nr_got]); - bch_err(c, "error marking new journal buckets: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "marking new journal buckets"); break; } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 10e1860dad79..73d135a8f37a 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -758,7 +758,7 @@ int bch2_journal_reclaim_start(struct journal *j) "bch-reclaim/%s", c->name); ret = PTR_ERR_OR_ZERO(p); if (ret) { - bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "creating journal reclaim thread"); return ret; } diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 81c8cdbac285..3d7c5b919421 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -148,8 +148,7 @@ retry: } if (ret) { - bch_err(c, "Error updating btree node key: %s", - bch2_err_str(ret)); + bch_err_msg(c, ret, "updating btree node key"); break; } next: diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 256431a6dc0c..ac658e99bf57 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -242,7 +242,7 @@ err: ret = 0; if (ret < 0 && !bch2_err_matches(ret, EROFS)) - bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "from bch2_move_data()"); moved = 
atomic64_read(&ctxt->stats->sectors_moved) - moved; trace_and_count(c, copygc, c, moved, 0, 0, 0); @@ -320,8 +320,7 @@ static int bch2_copygc_thread(void *arg) ret = rhashtable_init(&move_buckets.table, &bch_move_bucket_params); if (ret) { - bch_err(c, "error allocating copygc buckets in flight: %s", - bch2_err_str(ret)); + bch_err_msg(c, ret, "allocating copygc buckets in flight"); return ret; } @@ -404,7 +403,7 @@ int bch2_copygc_start(struct bch_fs *c) t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); ret = PTR_ERR_OR_ZERO(t); if (ret) { - bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "creating copygc thread"); return ret; } diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 15ce3ecba0ba..016cf0834b3d 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -350,7 +350,7 @@ int bch2_rebalance_start(struct bch_fs *c) p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); ret = PTR_ERR_OR_ZERO(p); if (ret) { - bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "creating rebalance thread"); return ret; } diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 5b591c59bc3e..dbef41cd8593 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -429,7 +429,7 @@ out: return ret; err: - bch_err(c, "error adding replicas entry: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "adding replicas entry"); goto out; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 3ca61ede28d5..9bab9860b20b 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1385,7 +1385,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!test_bit(BCH_FS_STARTED, &c->flags)) { ret = bch2_fs_read_write_early(c); if (ret) { - bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "deleting dead snapshots: error going rw"); return ret; } } @@ -1401,7 +1401,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) NULL, NULL, 0, bch2_delete_redundant_snapshot(&trans, &iter, k)); if (ret) { - bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "deleting redundant snapshots"); goto err; } @@ -1409,7 +1409,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) POS_MIN, 0, k, bch2_snapshot_set_equiv(&trans, k)); if (ret) { - bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); goto err; } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 0214a98deb4f..b6015a8060ec 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -41,8 +41,7 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_subvolume_delete(trans, iter->pos.offset); if (ret) - bch_err(c, "error deleting subvolume %llu: %s", - iter->pos.offset, bch2_err_str(ret)); + bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); return ret ?: -BCH_ERR_transaction_restart_nested; } @@ -296,7 +295,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor for (id = s.data; id < s.data + s.nr; id++) { ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id)); if (ret) { - bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret)); + bch_err_msg(c, ret, "deleting subvolume %u", *id); break; } } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 29cd71445a94..1b1a9e539f65 100644 --- a/fs/bcachefs/super.c +++
b/fs/bcachefs/super.c @@ -994,7 +994,7 @@ out: up_write(&c->state_lock); return ret; err: - bch_err(c, "error starting filesystem: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "starting filesystem"); goto out; } @@ -1459,7 +1459,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end, BTREE_TRIGGER_NORUN, NULL); if (ret) - bch_err(c, "error removing dev alloc info: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "removing dev alloc info"); return ret; } @@ -1488,31 +1488,31 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) ret = bch2_dev_data_drop(c, ca->dev_idx, flags); if (ret) { - bch_err(ca, "Remove failed: error dropping data: %s", bch2_err_str(ret)); + bch_err_msg(ca, ret, "dropping data"); goto err; } ret = bch2_dev_remove_alloc(c, ca); if (ret) { - bch_err(ca, "Remove failed, error deleting alloc info"); + bch_err_msg(ca, ret, "deleting alloc info"); goto err; } ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx); if (ret) { - bch_err(ca, "Remove failed: error flushing journal: %s", bch2_err_str(ret)); + bch_err_msg(ca, ret, "flushing journal"); goto err; } ret = bch2_journal_flush(&c->journal); if (ret) { - bch_err(ca, "Remove failed, journal error"); + bch_err(ca, "journal error"); goto err; } ret = bch2_replicas_gc2(c); if (ret) { - bch_err(ca, "Remove failed: error from replicas gc: %s", bch2_err_str(ret)); + bch_err_msg(ca, ret, "in replicas_gc2()"); goto err; } @@ -1587,7 +1587,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ret = bch2_read_super(path, &opts, &sb); if (ret) { - bch_err(c, "device add error: error reading super: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "reading super"); goto err; } @@ -1603,7 +1603,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ret = bch2_dev_may_add(sb.sb, c); if (ret) { - bch_err(c, "device add error: %s", bch2_err_str(ret)); + bch_err_fn(c, ret); goto err; } @@ -1624,7 +1624,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ret = bch2_dev_journal_alloc(ca); if (ret) { - bch_err(c, "device add error: journal alloc failed"); + bch_err_msg(c, ret, "allocating journal"); goto err; } @@ -1633,7 +1633,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ret = bch2_sb_from_fs(c, ca); if (ret) { - bch_err(c, "device add error: new device superblock too small"); + bch_err_msg(c, ret, "setting up new superblock"); goto err_unlock; } @@ -1642,8 +1642,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (!bch2_sb_resize_members(&ca->disk_sb, le32_to_cpu(mi->field.u64s) + sizeof(dev_mi) / sizeof(u64))) { - bch_err(c, "device add error: new device superblock too small"); ret = -BCH_ERR_ENOSPC_sb_members; + bch_err_msg(c, ret, "setting up new superblock"); goto err_unlock; } @@ -1655,8 +1655,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx)) goto have_slot; no_slot: - bch_err(c, "device add error: already have maximum number of devices"); ret = -BCH_ERR_ENOSPC_sb_members; + bch_err_msg(c, ret, "setting up new superblock"); goto err_unlock; have_slot: @@ -1666,8 +1666,8 @@ have_slot: mi = bch2_sb_resize_members(&c->disk_sb, u64s); if (!mi) { - bch_err(c, "device add error: no room in superblock for member info"); ret = -BCH_ERR_ENOSPC_sb_members; + bch_err_msg(c, ret, "setting up new superblock"); goto err_unlock; } @@ -1683,7 +1683,7 @@ have_slot: if (BCH_MEMBER_GROUP(&dev_mi)) { ret = __bch2_dev_group_set(c, ca, label.buf); if (ret) { - 
bch_err(c, "device add error: error setting label"); + bch_err_msg(c, ret, "creating new label"); goto err_unlock; } } @@ -1695,13 +1695,13 @@ have_slot: ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err(c, "device add error: error marking new superblock: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "marking new superblock"); goto err_late; } ret = bch2_fs_freespace_init(c); if (ret) { - bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "initializing free space"); goto err_late; } @@ -1751,7 +1751,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); if (ret) { - bch_err(c, "error bringing %s online: %s", path, bch2_err_str(ret)); + bch_err_msg(c, ret, "bringing %s online", path); goto err; } @@ -1763,8 +1763,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) ret = bch2_trans_mark_dev_sb(c, ca); if (ret) { - bch_err(c, "error bringing %s online: error from bch2_trans_mark_dev_sb: %s", - path, bch2_err_str(ret)); + bch_err_msg(c, ret, "bringing %s online: error from bch2_trans_mark_dev_sb", path); goto err; } @@ -1782,7 +1781,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) ret = bch2_fs_freespace_init(c); if (ret) - bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret)); + bch_err_msg(c, ret, "initializing free space"); up_write(&c->state_lock); return 0; @@ -1837,7 +1836,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) ret = bch2_dev_buckets_resize(c, ca, nbuckets); if (ret) { - bch_err(ca, "Resize error: %s", bch2_err_str(ret)); + bch_err_msg(ca, ret, "resizing buckets"); goto err; } -- cgit v1.2.3 From 1809b8cba756d32bd6e976ed4ee64efdf66c6d94 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 10 Sep 2023 18:05:17 -0400 Subject: bcachefs: Break up io.c More reorganization, this splits up io.c into - io_read.c - io_misc.c - fallocate, fpunch, truncate - io_write.c Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 4 +- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/btree_io.h | 2 +- fs/bcachefs/compress.c | 1 - fs/bcachefs/data_update.c | 2 +- fs/bcachefs/data_update.h | 2 +- fs/bcachefs/debug.c | 1 - fs/bcachefs/ec.c | 3 +- fs/bcachefs/errcode.c | 7 + fs/bcachefs/errcode.h | 4 + fs/bcachefs/error.c | 1 - fs/bcachefs/fs-io-buffered.c | 3 +- fs/bcachefs/fs-io-direct.c | 3 +- fs/bcachefs/fs-io.c | 3 +- fs/bcachefs/fs-io.h | 2 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/io.c | 3051 ---------------------------------------- fs/bcachefs/io.h | 202 --- fs/bcachefs/io_misc.c | 215 +++ fs/bcachefs/io_misc.h | 12 + fs/bcachefs/io_read.c | 1207 ++++++++++++++++ fs/bcachefs/io_read.h | 158 +++ fs/bcachefs/io_types.h | 165 --- fs/bcachefs/io_write.c | 1670 ++++++++++++++++++++++ fs/bcachefs/io_write.h | 110 ++ fs/bcachefs/io_write_types.h | 96 ++ fs/bcachefs/journal_io.c | 1 - fs/bcachefs/migrate.c | 2 +- fs/bcachefs/move.c | 3 +- fs/bcachefs/move.h | 1 + fs/bcachefs/movinggc.c | 8 - fs/bcachefs/rebalance.c | 2 - fs/bcachefs/reflink.c | 4 +- fs/bcachefs/super-io.c | 1 - fs/bcachefs/super.c | 9 +- 36 files changed, 3510 insertions(+), 3451 deletions(-) delete mode 100644 fs/bcachefs/io.c delete mode 100644 fs/bcachefs/io.h create mode 100644 fs/bcachefs/io_misc.c create mode 100644 fs/bcachefs/io_misc.h create mode 100644 fs/bcachefs/io_read.c create mode 100644 fs/bcachefs/io_read.h delete mode 100644 fs/bcachefs/io_types.h create mode 100644 fs/bcachefs/io_write.c 
create mode 100644 fs/bcachefs/io_write.h create mode 100644 fs/bcachefs/io_write_types.h (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 0a4d2fed66c1..9c00dabb26ac 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -46,7 +46,9 @@ bcachefs-y := \ fs-io-pagecache.o \ fsck.o \ inode.o \ - io.o \ + io_read.o \ + io_misc.o \ + io_write.o \ journal.o \ journal_io.o \ journal_reclaim.o \ diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index e02749ddc362..8e1888a89011 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -25,7 +25,7 @@ #include "disk_groups.h" #include "ec.h" #include "error.h" -#include "io.h" +#include "io_write.h" #include "journal.h" #include "movinggc.h" #include "nocow_locking.h" diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 0edbb73a5ec8..00f53cb5d44b 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -14,7 +14,7 @@ #include "debug.h" #include "error.h" #include "extents.h" -#include "io.h" +#include "io_write.h" #include "journal_reclaim.h" #include "journal_seq_blacklist.h" #include "recovery.h" diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index cd99bbb00a5a..7e03dd76fb38 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -7,7 +7,7 @@ #include "btree_locking.h" #include "checksum.h" #include "extents.h" -#include "io_types.h" +#include "io_write_types.h" struct bch_fs; struct btree_write; diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 6b17f7cc5860..f1651807c2b7 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -3,7 +3,6 @@ #include "checksum.h" #include "compress.h" #include "extents.h" -#include "io.h" #include "super-io.h" #include diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 81518f20d37d..29576c4c109d 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -9,7 +9,7 @@ #include "ec.h" #include "error.h" #include "extents.h" -#include "io.h" +#include "io_write.h" #include "keylist.h" #include "move.h" #include "nocow_locking.h" diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 49e9055cbb52..7ca1f98d7e94 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -4,7 +4,7 @@ #define _BCACHEFS_DATA_UPDATE_H #include "bkey_buf.h" -#include "io_types.h" +#include "io_write_types.h" struct moving_context; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ae47e1854b80..5f3e65f9069e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -19,7 +19,6 @@ #include "extents.h" #include "fsck.h" #include "inode.h" -#include "io.h" #include "super.h" #include diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 67a5453a36d9..40e72b96745a 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -11,10 +11,11 @@ #include "btree_update.h" #include "btree_write_buffer.h" #include "buckets.h" +#include "checksum.h" #include "disk_groups.h" #include "ec.h" #include "error.h" -#include "io.h" +#include "io_read.h" #include "keylist.h" #include "recovery.h" #include "replicas.h" diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c index dc906fc9176f..8d58f2cca260 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c @@ -61,3 +61,10 @@ int __bch2_err_class(int err) return -err; } + +const char *bch2_blk_status_to_str(blk_status_t status) +{ + if (status == BLK_STS_REMOVED) + return "device removed"; + return blk_status_to_str(status); +} diff --git 
a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index f7fa87442e98..379d9d7ed333 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -249,4 +249,8 @@ static inline long bch2_err_class(long err) return err < 0 ? __bch2_err_class(err) : err; } +#define BLK_STS_REMOVED ((__force blk_status_t)128) + +const char *bch2_blk_status_to_str(blk_status_t); + #endif /* _BCACHFES_ERRCODE_H */ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 39009cf0c448..2a5af8872613 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "error.h" -#include "io.h" #include "super.h" #define FSCK_ERR_RATELIMIT_NR 10 diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index dc22182d532f..2034d635c718 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -8,7 +8,8 @@ #include "fs-io-buffered.h" #include "fs-io-direct.h" #include "fs-io-pagecache.h" -#include "io.h" +#include "io_read.h" +#include "io_write.h" #include #include diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 2b29abd24d56..219bc1124477 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -7,7 +7,8 @@ #include "fs-io.h" #include "fs-io-direct.h" #include "fs-io-pagecache.h" -#include "io.h" +#include "io_read.h" +#include "io_write.h" #include #include diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index ceab12fb8a8f..0b0b3b0d6c7d 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -3,6 +3,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" @@ -16,7 +17,7 @@ #include "fsck.h" #include "inode.h" #include "journal.h" -#include "io.h" +#include "io_misc.h" #include "keylist.h" #include "quota.h" #include "reflink.h" diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index bb5b709fa8cf..bc6e8439d40b 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -6,7 +6,7 @@ #include "buckets.h" #include "fs.h" -#include "io_types.h" +#include "io_write_types.h" #include "quota.h" #include diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 08f810992a1b..0648874d54f3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -19,7 +19,7 @@ #include "fs-io-pagecache.h" #include "fsck.h" #include "inode.h" -#include "io.h" +#include "io_read.h" #include "journal.h" #include "keylist.h" #include "quota.h" diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c deleted file mode 100644 index 3c614c864b6e..000000000000 --- a/fs/bcachefs/io.c +++ /dev/null @@ -1,3051 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Some low level IO code, and hacks for various block layer limitations - * - * Copyright 2010, 2011 Kent Overstreet - * Copyright 2012 Google, Inc. 
- */ - -#include "bcachefs.h" -#include "alloc_background.h" -#include "alloc_foreground.h" -#include "bkey_buf.h" -#include "bset.h" -#include "btree_update.h" -#include "buckets.h" -#include "checksum.h" -#include "compress.h" -#include "clock.h" -#include "data_update.h" -#include "debug.h" -#include "disk_groups.h" -#include "ec.h" -#include "error.h" -#include "extent_update.h" -#include "inode.h" -#include "io.h" -#include "journal.h" -#include "keylist.h" -#include "move.h" -#include "nocow_locking.h" -#include "rebalance.h" -#include "subvolume.h" -#include "super.h" -#include "super-io.h" -#include "trace.h" - -#include -#include -#include -#include - -const char *bch2_blk_status_to_str(blk_status_t status) -{ - if (status == BLK_STS_REMOVED) - return "device removed"; - return blk_status_to_str(status); -} - -#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT - -static bool bch2_target_congested(struct bch_fs *c, u16 target) -{ - const struct bch_devs_mask *devs; - unsigned d, nr = 0, total = 0; - u64 now = local_clock(), last; - s64 congested; - struct bch_dev *ca; - - if (!target) - return false; - - rcu_read_lock(); - devs = bch2_target_to_mask(c, target) ?: - &c->rw_devs[BCH_DATA_user]; - - for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { - ca = rcu_dereference(c->devs[d]); - if (!ca) - continue; - - congested = atomic_read(&ca->congested); - last = READ_ONCE(ca->congested_last); - if (time_after64(now, last)) - congested -= (now - last) >> 12; - - total += max(congested, 0LL); - nr++; - } - rcu_read_unlock(); - - return bch2_rand_range(nr * CONGESTED_MAX) < total; -} - -static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, - u64 now, int rw) -{ - u64 latency_capable = - ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; - /* ideally we'd be taking into account the device's variance here: */ - u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3); - s64 latency_over = io_latency - latency_threshold; - - if (latency_threshold && latency_over > 0) { - /* - * bump up congested by approximately latency_over * 4 / - * latency_threshold - we don't need much accuracy here so don't - * bother with the divide: - */ - if (atomic_read(&ca->congested) < CONGESTED_MAX) - atomic_add(latency_over >> - max_t(int, ilog2(latency_threshold) - 2, 0), - &ca->congested); - - ca->congested_last = now; - } else if (atomic_read(&ca->congested) > 0) { - atomic_dec(&ca->congested); - } -} - -void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) -{ - atomic64_t *latency = &ca->cur_latency[rw]; - u64 now = local_clock(); - u64 io_latency = time_after64(now, submit_time) - ? 
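The throttle in bch2_target_congested() above is probabilistic: each device's congested count decays by one per 4096 ns since its last congestion event (the >> 12 of a local_clock() delta), and the target as a whole is reported congested with probability total / (nr * CONGESTED_MAX). A worked example with made-up numbers, assuming CONGESTED_MAX = 1024 as defined elsewhere in the tree: two devices whose decayed counts are 60 and 20 give total = 80, so

	/* P(congested) = total / (nr * CONGESTED_MAX) = 80 / 2048, ~4% */
	return bch2_rand_range(nr * CONGESTED_MAX) < total;

and a device that last reported congestion a full millisecond ago has already decayed by 1000000 >> 12 = 244 counts, so short bursts stop influencing read placement almost immediately.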
now - submit_time - : 0; - u64 old, new, v = atomic64_read(latency); - - do { - old = v; - - /* - * If the io latency was reasonably close to the current - * latency, skip doing the update and atomic operation - most of - * the time: - */ - if (abs((int) (old - io_latency)) < (old >> 1) && - now & ~(~0U << 5)) - break; - - new = ewma_add(old, io_latency, 5); - } while ((v = atomic64_cmpxchg(latency, old, new)) != old); - - bch2_congested_acct(ca, io_latency, now, rw); - - __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); -} - -#else - -static bool bch2_target_congested(struct bch_fs *c, u16 target) -{ - return false; -} - -#endif - -/* Allocate, free from mempool: */ - -void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) -{ - struct bvec_iter_all iter; - struct bio_vec *bv; - - bio_for_each_segment_all(bv, bio, iter) - if (bv->bv_page != ZERO_PAGE(0)) - mempool_free(bv->bv_page, &c->bio_bounce_pages); - bio->bi_vcnt = 0; -} - -static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) -{ - struct page *page; - - if (likely(!*using_mempool)) { - page = alloc_page(GFP_NOFS); - if (unlikely(!page)) { - mutex_lock(&c->bio_bounce_pages_lock); - *using_mempool = true; - goto pool_alloc; - - } - } else { -pool_alloc: - page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS); - } - - return page; -} - -void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, - size_t size) -{ - bool using_mempool = false; - - while (size) { - struct page *page = __bio_alloc_page_pool(c, &using_mempool); - unsigned len = min_t(size_t, PAGE_SIZE, size); - - BUG_ON(!bio_add_page(bio, page, len, 0)); - size -= len; - } - - if (using_mempool) - mutex_unlock(&c->bio_bounce_pages_lock); -} - -/* Extent update path: */ - -int bch2_sum_sector_overwrites(struct btree_trans *trans, - struct btree_iter *extent_iter, - struct bkey_i *new, - bool *usage_increasing, - s64 *i_sectors_delta, - s64 *disk_sectors_delta) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c old; - unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new)); - bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new)); - int ret = 0; - - *usage_increasing = false; - *i_sectors_delta = 0; - *disk_sectors_delta = 0; - - bch2_trans_copy_iter(&iter, extent_iter); - - for_each_btree_key_upto_continue_norestart(iter, - new->k.p, BTREE_ITER_SLOTS, old, ret) { - s64 sectors = min(new->k.p.offset, old.k->p.offset) - - max(bkey_start_offset(&new->k), - bkey_start_offset(old.k)); - - *i_sectors_delta += sectors * - (bkey_extent_is_allocation(&new->k) - - bkey_extent_is_allocation(old.k)); - - *disk_sectors_delta += sectors * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)); - *disk_sectors_delta -= new->k.p.snapshot == old.k->p.snapshot - ? 
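bch2_latency_acct() above is a lock-free exponentially weighted moving average with two cheap shortcuts: a sample within half of the current mean is skipped unless the low five bits of the clock happen to be zero (so roughly one update in 32 still gets through), and an accepted sample is folded in with weight 5, moving the mean by 1/32 of the sample's distance from it. A self-contained sketch of the ewma_add() helper this relies on, assuming the shift-based definition in util.h:

static inline u64 ewma_add_sketch(u64 ewma, u64 val, unsigned weight)
{
	/* new = (old * (2^weight - 1) + val) / 2^weight, in integer math */
	return ((ewma << weight) - ewma + val) >> weight;
}

Worked example: a running mean of 1000 us and a 2000 us sample give (1000 * 31 + 2000) / 32 = 1031 us.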
sectors * bch2_bkey_nr_ptrs_fully_allocated(old) - : 0; - - if (!*usage_increasing && - (new->k.p.snapshot != old.k->p.snapshot || - new_replicas > bch2_bkey_replicas(c, old) || - (!new_compressed && bch2_bkey_sectors_compressed(old)))) - *usage_increasing = true; - - if (bkey_ge(old.k->p, new->k.p)) - break; - } - - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, - struct btree_iter *extent_iter, - u64 new_i_size, - s64 i_sectors_delta) -{ - struct btree_iter iter; - struct bkey_i *k; - struct bkey_i_inode_v3 *inode; - unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; - int ret; - - k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes, - SPOS(0, - extent_iter->pos.inode, - extent_iter->snapshot), - BTREE_ITER_CACHED); - ret = PTR_ERR_OR_ZERO(k); - if (unlikely(ret)) - return ret; - - if (unlikely(k->k.type != KEY_TYPE_inode_v3)) { - k = bch2_inode_to_v3(trans, k); - ret = PTR_ERR_OR_ZERO(k); - if (unlikely(ret)) - goto err; - } - - inode = bkey_i_to_inode_v3(k); - - if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) && - new_i_size > le64_to_cpu(inode->v.bi_size)) { - inode->v.bi_size = cpu_to_le64(new_i_size); - inode_update_flags = 0; - } - - if (i_sectors_delta) { - le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); - inode_update_flags = 0; - } - - if (inode->k.p.snapshot != iter.snapshot) { - inode->k.p.snapshot = iter.snapshot; - inode_update_flags = 0; - } - - ret = bch2_trans_update(trans, &iter, &inode->k_i, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| - inode_update_flags); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -int bch2_extent_update(struct btree_trans *trans, - subvol_inum inum, - struct btree_iter *iter, - struct bkey_i *k, - struct disk_reservation *disk_res, - u64 new_i_size, - s64 *i_sectors_delta_total, - bool check_enospc) -{ - struct bpos next_pos; - bool usage_increasing; - s64 i_sectors_delta = 0, disk_sectors_delta = 0; - int ret; - - /* - * This traverses us the iterator without changing iter->path->pos to - * search_key() (which is pos + 1 for extents): we want there to be a - * path already traversed at iter->pos because - * bch2_trans_extent_update() will use it to attempt extent merging - */ - ret = __bch2_btree_iter_traverse(iter); - if (ret) - return ret; - - ret = bch2_extent_trim_atomic(trans, iter, k); - if (ret) - return ret; - - next_pos = k->k.p; - - ret = bch2_sum_sector_overwrites(trans, iter, k, - &usage_increasing, - &i_sectors_delta, - &disk_sectors_delta); - if (ret) - return ret; - - if (disk_res && - disk_sectors_delta > (s64) disk_res->sectors) { - ret = bch2_disk_reservation_add(trans->c, disk_res, - disk_sectors_delta - disk_res->sectors, - !check_enospc || !usage_increasing - ? 
BCH_DISK_RESERVATION_NOFAIL : 0); - if (ret) - return ret; - } - - /* - * Note: - * We always have to do an inode update - even when i_size/i_sectors - * aren't changing - for fsync to work properly; fsync relies on - * inode->bi_journal_seq which is updated by the trigger code: - */ - ret = bch2_extent_update_i_size_sectors(trans, iter, - min(k->k.p.offset << 9, new_i_size), - i_sectors_delta) ?: - bch2_trans_update(trans, iter, k, 0) ?: - bch2_trans_commit(trans, disk_res, NULL, - BTREE_INSERT_NOCHECK_RW| - BTREE_INSERT_NOFAIL); - if (unlikely(ret)) - return ret; - - if (i_sectors_delta_total) - *i_sectors_delta_total += i_sectors_delta; - bch2_btree_iter_set_pos(iter, next_pos); - return 0; -} - -/* Overwrites whatever was present with zeroes: */ -int bch2_extent_fallocate(struct btree_trans *trans, - subvol_inum inum, - struct btree_iter *iter, - unsigned sectors, - struct bch_io_opts opts, - s64 *i_sectors_delta, - struct write_point_specifier write_point) -{ - struct bch_fs *c = trans->c; - struct disk_reservation disk_res = { 0 }; - struct closure cl; - struct open_buckets open_buckets = { 0 }; - struct bkey_s_c k; - struct bkey_buf old, new; - unsigned sectors_allocated = 0; - bool have_reservation = false; - bool unwritten = opts.nocow && - c->sb.version >= bcachefs_metadata_version_unwritten_extents; - int ret; - - bch2_bkey_buf_init(&old); - bch2_bkey_buf_init(&new); - closure_init_stack(&cl); - - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; - - sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); - - if (!have_reservation) { - unsigned new_replicas = - max(0, (int) opts.data_replicas - - (int) bch2_bkey_nr_ptrs_fully_allocated(k)); - /* - * Get a disk reservation before (in the nocow case) calling - * into the allocator: - */ - ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); - if (unlikely(ret)) - goto err; - - bch2_bkey_buf_reassemble(&old, c, k); - } - - if (have_reservation) { - if (!bch2_extents_match(k, bkey_i_to_s_c(old.k))) - goto err; - - bch2_key_resize(&new.k->k, sectors); - } else if (!unwritten) { - struct bkey_i_reservation *reservation; - - bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); - reservation = bkey_reservation_init(new.k); - reservation->k.p = iter->pos; - bch2_key_resize(&reservation->k, sectors); - reservation->v.nr_replicas = opts.data_replicas; - } else { - struct bkey_i_extent *e; - struct bch_devs_list devs_have; - struct write_point *wp; - struct bch_extent_ptr *ptr; - - devs_have.nr = 0; - - bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); - - e = bkey_extent_init(new.k); - e->k.p = iter->pos; - - ret = bch2_alloc_sectors_start_trans(trans, - opts.foreground_target, - false, - write_point, - &devs_have, - opts.data_replicas, - opts.data_replicas, - BCH_WATERMARK_normal, 0, &cl, &wp); - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) - ret = -BCH_ERR_transaction_restart_nested; - if (ret) - goto err; - - sectors = min(sectors, wp->sectors_free); - sectors_allocated = sectors; - - bch2_key_resize(&e->k, sectors); - - bch2_open_bucket_get(c, wp, &open_buckets); - bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); - bch2_alloc_sectors_done(c, wp); - - extent_for_each_ptr(extent_i_to_s(e), ptr) - ptr->unwritten = true; - } - - have_reservation = true; - - ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, - 0, i_sectors_delta, true); -err: - if (!ret && sectors_allocated) - bch2_increment_clock(c, sectors_allocated, WRITE); 
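The reservation logic in bch2_extent_update() above only tops up what the caller did not already reserve. A worked example with hypothetical numbers: writing an 8 sector extent at data_replicas = 2 adds 16 sectors, and if the overwritten range held one fully allocated replica in the same snapshot, 8 sectors are credited back, so disk_sectors_delta = 8; a caller holding a 4 sector disk_res then has the 4 sector shortfall added here, and the add cannot fail with ENOSPC when usage is not actually increasing:

	/* sketch of the top-up, with the example numbers above */
	if (disk_res && disk_sectors_delta > (s64) disk_res->sectors)	/* 8 > 4 */
		ret = bch2_disk_reservation_add(trans->c, disk_res,
				disk_sectors_delta - disk_res->sectors,	/* 4 */
				!check_enospc || !usage_increasing
				? BCH_DISK_RESERVATION_NOFAIL : 0);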
- - bch2_open_buckets_put(c, &open_buckets); - bch2_disk_reservation_put(c, &disk_res); - bch2_bkey_buf_exit(&new, c); - bch2_bkey_buf_exit(&old, c); - - if (closure_nr_remaining(&cl) != 1) { - bch2_trans_unlock(trans); - closure_sync(&cl); - } - - return ret; -} - -/* - * Returns -BCH_ERR_transacton_restart if we had to drop locks: - */ -int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, - subvol_inum inum, u64 end, - s64 *i_sectors_delta) -{ - struct bch_fs *c = trans->c; - unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); - struct bpos end_pos = POS(inum.inum, end); - struct bkey_s_c k; - int ret = 0, ret2 = 0; - u32 snapshot; - - while (!ret || - bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(c, 0); - struct bkey_i delete; - - if (ret) - ret2 = ret; - - bch2_trans_begin(trans); - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); - if (ret) - continue; - - bch2_btree_iter_set_snapshot(iter, snapshot); - - /* - * peek_upto() doesn't have ideal semantics for extents: - */ - k = bch2_btree_iter_peek_upto(iter, end_pos); - if (!k.k) - break; - - ret = bkey_err(k); - if (ret) - continue; - - bkey_init(&delete.k); - delete.k.p = iter->pos; - - /* create the biggest key we can */ - bch2_key_resize(&delete.k, max_sectors); - bch2_cut_back(end_pos, &delete); - - ret = bch2_extent_update(trans, inum, iter, &delete, - &disk_res, 0, i_sectors_delta, false); - bch2_disk_reservation_put(c, &disk_res); - } - - return ret ?: ret2; -} - -int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, - s64 *i_sectors_delta) -{ - struct btree_trans trans; - struct btree_iter iter; - int ret; - - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, - POS(inum.inum, start), - BTREE_ITER_INTENT); - - ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); - - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - ret = 0; - - return ret; -} - -static int bch2_write_index_default(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct bkey_buf sk; - struct keylist *keys = &op->insert_keys; - struct bkey_i *k = bch2_keylist_front(keys); - struct btree_trans trans; - struct btree_iter iter; - subvol_inum inum = { - .subvol = op->subvol, - .inum = k->k.p.inode, - }; - int ret; - - BUG_ON(!inum.subvol); - - bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - - do { - bch2_trans_begin(&trans); - - k = bch2_keylist_front(keys); - bch2_bkey_buf_copy(&sk, c, k); - - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, - &sk.k->k.p.snapshot); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, - bkey_start_pos(&sk.k->k), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - - ret = bch2_extent_update(&trans, inum, &iter, sk.k, - &op->res, - op->new_i_size, &op->i_sectors_delta, - op->flags & BCH_WRITE_CHECK_ENOSPC); - bch2_trans_iter_exit(&trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - continue; - if (ret) - break; - - if (bkey_ge(iter.pos, k->k.p)) - bch2_keylist_pop_front(&op->insert_keys); - else - bch2_cut_front(iter.pos, k); - } while (!bch2_keylist_empty(keys)); - - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - - return ret; -} - -/* Writes */ - -void bch2_submit_wbio_replicas(struct 
bch_write_bio *wbio, struct bch_fs *c, - enum bch_data_type type, - const struct bkey_i *k, - bool nocow) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); - const struct bch_extent_ptr *ptr; - struct bch_write_bio *n; - struct bch_dev *ca; - - BUG_ON(c->opts.nochanges); - - bkey_for_each_ptr(ptrs, ptr) { - BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || - !c->devs[ptr->dev]); - - ca = bch_dev_bkey_exists(c, ptr->dev); - - if (to_entry(ptr + 1) < ptrs.end) { - n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, - GFP_NOFS, &ca->replica_set)); - - n->bio.bi_end_io = wbio->bio.bi_end_io; - n->bio.bi_private = wbio->bio.bi_private; - n->parent = wbio; - n->split = true; - n->bounce = false; - n->put_bio = true; - n->bio.bi_opf = wbio->bio.bi_opf; - bio_inc_remaining(&wbio->bio); - } else { - n = wbio; - n->split = false; - } - - n->c = c; - n->dev = ptr->dev; - n->have_ioref = nocow || bch2_dev_get_ioref(ca, - type == BCH_DATA_btree ? READ : WRITE); - n->nocow = nocow; - n->submit_time = local_clock(); - n->inode_offset = bkey_start_offset(&k->k); - n->bio.bi_iter.bi_sector = ptr->offset; - - if (likely(n->have_ioref)) { - this_cpu_add(ca->io_done->sectors[WRITE][type], - bio_sectors(&n->bio)); - - bio_set_dev(&n->bio, ca->disk_sb.bdev); - - if (type != BCH_DATA_btree && unlikely(c->opts.no_data_io)) { - bio_endio(&n->bio); - continue; - } - - submit_bio(&n->bio); - } else { - n->bio.bi_status = BLK_STS_REMOVED; - bio_endio(&n->bio); - } - } -} - -static void __bch2_write(struct bch_write_op *); - -static void bch2_write_done(struct closure *cl) -{ - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct bch_fs *c = op->c; - - EBUG_ON(op->open_buckets.nr); - - bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); - bch2_disk_reservation_put(c, &op->res); - - if (!(op->flags & BCH_WRITE_MOVE)) - bch2_write_ref_put(c, BCH_WRITE_REF_write); - bch2_keylist_free(&op->insert_keys, op->inline_keys); - - EBUG_ON(cl->parent); - closure_debug_destroy(cl); - if (op->end_io) - op->end_io(op); -} - -static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) -{ - struct keylist *keys = &op->insert_keys; - struct bch_extent_ptr *ptr; - struct bkey_i *src, *dst = keys->keys, *n; - - for (src = keys->keys; src != keys->top; src = n) { - n = bkey_next(src); - - if (bkey_extent_is_direct_data(&src->k)) { - bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, - test_bit(ptr->dev, op->failed.d)); - - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) - return -EIO; - } - - if (dst != src) - memmove_u64s_down(dst, src, src->k.u64s); - dst = bkey_next(dst); - } - - keys->top = dst; - return 0; -} - -/** - * bch_write_index - after a write, update index to point to new data - */ -static void __bch2_write_index(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct keylist *keys = &op->insert_keys; - struct bkey_i *k; - unsigned dev; - int ret = 0; - - if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { - ret = bch2_write_drop_io_error_ptrs(op); - if (ret) - goto err; - } - - /* - * probably not the ideal place to hook this in, but I don't - * particularly want to plumb io_opts all the way through the btree - * update stack right now - */ - for_each_keylist_key(keys, k) - bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); - - if (!bch2_keylist_empty(keys)) { - u64 sectors_start = keylist_sectors(keys); - - ret = !(op->flags & BCH_WRITE_MOVE) - ? 
bch2_write_index_default(op) - : bch2_data_update_index_update(op); - - BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); - BUG_ON(keylist_sectors(keys) && !ret); - - op->written += sectors_start - keylist_sectors(keys); - - if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *k = bch2_keylist_front(&op->insert_keys); - - bch_err_inum_offset_ratelimited(c, - k->k.p.inode, k->k.p.offset << 9, - "write error while doing btree update: %s", - bch2_err_str(ret)); - } - - if (ret) - goto err; - } -out: - /* If some a bucket wasn't written, we can't erasure code it: */ - for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) - bch2_open_bucket_write_error(c, &op->open_buckets, dev); - - bch2_open_buckets_put(c, &op->open_buckets); - return; -err: - keys->top = keys->keys; - op->error = ret; - op->flags |= BCH_WRITE_DONE; - goto out; -} - -static inline void __wp_update_state(struct write_point *wp, enum write_point_state state) -{ - if (state != wp->state) { - u64 now = ktime_get_ns(); - - if (wp->last_state_change && - time_after64(now, wp->last_state_change)) - wp->time[wp->state] += now - wp->last_state_change; - wp->state = state; - wp->last_state_change = now; - } -} - -static inline void wp_update_state(struct write_point *wp, bool running) -{ - enum write_point_state state; - - state = running ? WRITE_POINT_running : - !list_empty(&wp->writes) ? WRITE_POINT_waiting_io - : WRITE_POINT_stopped; - - __wp_update_state(wp, state); -} - -static void bch2_write_index(struct closure *cl) -{ - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct write_point *wp = op->wp; - struct workqueue_struct *wq = index_update_wq(op); - unsigned long flags; - - if ((op->flags & BCH_WRITE_DONE) && - (op->flags & BCH_WRITE_MOVE)) - bch2_bio_free_pages_pool(op->c, &op->wbio.bio); - - spin_lock_irqsave(&wp->writes_lock, flags); - if (wp->state == WRITE_POINT_waiting_io) - __wp_update_state(wp, WRITE_POINT_waiting_work); - list_add_tail(&op->wp_list, &wp->writes); - spin_unlock_irqrestore (&wp->writes_lock, flags); - - queue_work(wq, &wp->index_update_work); -} - -static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp) -{ - op->wp = wp; - - if (wp->state == WRITE_POINT_stopped) { - spin_lock_irq(&wp->writes_lock); - __wp_update_state(wp, WRITE_POINT_waiting_io); - spin_unlock_irq(&wp->writes_lock); - } -} - -void bch2_write_point_do_index_updates(struct work_struct *work) -{ - struct write_point *wp = - container_of(work, struct write_point, index_update_work); - struct bch_write_op *op; - - while (1) { - spin_lock_irq(&wp->writes_lock); - op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list); - if (op) - list_del(&op->wp_list); - wp_update_state(wp, op != NULL); - spin_unlock_irq(&wp->writes_lock); - - if (!op) - break; - - op->flags |= BCH_WRITE_IN_WORKER; - - __bch2_write_index(op); - - if (!(op->flags & BCH_WRITE_DONE)) - __bch2_write(op); - else - bch2_write_done(&op->cl); - } -} - -static void bch2_write_endio(struct bio *bio) -{ - struct closure *cl = bio->bi_private; - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct bch_write_bio *wbio = to_wbio(bio); - struct bch_write_bio *parent = wbio->split ? 
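Taken together, the functions above implement a small per write point state machine, with __wp_update_state() accumulating the time spent in each state for later reporting. A summary sketch; the states are from the code above, the transition notes are one reading of it:

	/*
	 * running      : the worker is processing an op's index updates
	 * waiting_io   : ops queued, their data writes still in flight
	 *                (bch2_write_queue())
	 * waiting_work : data writes done, index updates pending
	 *                (bch2_write_index())
	 * stopped      : wp->writes empty and nothing running
	 */
	state = running			  ? WRITE_POINT_running :
		!list_empty(&wp->writes) ? WRITE_POINT_waiting_io
					 : WRITE_POINT_stopped;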
wbio->parent : NULL; - struct bch_fs *c = wbio->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); - - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, - op->pos.inode, - wbio->inode_offset << 9, - "data write error: %s", - bch2_blk_status_to_str(bio->bi_status))) { - set_bit(wbio->dev, op->failed.d); - op->flags |= BCH_WRITE_IO_ERROR; - } - - if (wbio->nocow) - set_bit(wbio->dev, op->devs_need_flush->d); - - if (wbio->have_ioref) { - bch2_latency_acct(ca, wbio->submit_time, WRITE); - percpu_ref_put(&ca->io_ref); - } - - if (wbio->bounce) - bch2_bio_free_pages_pool(c, bio); - - if (wbio->put_bio) - bio_put(bio); - - if (parent) - bio_endio(&parent->bio); - else - closure_put(cl); -} - -static void init_append_extent(struct bch_write_op *op, - struct write_point *wp, - struct bversion version, - struct bch_extent_crc_unpacked crc) -{ - struct bkey_i_extent *e; - - op->pos.offset += crc.uncompressed_size; - - e = bkey_extent_init(op->insert_keys.top); - e->k.p = op->pos; - e->k.size = crc.uncompressed_size; - e->k.version = version; - - if (crc.csum_type || - crc.compression_type || - crc.nonce) - bch2_extent_crc_append(&e->k_i, crc); - - bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, - op->flags & BCH_WRITE_CACHED); - - bch2_keylist_push(&op->insert_keys); -} - -static struct bio *bch2_write_bio_alloc(struct bch_fs *c, - struct write_point *wp, - struct bio *src, - bool *page_alloc_failed, - void *buf) -{ - struct bch_write_bio *wbio; - struct bio *bio; - unsigned output_available = - min(wp->sectors_free << 9, src->bi_iter.bi_size); - unsigned pages = DIV_ROUND_UP(output_available + - (buf - ? ((unsigned long) buf & (PAGE_SIZE - 1)) - : 0), PAGE_SIZE); - - pages = min(pages, BIO_MAX_VECS); - - bio = bio_alloc_bioset(NULL, pages, 0, - GFP_NOFS, &c->bio_write); - wbio = wbio_init(bio); - wbio->put_bio = true; - /* copy WRITE_SYNC flag */ - wbio->bio.bi_opf = src->bi_opf; - - if (buf) { - bch2_bio_map(bio, buf, output_available); - return bio; - } - - wbio->bounce = true; - - /* - * We can't use mempool for more than c->sb.encoded_extent_max - * worth of pages, but we'd like to allocate more if we can: - */ - bch2_bio_alloc_pages_pool(c, bio, - min_t(unsigned, output_available, - c->opts.encoded_extent_max)); - - if (bio->bi_iter.bi_size < output_available) - *page_alloc_failed = - bch2_bio_alloc_pages(bio, - output_available - - bio->bi_iter.bi_size, - GFP_NOFS) != 0; - - return bio; -} - -static int bch2_write_rechecksum(struct bch_fs *c, - struct bch_write_op *op, - unsigned new_csum_type) -{ - struct bio *bio = &op->wbio.bio; - struct bch_extent_crc_unpacked new_crc; - int ret; - - /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ - - if (bch2_csum_type_is_encryption(op->crc.csum_type) != - bch2_csum_type_is_encryption(new_csum_type)) - new_csum_type = op->crc.csum_type; - - ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, - NULL, &new_crc, - op->crc.offset, op->crc.live_size, - new_csum_type); - if (ret) - return ret; - - bio_advance(bio, op->crc.offset << 9); - bio->bi_iter.bi_size = op->crc.live_size << 9; - op->crc = new_crc; - return 0; -} - -static int bch2_write_decrypt(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct nonce nonce = extent_nonce(op->version, op->crc); - struct bch_csum csum; - int ret; - - if (!bch2_csum_type_is_encryption(op->crc.csum_type)) - return 0; - - /* - * If we need to decrypt data in the write path, we'll no longer be able - * to verify the existing checksum (poly1305 mac, in this 
case) after - * it's decrypted - this is the last point we'll be able to reverify the - * checksum: - */ - csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); - if (bch2_crc_cmp(op->crc.csum, csum)) - return -EIO; - - ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); - op->crc.csum_type = 0; - op->crc.csum = (struct bch_csum) { 0, 0 }; - return ret; -} - -static enum prep_encoded_ret { - PREP_ENCODED_OK, - PREP_ENCODED_ERR, - PREP_ENCODED_CHECKSUM_ERR, - PREP_ENCODED_DO_WRITE, -} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) -{ - struct bch_fs *c = op->c; - struct bio *bio = &op->wbio.bio; - - if (!(op->flags & BCH_WRITE_DATA_ENCODED)) - return PREP_ENCODED_OK; - - BUG_ON(bio_sectors(bio) != op->crc.compressed_size); - - /* Can we just write the entire extent as is? */ - if (op->crc.uncompressed_size == op->crc.live_size && - op->crc.compressed_size <= wp->sectors_free && - (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || - op->incompressible)) { - if (!crc_is_compressed(op->crc) && - op->csum_type != op->crc.csum_type && - bch2_write_rechecksum(c, op, op->csum_type) && - !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; - - return PREP_ENCODED_DO_WRITE; - } - - /* - * If the data is compressed and we couldn't write the entire extent as - * is, we have to decompress it: - */ - if (crc_is_compressed(op->crc)) { - struct bch_csum csum; - - if (bch2_write_decrypt(op)) - return PREP_ENCODED_CHECKSUM_ERR; - - /* Last point we can still verify checksum: */ - csum = bch2_checksum_bio(c, op->crc.csum_type, - extent_nonce(op->version, op->crc), - bio); - if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; - - if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) - return PREP_ENCODED_ERR; - } - - /* - * No longer have compressed data after this point - data might be - * encrypted: - */ - - /* - * If the data is checksummed and we're only writing a subset, - * rechecksum and adjust bio to point to currently live data: - */ - if ((op->crc.live_size != op->crc.uncompressed_size || - op->crc.csum_type != op->csum_type) && - bch2_write_rechecksum(c, op, op->csum_type) && - !c->opts.no_data_io) - return PREP_ENCODED_CHECKSUM_ERR; - - /* - * If we want to compress the data, it has to be decrypted: - */ - if ((op->compression_opt || - bch2_csum_type_is_encryption(op->crc.csum_type) != - bch2_csum_type_is_encryption(op->csum_type)) && - bch2_write_decrypt(op)) - return PREP_ENCODED_CHECKSUM_ERR; - - return PREP_ENCODED_OK; -} - -static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, - struct bio **_dst) -{ - struct bch_fs *c = op->c; - struct bio *src = &op->wbio.bio, *dst = src; - struct bvec_iter saved_iter; - void *ec_buf; - unsigned total_output = 0, total_input = 0; - bool bounce = false; - bool page_alloc_failed = false; - int ret, more = 0; - - BUG_ON(!bio_sectors(src)); - - ec_buf = bch2_writepoint_ec_buf(c, wp); - - switch (bch2_write_prep_encoded_data(op, wp)) { - case PREP_ENCODED_OK: - break; - case PREP_ENCODED_ERR: - ret = -EIO; - goto err; - case PREP_ENCODED_CHECKSUM_ERR: - goto csum_err; - case PREP_ENCODED_DO_WRITE: - /* XXX look for bug here */ - if (ec_buf) { - dst = bch2_write_bio_alloc(c, wp, src, - &page_alloc_failed, - ec_buf); - bio_copy_data(dst, src); - bounce = true; - } - init_append_extent(op, wp, op->version, op->crc); - goto do_write; - } - - if (ec_buf || - op->compression_opt || - (op->csum_type && - 
!(op->flags & BCH_WRITE_PAGES_STABLE)) || - (bch2_csum_type_is_encryption(op->csum_type) && - !(op->flags & BCH_WRITE_PAGES_OWNED))) { - dst = bch2_write_bio_alloc(c, wp, src, - &page_alloc_failed, - ec_buf); - bounce = true; - } - - saved_iter = dst->bi_iter; - - do { - struct bch_extent_crc_unpacked crc = { 0 }; - struct bversion version = op->version; - size_t dst_len, src_len; - - if (page_alloc_failed && - dst->bi_iter.bi_size < (wp->sectors_free << 9) && - dst->bi_iter.bi_size < c->opts.encoded_extent_max) - break; - - BUG_ON(op->compression_opt && - (op->flags & BCH_WRITE_DATA_ENCODED) && - bch2_csum_type_is_encryption(op->crc.csum_type)); - BUG_ON(op->compression_opt && !bounce); - - crc.compression_type = op->incompressible - ? BCH_COMPRESSION_TYPE_incompressible - : op->compression_opt - ? bch2_bio_compress(c, dst, &dst_len, src, &src_len, - op->compression_opt) - : 0; - if (!crc_is_compressed(crc)) { - dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); - dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9); - - if (op->csum_type) - dst_len = min_t(unsigned, dst_len, - c->opts.encoded_extent_max); - - if (bounce) { - swap(dst->bi_iter.bi_size, dst_len); - bio_copy_data(dst, src); - swap(dst->bi_iter.bi_size, dst_len); - } - - src_len = dst_len; - } - - BUG_ON(!src_len || !dst_len); - - if (bch2_csum_type_is_encryption(op->csum_type)) { - if (bversion_zero(version)) { - version.lo = atomic64_inc_return(&c->key_version); - } else { - crc.nonce = op->nonce; - op->nonce += src_len >> 9; - } - } - - if ((op->flags & BCH_WRITE_DATA_ENCODED) && - !crc_is_compressed(crc) && - bch2_csum_type_is_encryption(op->crc.csum_type) == - bch2_csum_type_is_encryption(op->csum_type)) { - u8 compression_type = crc.compression_type; - u16 nonce = crc.nonce; - /* - * Note: when we're using rechecksum(), we need to be - * checksumming @src because it has all the data our - * existing checksum covers - if we bounced (because we - * were trying to compress), @dst will only have the - * part of the data the new checksum will cover. - * - * But normally we want to be checksumming post bounce, - * because part of the reason for bouncing is so the - * data can't be modified (by userspace) while it's in - * flight. - */ - if (bch2_rechecksum_bio(c, src, version, op->crc, - &crc, &op->crc, - src_len >> 9, - bio_sectors(src) - (src_len >> 9), - op->csum_type)) - goto csum_err; - /* - * rchecksum_bio sets compression_type on crc from op->crc, - * this isn't always correct as sometimes we're changing - * an extent from uncompressed to incompressible. 
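For orientation in the branch above: an extent's checksum metadata tracks the data as originally written separately from how much of it is still referenced. A worked example with hypothetical numbers, 512 byte sectors assumed: compressing a 64 KiB chunk (src_len = 65536) down to 24 KiB on disk gives

	crc.compressed_size	= dst_len >> 9;	/* 48 sectors on disk */
	crc.uncompressed_size	= src_len >> 9;	/* 128 sectors as written */
	crc.live_size		= src_len >> 9;	/* 128 still referenced */

and a later partial overwrite narrows live_size (and moves crc.offset) without touching the on-disk data, which is why bch2_write_rechecksum() earlier passes crc.offset and crc.live_size to locate the live region inside the checksummed extent.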
- */ - crc.compression_type = compression_type; - crc.nonce = nonce; - } else { - if ((op->flags & BCH_WRITE_DATA_ENCODED) && - bch2_rechecksum_bio(c, src, version, op->crc, - NULL, &op->crc, - src_len >> 9, - bio_sectors(src) - (src_len >> 9), - op->crc.csum_type)) - goto csum_err; - - crc.compressed_size = dst_len >> 9; - crc.uncompressed_size = src_len >> 9; - crc.live_size = src_len >> 9; - - swap(dst->bi_iter.bi_size, dst_len); - ret = bch2_encrypt_bio(c, op->csum_type, - extent_nonce(version, crc), dst); - if (ret) - goto err; - - crc.csum = bch2_checksum_bio(c, op->csum_type, - extent_nonce(version, crc), dst); - crc.csum_type = op->csum_type; - swap(dst->bi_iter.bi_size, dst_len); - } - - init_append_extent(op, wp, version, crc); - - if (dst != src) - bio_advance(dst, dst_len); - bio_advance(src, src_len); - total_output += dst_len; - total_input += src_len; - } while (dst->bi_iter.bi_size && - src->bi_iter.bi_size && - wp->sectors_free && - !bch2_keylist_realloc(&op->insert_keys, - op->inline_keys, - ARRAY_SIZE(op->inline_keys), - BKEY_EXTENT_U64s_MAX)); - - more = src->bi_iter.bi_size != 0; - - dst->bi_iter = saved_iter; - - if (dst == src && more) { - BUG_ON(total_output != total_input); - - dst = bio_split(src, total_input >> 9, - GFP_NOFS, &c->bio_write); - wbio_init(dst)->put_bio = true; - /* copy WRITE_SYNC flag */ - dst->bi_opf = src->bi_opf; - } - - dst->bi_iter.bi_size = total_output; -do_write: - *_dst = dst; - return more; -csum_err: - bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)"); - ret = -EIO; -err: - if (to_wbio(dst)->bounce) - bch2_bio_free_pages_pool(c, dst); - if (to_wbio(dst)->put_bio) - bio_put(dst); - - return ret; -} - -static bool bch2_extent_is_writeable(struct bch_write_op *op, - struct bkey_s_c k) -{ - struct bch_fs *c = op->c; - struct bkey_s_c_extent e; - struct extent_ptr_decoded p; - const union bch_extent_entry *entry; - unsigned replicas = 0; - - if (k.k->type != KEY_TYPE_extent) - return false; - - e = bkey_s_c_to_extent(k); - extent_for_each_ptr_decode(e, p, entry) { - if (p.crc.csum_type || - crc_is_compressed(p.crc) || - p.has_ec) - return false; - - replicas += bch2_extent_ptr_durability(c, &p); - } - - return replicas >= op->opts.data_replicas; -} - -static inline void bch2_nocow_write_unlock(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - const struct bch_extent_ptr *ptr; - struct bkey_i *k; - - for_each_keylist_key(&op->insert_keys, k) { - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); - - bkey_for_each_ptr(ptrs, ptr) - bch2_bucket_nocow_unlock(&c->nocow_locks, - PTR_BUCKET_POS(c, ptr), - BUCKET_NOCOW_LOCK_UPDATE); - } -} - -static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *orig, - struct bkey_s_c k, - u64 new_i_size) -{ - struct bkey_i *new; - struct bkey_ptrs ptrs; - struct bch_extent_ptr *ptr; - int ret; - - if (!bch2_extents_match(bkey_i_to_s_c(orig), k)) { - /* trace this */ - return 0; - } - - new = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(new); - if (ret) - return ret; - - bch2_cut_front(bkey_start_pos(&orig->k), new); - bch2_cut_back(orig->k.p, new); - - ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); - bkey_for_each_ptr(ptrs, ptr) - ptr->unwritten = 0; - - /* - * Note that we're not calling bch2_subvol_get_snapshot() in this path - - * that was done when we kicked off the write, and here it's important - * that we update the extent that we wrote to - even if a snapshot has 
- * since been created. The write is still outstanding, so we're ok - * w.r.t. snapshot atomicity: - */ - return bch2_extent_update_i_size_sectors(trans, iter, - min(new->k.p.offset << 9, new_i_size), 0) ?: - bch2_trans_update(trans, iter, new, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -} - -static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct btree_trans trans; - struct btree_iter iter; - struct bkey_i *orig; - struct bkey_s_c k; - int ret; - - bch2_trans_init(&trans, c, 0, 0); - - for_each_keylist_key(&op->insert_keys, orig) { - ret = for_each_btree_key_upto_commit(&trans, iter, BTREE_ID_extents, - bkey_start_pos(&orig->k), orig->k.p, - BTREE_ITER_INTENT, k, - NULL, NULL, BTREE_INSERT_NOFAIL, ({ - bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size); - })); - - if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *k = bch2_keylist_front(&op->insert_keys); - - bch_err_inum_offset_ratelimited(c, - k->k.p.inode, k->k.p.offset << 9, - "write error while doing btree update: %s", - bch2_err_str(ret)); - } - - if (ret) { - op->error = ret; - break; - } - } - - bch2_trans_exit(&trans); -} - -static void __bch2_nocow_write_done(struct bch_write_op *op) -{ - bch2_nocow_write_unlock(op); - - if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { - op->error = -EIO; - } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN)) - bch2_nocow_write_convert_unwritten(op); -} - -static void bch2_nocow_write_done(struct closure *cl) -{ - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - - __bch2_nocow_write_done(op); - bch2_write_done(cl); -} - -static void bch2_nocow_write(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_ptrs_c ptrs; - const struct bch_extent_ptr *ptr; - struct { - struct bpos b; - unsigned gen; - struct nocow_lock_bucket *l; - } buckets[BCH_REPLICAS_MAX]; - unsigned nr_buckets = 0; - u32 snapshot; - int ret, i; - - if (op->flags & BCH_WRITE_MOVE) - return; - - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - - ret = bch2_subvolume_get_snapshot(&trans, op->subvol, &snapshot); - if (unlikely(ret)) - goto err; - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, - SPOS(op->pos.inode, op->pos.offset, snapshot), - BTREE_ITER_SLOTS); - while (1) { - struct bio *bio = &op->wbio.bio; - - nr_buckets = 0; - - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - break; - - /* fall back to normal cow write path? 
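 *
 * (We give up on nocow and fall back when the key's snapshot doesn't
 * match, when the extent isn't nocow-writeable - checksummed, compressed
 * or erasure coded pointers, or not enough durability - or when the
 * keylist can't be grown.)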
*/ - if (unlikely(k.k->p.snapshot != snapshot || - !bch2_extent_is_writeable(op, k))) - break; - - if (bch2_keylist_realloc(&op->insert_keys, - op->inline_keys, - ARRAY_SIZE(op->inline_keys), - k.k->u64s)) - break; - - /* Get iorefs before dropping btree locks: */ - ptrs = bch2_bkey_ptrs_c(k); - bkey_for_each_ptr(ptrs, ptr) { - buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); - buckets[nr_buckets].gen = ptr->gen; - buckets[nr_buckets].l = - bucket_nocow_lock(&c->nocow_locks, - bucket_to_u64(buckets[nr_buckets].b)); - - prefetch(buckets[nr_buckets].l); - - if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE))) - goto err_get_ioref; - - nr_buckets++; - - if (ptr->unwritten) - op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; - } - - /* Unlock before taking nocow locks, doing IO: */ - bkey_reassemble(op->insert_keys.top, k); - bch2_trans_unlock(&trans); - - bch2_cut_front(op->pos, op->insert_keys.top); - if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) - bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); - - for (i = 0; i < nr_buckets; i++) { - struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); - struct nocow_lock_bucket *l = buckets[i].l; - bool stale; - - __bch2_bucket_nocow_lock(&c->nocow_locks, l, - bucket_to_u64(buckets[i].b), - BUCKET_NOCOW_LOCK_UPDATE); - - rcu_read_lock(); - stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); - rcu_read_unlock(); - - if (unlikely(stale)) - goto err_bucket_stale; - } - - bio = &op->wbio.bio; - if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) { - bio = bio_split(bio, k.k->p.offset - op->pos.offset, - GFP_KERNEL, &c->bio_write); - wbio_init(bio)->put_bio = true; - bio->bi_opf = op->wbio.bio.bi_opf; - } else { - op->flags |= BCH_WRITE_DONE; - } - - op->pos.offset += bio_sectors(bio); - op->written += bio_sectors(bio); - - bio->bi_end_io = bch2_write_endio; - bio->bi_private = &op->cl; - bio->bi_opf |= REQ_OP_WRITE; - closure_get(&op->cl); - bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, - op->insert_keys.top, true); - - bch2_keylist_push(&op->insert_keys); - if (op->flags & BCH_WRITE_DONE) - break; - bch2_btree_iter_advance(&iter); - } -out: - bch2_trans_iter_exit(&trans, &iter); -err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - if (ret) { - bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s: btree lookup error %s", - __func__, bch2_err_str(ret)); - op->error = ret; - op->flags |= BCH_WRITE_DONE; - } - - bch2_trans_exit(&trans); - - /* fallback to cow write path? 
*/ - if (!(op->flags & BCH_WRITE_DONE)) { - closure_sync(&op->cl); - __bch2_nocow_write_done(op); - op->insert_keys.top = op->insert_keys.keys; - } else if (op->flags & BCH_WRITE_SYNC) { - closure_sync(&op->cl); - bch2_nocow_write_done(&op->cl); - } else { - /* - * XXX - * needs to run out of process context because ei_quota_lock is - * a mutex - */ - continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op)); - } - return; -err_get_ioref: - for (i = 0; i < nr_buckets; i++) - percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); - - /* Fall back to COW path: */ - goto out; -err_bucket_stale: - while (--i >= 0) - bch2_bucket_nocow_unlock(&c->nocow_locks, - buckets[i].b, - BUCKET_NOCOW_LOCK_UPDATE); - for (i = 0; i < nr_buckets; i++) - percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); - - /* We can retry this: */ - ret = -BCH_ERR_transaction_restart; - goto out; -} - -static void __bch2_write(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct write_point *wp = NULL; - struct bio *bio = NULL; - unsigned nofs_flags; - int ret; - - nofs_flags = memalloc_nofs_save(); - - if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) { - bch2_nocow_write(op); - if (op->flags & BCH_WRITE_DONE) - goto out_nofs_restore; - } -again: - memset(&op->failed, 0, sizeof(op->failed)); - - do { - struct bkey_i *key_to_write; - unsigned key_to_write_offset = op->insert_keys.top_p - - op->insert_keys.keys_p; - - /* +1 for possible cache device: */ - if (op->open_buckets.nr + op->nr_replicas + 1 > - ARRAY_SIZE(op->open_buckets.v)) - break; - - if (bch2_keylist_realloc(&op->insert_keys, - op->inline_keys, - ARRAY_SIZE(op->inline_keys), - BKEY_EXTENT_U64s_MAX)) - break; - - /* - * The copygc thread is now global, which means it's no longer - * freeing up space on specific disks, which means that - * allocations for specific disks may hang arbitrarily long: - */ - ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_alloc_sectors_start_trans(&trans, - op->target, - op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), - op->write_point, - &op->devs_have, - op->nr_replicas, - op->nr_replicas_required, - op->watermark, - op->flags, - (op->flags & (BCH_WRITE_ALLOC_NOWAIT| - BCH_WRITE_ONLY_SPECIFIED_DEVS)) - ? NULL : &op->cl, &wp)); - if (unlikely(ret)) { - if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) - break; - - goto err; - } - - EBUG_ON(!wp); - - bch2_open_bucket_get(c, wp, &op->open_buckets); - ret = bch2_write_extent(op, wp, &bio); - - bch2_alloc_sectors_done_inlined(c, wp); -err: - if (ret <= 0) { - op->flags |= BCH_WRITE_DONE; - - if (ret < 0) { - op->error = ret; - break; - } - } - - bio->bi_end_io = bch2_write_endio; - bio->bi_private = &op->cl; - bio->bi_opf |= REQ_OP_WRITE; - - closure_get(bio->bi_private); - - key_to_write = (void *) (op->insert_keys.keys_p + - key_to_write_offset); - - bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, - key_to_write, false); - } while (ret); - - /* - * Sync or no? - * - * If we're running asynchronously, we may still want to block - * synchronously here if we weren't able to submit all of the IO at - * once, as that signals backpressure to the caller.
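 *
 * Illustrative scenario: if the write point only had room for part of
 * the bio, that part has been submitted and the op is not yet
 * BCH_WRITE_DONE; unless we're already running from a worker, we block
 * here before looping back for more space, rather than letting the
 * caller queue up an unbounded amount of IO.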
- */ - if ((op->flags & BCH_WRITE_SYNC) || - (!(op->flags & BCH_WRITE_DONE) && - !(op->flags & BCH_WRITE_IN_WORKER))) { - closure_sync(&op->cl); - __bch2_write_index(op); - - if (!(op->flags & BCH_WRITE_DONE)) - goto again; - bch2_write_done(&op->cl); - } else { - bch2_write_queue(op, wp); - continue_at(&op->cl, bch2_write_index, NULL); - } -out_nofs_restore: - memalloc_nofs_restore(nofs_flags); -} - -static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) -{ - struct bio *bio = &op->wbio.bio; - struct bvec_iter iter; - struct bkey_i_inline_data *id; - unsigned sectors; - int ret; - - op->flags |= BCH_WRITE_WROTE_DATA_INLINE; - op->flags |= BCH_WRITE_DONE; - - bch2_check_set_feature(op->c, BCH_FEATURE_inline_data); - - ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, - ARRAY_SIZE(op->inline_keys), - BKEY_U64s + DIV_ROUND_UP(data_len, 8)); - if (ret) { - op->error = ret; - goto err; - } - - sectors = bio_sectors(bio); - op->pos.offset += sectors; - - id = bkey_inline_data_init(op->insert_keys.top); - id->k.p = op->pos; - id->k.version = op->version; - id->k.size = sectors; - - iter = bio->bi_iter; - iter.bi_size = data_len; - memcpy_from_bio(id->v.data, bio, iter); - - while (data_len & 7) - id->v.data[data_len++] = '\0'; - set_bkey_val_bytes(&id->k, data_len); - bch2_keylist_push(&op->insert_keys); - - __bch2_write_index(op); -err: - bch2_write_done(&op->cl); -} - -/** - * bch2_write() - handle a write to a cache device or flash only volume - * - * This is the starting point for any data to end up in a cache device; it could - * be from a normal write, or a writeback write, or a write to a flash only - * volume - it's also used by the moving garbage collector to compact data in - * mostly empty buckets. - * - * It first writes the data to the cache, creating a list of keys to be inserted - * (if the data won't fit in a single open bucket, there will be multiple keys); - * after the data is written the keys are added to the journal, and once they've - * been added to the next journal write they're inserted into the btree.
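 *
 * A minimal caller-side sketch (hypothetical code, not part of this
 * patch; my_write_done() is an assumed completion callback, and
 * op->wbio.bio must already contain the data to be written):
 *
 *	struct bch_write_op *op = ...;
 *
 *	bch2_write_op_init(op, c, io_opts);
 *	op->pos		= POS(inum.inum, sector);
 *	op->subvol	= inum.subvol;
 *	op->nr_replicas	= c->opts.data_replicas;
 *	op->write_point	= writepoint_hashed((unsigned long) current);
 *	op->end_io	= my_write_done;
 *	closure_call(&op->cl, bch2_write, NULL, NULL);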
- */ -void bch2_write(struct closure *cl) -{ - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); - struct bio *bio = &op->wbio.bio; - struct bch_fs *c = op->c; - unsigned data_len; - - EBUG_ON(op->cl.parent); - BUG_ON(!op->nr_replicas); - BUG_ON(!op->write_point.v); - BUG_ON(bkey_eq(op->pos, POS_MAX)); - - op->start_time = local_clock(); - bch2_keylist_init(&op->insert_keys, op->inline_keys); - wbio_init(bio)->put_bio = false; - - if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { - bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "misaligned write"); - op->error = -EIO; - goto err; - } - - if (c->opts.nochanges) { - op->error = -BCH_ERR_erofs_no_writes; - goto err; - } - - if (!(op->flags & BCH_WRITE_MOVE) && - !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { - op->error = -BCH_ERR_erofs_no_writes; - goto err; - } - - this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio)); - bch2_increment_clock(c, bio_sectors(bio), WRITE); - - data_len = min_t(u64, bio->bi_iter.bi_size, - op->new_i_size - (op->pos.offset << 9)); - - if (c->opts.inline_data && - data_len <= min(block_bytes(c) / 2, 1024U)) { - bch2_write_data_inline(op, data_len); - return; - } - - __bch2_write(op); - return; -err: - bch2_disk_reservation_put(c, &op->res); - - closure_debug_destroy(&op->cl); - if (op->end_io) - op->end_io(op); -} - -static const char * const bch2_write_flags[] = { -#define x(f) #f, - BCH_WRITE_FLAGS() -#undef x - NULL -}; - -void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) -{ - prt_str(out, "pos: "); - bch2_bpos_to_text(out, op->pos); - prt_newline(out); - printbuf_indent_add(out, 2); - - prt_str(out, "started: "); - bch2_pr_time_units(out, local_clock() - op->start_time); - prt_newline(out); - - prt_str(out, "flags: "); - prt_bitflags(out, bch2_write_flags, op->flags); - prt_newline(out); - - prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl)); - prt_newline(out); - - printbuf_indent_sub(out, 2); -} - -/* Cache promotion on read */ - -struct promote_op { - struct rcu_head rcu; - u64 start_time; - - struct rhash_head hash; - struct bpos pos; - - struct data_update write; - struct bio_vec bi_inline_vecs[0]; /* must be last */ -}; - -static const struct rhashtable_params bch_promote_params = { - .head_offset = offsetof(struct promote_op, hash), - .key_offset = offsetof(struct promote_op, pos), - .key_len = sizeof(struct bpos), -}; - -static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, - struct bpos pos, - struct bch_io_opts opts, - unsigned flags) -{ - if (!(flags & BCH_READ_MAY_PROMOTE)) - return false; - - if (!opts.promote_target) - return false; - - if (bch2_bkey_has_target(c, k, opts.promote_target)) - return false; - - if (bkey_extent_is_unwritten(k)) - return false; - - if (bch2_target_congested(c, opts.promote_target)) { - /* XXX trace this */ - return false; - } - - if (rhashtable_lookup_fast(&c->promote_table, &pos, - bch_promote_params)) - return false; - - return true; -} - -static void promote_free(struct bch_fs *c, struct promote_op *op) -{ - int ret; - - bch2_data_update_exit(&op->write); - - ret = rhashtable_remove_fast(&c->promote_table, &op->hash, - bch_promote_params); - BUG_ON(ret); - bch2_write_ref_put(c, BCH_WRITE_REF_promote); - kfree_rcu(op, rcu); -} - -static void promote_done(struct bch_write_op *wop) -{ - struct promote_op *op = - container_of(wop, struct promote_op, write.op); - struct bch_fs *c = op->write.op.c; - - 
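	/*
	 * The promote op was inserted into c->promote_table when it was
	 * allocated, deduplicating concurrent promotes of the same extent;
	 * promote_free() below removes it again and drops the
	 * BCH_WRITE_REF_promote ref taken in __promote_alloc():
	 */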
bch2_time_stats_update(&c->times[BCH_TIME_data_promote], - op->start_time); - promote_free(c, op); -} - -static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) -{ - struct bio *bio = &op->write.op.wbio.bio; - - trace_and_count(op->write.op.c, read_promote, &rbio->bio); - - /* we now own pages: */ - BUG_ON(!rbio->bounce); - BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); - - memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, - sizeof(struct bio_vec) * rbio->bio.bi_vcnt); - swap(bio->bi_vcnt, rbio->bio.bi_vcnt); - - bch2_data_update_read_done(&op->write, rbio->pick.crc); -} - -static struct promote_op *__promote_alloc(struct btree_trans *trans, - enum btree_id btree_id, - struct bkey_s_c k, - struct bpos pos, - struct extent_ptr_decoded *pick, - struct bch_io_opts opts, - unsigned sectors, - struct bch_read_bio **rbio) -{ - struct bch_fs *c = trans->c; - struct promote_op *op = NULL; - struct bio *bio; - unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); - int ret; - - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) - return NULL; - - op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS); - if (!op) - goto err; - - op->start_time = local_clock(); - op->pos = pos; - - /* - * We don't use the mempool here because extents that aren't - * checksummed or compressed can be too big for the mempool: - */ - *rbio = kzalloc(sizeof(struct bch_read_bio) + - sizeof(struct bio_vec) * pages, - GFP_NOFS); - if (!*rbio) - goto err; - - rbio_init(&(*rbio)->bio, opts); - bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); - - if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, - GFP_NOFS)) - goto err; - - (*rbio)->bounce = true; - (*rbio)->split = true; - (*rbio)->kmalloc = true; - - if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, - bch_promote_params)) - goto err; - - bio = &op->write.op.wbio.bio; - bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); - - ret = bch2_data_update_init(trans, NULL, &op->write, - writepoint_hashed((unsigned long) current), - opts, - (struct data_update_opts) { - .target = opts.promote_target, - .extra_replicas = 1, - .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED, - }, - btree_id, k); - /* - * possible errors: -BCH_ERR_nocow_lock_blocked, - * -BCH_ERR_ENOSPC_disk_reservation: - */ - if (ret) { - ret = rhashtable_remove_fast(&c->promote_table, &op->hash, - bch_promote_params); - BUG_ON(ret); - goto err; - } - - op->write.op.end_io = promote_done; - - return op; -err: - if (*rbio) - bio_free_pages(&(*rbio)->bio); - kfree(*rbio); - *rbio = NULL; - kfree(op); - bch2_write_ref_put(c, BCH_WRITE_REF_promote); - return NULL; -} - -noinline -static struct promote_op *promote_alloc(struct btree_trans *trans, - struct bvec_iter iter, - struct bkey_s_c k, - struct extent_ptr_decoded *pick, - struct bch_io_opts opts, - unsigned flags, - struct bch_read_bio **rbio, - bool *bounce, - bool *read_full) -{ - struct bch_fs *c = trans->c; - bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); - /* data might have to be decompressed in the write path: */ - unsigned sectors = promote_full - ? max(pick->crc.compressed_size, pick->crc.live_size) - : bvec_iter_sectors(iter); - struct bpos pos = promote_full - ? bkey_start_pos(k.k) - : POS(k.k->p.inode, iter.bi_sector); - struct promote_op *promote; - - if (!should_promote(c, k, pos, opts, flags)) - return NULL; - - promote = __promote_alloc(trans, - k.k->type == KEY_TYPE_reflink_v - ? 
BTREE_ID_reflink - : BTREE_ID_extents, - k, pos, pick, opts, sectors, rbio); - if (!promote) - return NULL; - - *bounce = true; - *read_full = promote_full; - return promote; -} - -/* Read */ - -#define READ_RETRY_AVOID 1 -#define READ_RETRY 2 -#define READ_ERR 3 - -enum rbio_context { - RBIO_CONTEXT_NULL, - RBIO_CONTEXT_HIGHPRI, - RBIO_CONTEXT_UNBOUND, -}; - -static inline struct bch_read_bio * -bch2_rbio_parent(struct bch_read_bio *rbio) -{ - return rbio->split ? rbio->parent : rbio; -} - -__always_inline -static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, - enum rbio_context context, - struct workqueue_struct *wq) -{ - if (context <= rbio->context) { - fn(&rbio->work); - } else { - rbio->work.func = fn; - rbio->context = context; - queue_work(wq, &rbio->work); - } -} - -static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) -{ - BUG_ON(rbio->bounce && !rbio->split); - - if (rbio->promote) - promote_free(rbio->c, rbio->promote); - rbio->promote = NULL; - - if (rbio->bounce) - bch2_bio_free_pages_pool(rbio->c, &rbio->bio); - - if (rbio->split) { - struct bch_read_bio *parent = rbio->parent; - - if (rbio->kmalloc) - kfree(rbio); - else - bio_put(&rbio->bio); - - rbio = parent; - } - - return rbio; -} - -/* - * Only called on a top level bch_read_bio to complete an entire read request, - * not a split: - */ -static void bch2_rbio_done(struct bch_read_bio *rbio) -{ - if (rbio->start_time) - bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], - rbio->start_time); - bio_endio(&rbio->bio); -} - -static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, - struct bvec_iter bvec_iter, - struct bch_io_failures *failed, - unsigned flags) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_buf sk; - struct bkey_s_c k; - int ret; - - flags &= ~BCH_READ_LAST_FRAGMENT; - flags |= BCH_READ_MUST_CLONE; - - bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, rbio->data_btree, - rbio->read_pos, BTREE_ITER_SLOTS); -retry: - rbio->bio.bi_status = 0; - - k = bch2_btree_iter_peek_slot(&iter); - if (bkey_err(k)) - goto err; - - bch2_bkey_buf_reassemble(&sk, c, k); - k = bkey_i_to_s_c(sk.k); - bch2_trans_unlock(&trans); - - if (!bch2_bkey_matches_ptr(c, k, - rbio->pick.ptr, - rbio->data_pos.offset - - rbio->pick.crc.offset)) { - /* extent we wanted to read no longer exists: */ - rbio->hole = true; - goto out; - } - - ret = __bch2_read_extent(&trans, rbio, bvec_iter, - rbio->read_pos, - rbio->data_btree, - k, 0, failed, flags); - if (ret == READ_RETRY) - goto retry; - if (ret) - goto err; -out: - bch2_rbio_done(rbio); - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - return; -err: - rbio->bio.bi_status = BLK_STS_IOERR; - goto out; -} - -static void bch2_rbio_retry(struct work_struct *work) -{ - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bch_fs *c = rbio->c; - struct bvec_iter iter = rbio->bvec_iter; - unsigned flags = rbio->flags; - subvol_inum inum = { - .subvol = rbio->subvol, - .inum = rbio->read_pos.inode, - }; - struct bch_io_failures failed = { .nr = 0 }; - - trace_and_count(c, read_retry, &rbio->bio); - - if (rbio->retry == READ_RETRY_AVOID) - bch2_mark_io_failure(&failed, &rbio->pick); - - rbio->bio.bi_status = 0; - - rbio = bch2_rbio_free(rbio); - - flags |= BCH_READ_IN_RETRY; - flags &= ~BCH_READ_MAY_PROMOTE; - - if (flags & BCH_READ_NODECODE) { - bch2_read_retry_nodecode(c, 
rbio, iter, &failed, flags); - } else { - flags &= ~BCH_READ_LAST_FRAGMENT; - flags |= BCH_READ_MUST_CLONE; - - __bch2_read(c, rbio, iter, inum, &failed, flags); - } -} - -static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, - blk_status_t error) -{ - rbio->retry = retry; - - if (rbio->flags & BCH_READ_IN_RETRY) - return; - - if (retry == READ_ERR) { - rbio = bch2_rbio_free(rbio); - - rbio->bio.bi_status = error; - bch2_rbio_done(rbio); - } else { - bch2_rbio_punt(rbio, bch2_rbio_retry, - RBIO_CONTEXT_UNBOUND, system_unbound_wq); - } -} - -static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, - struct bch_read_bio *rbio) -{ - struct bch_fs *c = rbio->c; - u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; - struct bch_extent_crc_unpacked new_crc; - struct btree_iter iter; - struct bkey_i *new; - struct bkey_s_c k; - int ret = 0; - - if (crc_is_compressed(rbio->pick.crc)) - return 0; - - k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos, - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - if ((ret = bkey_err(k))) - goto out; - - if (bversion_cmp(k.k->version, rbio->version) || - !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) - goto out; - - /* Extent was merged? */ - if (bkey_start_offset(k.k) < data_offset || - k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) - goto out; - - if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, - rbio->pick.crc, NULL, &new_crc, - bkey_start_offset(k.k) - data_offset, k.k->size, - rbio->pick.crc.csum_type)) { - bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); - ret = 0; - goto out; - } - - /* - * going to be temporarily appending another checksum entry: - */ - new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + - sizeof(struct bch_extent_crc128)); - if ((ret = PTR_ERR_OR_ZERO(new))) - goto out; - - bkey_reassemble(new, k); - - if (!bch2_bkey_narrow_crcs(new, new_crc)) - goto out; - - ret = bch2_trans_update(trans, &iter, new, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); -out: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) -{ - bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, - __bch2_rbio_narrow_crcs(&trans, rbio)); -} - -/* Inner part that may run in process context */ -static void __bch2_read_endio(struct work_struct *work) -{ - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bch_fs *c = rbio->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); - struct bio *src = &rbio->bio; - struct bio *dst = &bch2_rbio_parent(rbio)->bio; - struct bvec_iter dst_iter = rbio->bvec_iter; - struct bch_extent_crc_unpacked crc = rbio->pick.crc; - struct nonce nonce = extent_nonce(rbio->version, crc); - unsigned nofs_flags; - struct bch_csum csum; - int ret; - - nofs_flags = memalloc_nofs_save(); - - /* Reset iterator for checksumming and copying bounced data: */ - if (rbio->bounce) { - src->bi_iter.bi_size = crc.compressed_size << 9; - src->bi_iter.bi_idx = 0; - src->bi_iter.bi_bvec_done = 0; - } else { - src->bi_iter = rbio->bvec_iter; - } - - csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); - if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io) - goto csum_err; - - /* - * XXX - * We need to rework the narrow_crcs path to deliver the read completion - * first, and then punt to a different workqueue, otherwise we're - * holding up reads while doing btree updates which is bad for memory - * reclaim. 
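 *
 * (Checksum narrowing, with illustrative numbers: if one checksum covers
 * a 128 sector extent of which only the first 32 sectors are still live,
 * every future read must fetch and checksum all 128 sectors. Since we've
 * just verified the whole checksum, __bch2_rbio_narrow_crcs() can replace
 * it with one covering only the range we read, shrinking future reads.)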
- */ - if (unlikely(rbio->narrow_crcs)) - bch2_rbio_narrow_crcs(rbio); - - if (rbio->flags & BCH_READ_NODECODE) - goto nodecode; - - /* Adjust crc to point to subset of data we want: */ - crc.offset += rbio->offset_into_extent; - crc.live_size = bvec_iter_sectors(rbio->bvec_iter); - - if (crc_is_compressed(crc)) { - ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); - if (ret) - goto decrypt_err; - - if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && - !c->opts.no_data_io) - goto decompression_err; - } else { - /* don't need to decrypt the entire bio: */ - nonce = nonce_add(nonce, crc.offset << 9); - bio_advance(src, crc.offset << 9); - - BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); - src->bi_iter.bi_size = dst_iter.bi_size; - - ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); - if (ret) - goto decrypt_err; - - if (rbio->bounce) { - struct bvec_iter src_iter = src->bi_iter; - - bio_copy_data_iter(dst, &dst_iter, src, &src_iter); - } - } - - if (rbio->promote) { - /* - * Re encrypt data we decrypted, so it's consistent with - * rbio->crc: - */ - ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); - if (ret) - goto decrypt_err; - - promote_start(rbio->promote, rbio); - rbio->promote = NULL; - } -nodecode: - if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { - rbio = bch2_rbio_free(rbio); - bch2_rbio_done(rbio); - } -out: - memalloc_nofs_restore(nofs_flags); - return; -csum_err: - /* - * Checksum error: if the bio wasn't bounced, we may have been - * reading into buffers owned by userspace (that userspace can - * scribble over) - retry the read, bouncing it this time: - */ - if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { - rbio->flags |= BCH_READ_MUST_BOUNCE; - bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); - goto out; - } - - bch_err_inum_offset_ratelimited(ca, - rbio->read_pos.inode, - rbio->read_pos.offset << 9, - "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", - rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, - csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); - bch2_io_error(ca); - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); - goto out; -decompression_err: - bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, - rbio->read_pos.offset << 9, - "decompression error"); - bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); - goto out; -decrypt_err: - bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, - rbio->read_pos.offset << 9, - "decrypt error"); - bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); - goto out; -} - -static void bch2_read_endio(struct bio *bio) -{ - struct bch_read_bio *rbio = - container_of(bio, struct bch_read_bio, bio); - struct bch_fs *c = rbio->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); - struct workqueue_struct *wq = NULL; - enum rbio_context context = RBIO_CONTEXT_NULL; - - if (rbio->have_ioref) { - bch2_latency_acct(ca, rbio->submit_time, READ); - percpu_ref_put(&ca->io_ref); - } - - if (!rbio->split) - rbio->bio.bi_end_io = rbio->end_io; - - if (bch2_dev_inum_io_err_on(bio->bi_status, ca, - rbio->read_pos.inode, - rbio->read_pos.offset, - "data read error: %s", - bch2_blk_status_to_str(bio->bi_status))) { - bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); - return; - } - - if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || - ptr_stale(ca, &rbio->pick.ptr)) { - trace_and_count(c, read_reuse_race, &rbio->bio); - - if (rbio->flags & BCH_READ_RETRY_IF_STALE) - bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); - else - bch2_rbio_error(rbio, READ_ERR, 
BLK_STS_AGAIN); - return; - } - - if (rbio->narrow_crcs || - rbio->promote || - crc_is_compressed(rbio->pick.crc) || - bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) - context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; - else if (rbio->pick.crc.csum_type) - context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; - - bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); -} - -int __bch2_read_indirect_extent(struct btree_trans *trans, - unsigned *offset_into_extent, - struct bkey_buf *orig_k) -{ - struct btree_iter iter; - struct bkey_s_c k; - u64 reflink_offset; - int ret; - - reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + - *offset_into_extent; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, - POS(0, reflink_offset), 0); - ret = bkey_err(k); - if (ret) - goto err; - - if (k.k->type != KEY_TYPE_reflink_v && - k.k->type != KEY_TYPE_indirect_inline_data) { - bch_err_inum_offset_ratelimited(trans->c, - orig_k->k->k.p.inode, - orig_k->k->k.p.offset << 9, - "%llu len %u points to nonexistent indirect extent %llu", - orig_k->k->k.p.offset, - orig_k->k->k.size, - reflink_offset); - bch2_inconsistent_error(trans->c); - ret = -EIO; - goto err; - } - - *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - bch2_bkey_buf_reassemble(orig_k, trans->c, k); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; -} - -static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, - struct bkey_s_c k, - struct bch_extent_ptr ptr) -{ - struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); - struct btree_iter iter; - struct printbuf buf = PRINTBUF; - int ret; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, - PTR_BUCKET_POS(c, &ptr), - BTREE_ITER_CACHED); - - prt_printf(&buf, "Attempting to read from stale dirty pointer:"); - printbuf_indent_add(&buf, 2); - prt_newline(&buf); - - bch2_bkey_val_to_text(&buf, c, k); - prt_newline(&buf); - - prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); - - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); - if (!ret) { - prt_newline(&buf); - bch2_bkey_val_to_text(&buf, c, k); - } - - bch2_fs_inconsistent(c, "%s", buf.buf); - - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); -} - -int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - struct bvec_iter iter, struct bpos read_pos, - enum btree_id data_btree, struct bkey_s_c k, - unsigned offset_into_extent, - struct bch_io_failures *failed, unsigned flags) -{ - struct bch_fs *c = trans->c; - struct extent_ptr_decoded pick; - struct bch_read_bio *rbio = NULL; - struct bch_dev *ca = NULL; - struct promote_op *promote = NULL; - bool bounce = false, read_full = false, narrow_crcs = false; - struct bpos data_pos = bkey_start_pos(k.k); - int pick_ret; - - if (bkey_extent_is_inline_data(k.k)) { - unsigned bytes = min_t(unsigned, iter.bi_size, - bkey_inline_data_bytes(k.k)); - - swap(iter.bi_size, bytes); - memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k)); - swap(iter.bi_size, bytes); - bio_advance_iter(&orig->bio, &iter, bytes); - zero_fill_bio_iter(&orig->bio, iter); - goto out_read_done; - } -retry_pick: - pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); - - /* hole or reservation - just zero fill: */ - if (!pick_ret) - goto hole; - - if (pick_ret < 0) { - bch_err_inum_offset_ratelimited(c, - read_pos.inode, read_pos.offset << 9, - "no device to read from"); - goto err; - } - - ca = bch_dev_bkey_exists(c, pick.ptr.dev); - 
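	/*
	 * Device picked; what remains before submitting the IO is to
	 * validate the pointer we picked (see below), then decide whether
	 * the read must be bounced or cloned, or can go directly into the
	 * caller's bio:
	 */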
- /* - * Stale dirty pointers are treated as IO errors, but @failed isn't - * allocated unless we're in the retry path - so if we're not in the - * retry path, don't check here, it'll be caught in bch2_read_endio() - * and we'll end up in the retry path: - */ - if ((flags & BCH_READ_IN_RETRY) && - !pick.ptr.cached && - unlikely(ptr_stale(ca, &pick.ptr))) { - read_from_stale_dirty_pointer(trans, k, pick.ptr); - bch2_mark_io_failure(failed, &pick); - goto retry_pick; - } - - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ - bch2_trans_unlock(trans); - - if (flags & BCH_READ_NODECODE) { - /* - * can happen if we retry, and the extent we were going to read - * has been merged in the meantime: - */ - if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) - goto hole; - - iter.bi_size = pick.crc.compressed_size << 9; - goto get_bio; - } - - if (!(flags & BCH_READ_LAST_FRAGMENT) || - bio_flagged(&orig->bio, BIO_CHAIN)) - flags |= BCH_READ_MUST_CLONE; - - narrow_crcs = !(flags & BCH_READ_IN_RETRY) && - bch2_can_narrow_extent_crcs(k, pick.crc); - - if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) - flags |= BCH_READ_MUST_BOUNCE; - - EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); - - if (crc_is_compressed(pick.crc) || - (pick.crc.csum_type != BCH_CSUM_none && - (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || - (bch2_csum_type_is_encryption(pick.crc.csum_type) && - (flags & BCH_READ_USER_MAPPED)) || - (flags & BCH_READ_MUST_BOUNCE)))) { - read_full = true; - bounce = true; - } - - if (orig->opts.promote_target) - promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags, - &rbio, &bounce, &read_full); - - if (!read_full) { - EBUG_ON(crc_is_compressed(pick.crc)); - EBUG_ON(pick.crc.csum_type && - (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || - bvec_iter_sectors(iter) != pick.crc.live_size || - pick.crc.offset || - offset_into_extent)); - - data_pos.offset += offset_into_extent; - pick.ptr.offset += pick.crc.offset + - offset_into_extent; - offset_into_extent = 0; - pick.crc.compressed_size = bvec_iter_sectors(iter); - pick.crc.uncompressed_size = bvec_iter_sectors(iter); - pick.crc.offset = 0; - pick.crc.live_size = bvec_iter_sectors(iter); - offset_into_extent = 0; - } -get_bio: - if (rbio) { - /* - * promote already allocated bounce rbio: - * promote needs to allocate a bio big enough for uncompressing - * data in the write path, but we're not going to use it all - * here: - */ - EBUG_ON(rbio->bio.bi_iter.bi_size < - pick.crc.compressed_size << 9); - rbio->bio.bi_iter.bi_size = - pick.crc.compressed_size << 9; - } else if (bounce) { - unsigned sectors = pick.crc.compressed_size; - - rbio = rbio_init(bio_alloc_bioset(NULL, - DIV_ROUND_UP(sectors, PAGE_SECTORS), - 0, - GFP_NOFS, - &c->bio_read_split), - orig->opts); - - bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); - rbio->bounce = true; - rbio->split = true; - } else if (flags & BCH_READ_MUST_CLONE) { - /* - * Have to clone if there were any splits, due to error - * reporting issues (if a split errored, and retrying didn't - * work, when it reports the error to its parent (us) we don't - * know if the error was from our bio, and we should retry, or - * from the whole bio, in which case we don't want to retry and - * lose the error) - */ - rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, - &c->bio_read_split), - orig->opts); - rbio->bio.bi_iter = iter; - rbio->split = true; - } else { - rbio = orig; - 
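		/* no bounce or clone needed: read directly into the caller's bio */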
rbio->bio.bi_iter = iter; - EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); - } - - EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); - - rbio->c = c; - rbio->submit_time = local_clock(); - if (rbio->split) - rbio->parent = orig; - else - rbio->end_io = orig->bio.bi_end_io; - rbio->bvec_iter = iter; - rbio->offset_into_extent= offset_into_extent; - rbio->flags = flags; - rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); - rbio->narrow_crcs = narrow_crcs; - rbio->hole = 0; - rbio->retry = 0; - rbio->context = 0; - /* XXX: only initialize this if needed */ - rbio->devs_have = bch2_bkey_devs(k); - rbio->pick = pick; - rbio->subvol = orig->subvol; - rbio->read_pos = read_pos; - rbio->data_btree = data_btree; - rbio->data_pos = data_pos; - rbio->version = k.k->version; - rbio->promote = promote; - INIT_WORK(&rbio->work, NULL); - - rbio->bio.bi_opf = orig->bio.bi_opf; - rbio->bio.bi_iter.bi_sector = pick.ptr.offset; - rbio->bio.bi_end_io = bch2_read_endio; - - if (rbio->bounce) - trace_and_count(c, read_bounce, &rbio->bio); - - this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); - bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); - - /* - * If it's being moved internally, we don't want to flag it as a cache - * hit: - */ - if (pick.ptr.cached && !(flags & BCH_READ_NODECODE)) - bch2_bucket_io_time_reset(trans, pick.ptr.dev, - PTR_BUCKET_NR(ca, &pick.ptr), READ); - - if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { - bio_inc_remaining(&orig->bio); - trace_and_count(c, read_split, &orig->bio); - } - - if (!rbio->pick.idx) { - if (!rbio->have_ioref) { - bch_err_inum_offset_ratelimited(c, - read_pos.inode, - read_pos.offset << 9, - "no device to read from"); - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); - goto out; - } - - this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user], - bio_sectors(&rbio->bio)); - bio_set_dev(&rbio->bio, ca->disk_sb.bdev); - - if (unlikely(c->opts.no_data_io)) { - if (likely(!(flags & BCH_READ_IN_RETRY))) - bio_endio(&rbio->bio); - } else { - if (likely(!(flags & BCH_READ_IN_RETRY))) - submit_bio(&rbio->bio); - else - submit_bio_wait(&rbio->bio); - } - - /* - * We just submitted IO which may block, we expect relock fail - * events and shouldn't count them: - */ - trans->notrace_relock_fail = true; - } else { - /* Attempting reconstruct read: */ - if (bch2_ec_read_extent(c, rbio)) { - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); - goto out; - } - - if (likely(!(flags & BCH_READ_IN_RETRY))) - bio_endio(&rbio->bio); - } -out: - if (likely(!(flags & BCH_READ_IN_RETRY))) { - return 0; - } else { - int ret; - - rbio->context = RBIO_CONTEXT_UNBOUND; - bch2_read_endio(&rbio->bio); - - ret = rbio->retry; - rbio = bch2_rbio_free(rbio); - - if (ret == READ_RETRY_AVOID) { - bch2_mark_io_failure(failed, &pick); - ret = READ_RETRY; - } - - if (!ret) - goto out_read_done; - - return ret; - } - -err: - if (flags & BCH_READ_IN_RETRY) - return READ_ERR; - - orig->bio.bi_status = BLK_STS_IOERR; - goto out_read_done; - -hole: - /* - * won't normally happen in the BCH_READ_NODECODE - * (bch2_move_extent()) path, but if we retry and the extent we wanted - * to read no longer exists we have to signal that: - */ - if (flags & BCH_READ_NODECODE) - orig->hole = true; - - zero_fill_bio_iter(&orig->bio, iter); -out_read_done: - if (flags & BCH_READ_LAST_FRAGMENT) - bch2_rbio_done(orig); - return 0; -} - -void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - struct bvec_iter bvec_iter, subvol_inum inum, - 
struct bch_io_failures *failed, unsigned flags) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_buf sk; - struct bkey_s_c k; - u32 snapshot; - int ret; - - BUG_ON(flags & BCH_READ_NODECODE); - - bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - iter = (struct btree_iter) { NULL }; - - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); - if (ret) - goto err; - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, - SPOS(inum.inum, bvec_iter.bi_sector, snapshot), - BTREE_ITER_SLOTS); - while (1) { - unsigned bytes, sectors, offset_into_extent; - enum btree_id data_btree = BTREE_ID_extents; - - /* - * read_extent -> io_time_reset may cause a transaction restart - * without returning an error, we need to check for that here: - */ - ret = bch2_trans_relock(&trans); - if (ret) - break; - - bch2_btree_iter_set_pos(&iter, - POS(inum.inum, bvec_iter.bi_sector)); - - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - break; - - offset_into_extent = iter.pos.offset - - bkey_start_offset(k.k); - sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&sk, c, k); - - ret = bch2_read_indirect_extent(&trans, &data_btree, - &offset_into_extent, &sk); - if (ret) - break; - - k = bkey_i_to_s_c(sk.k); - - /* - * With indirect extents, the amount of data to read is the min - * of the original extent and the indirect extent: - */ - sectors = min(sectors, k.k->size - offset_into_extent); - - bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; - swap(bvec_iter.bi_size, bytes); - - if (bvec_iter.bi_size == bytes) - flags |= BCH_READ_LAST_FRAGMENT; - - ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos, - data_btree, k, - offset_into_extent, failed, flags); - if (ret) - break; - - if (flags & BCH_READ_LAST_FRAGMENT) - break; - - swap(bvec_iter.bi_size, bytes); - bio_advance_iter(&rbio->bio, &bvec_iter, bytes); - - ret = btree_trans_too_many_iters(&trans); - if (ret) - break; - } -err: - bch2_trans_iter_exit(&trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || - ret == READ_RETRY || - ret == READ_RETRY_AVOID) - goto retry; - - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - - if (ret) { - bch_err_inum_offset_ratelimited(c, inum.inum, - bvec_iter.bi_sector << 9, - "read error %i from btree lookup", ret); - rbio->bio.bi_status = BLK_STS_IOERR; - bch2_rbio_done(rbio); - } -} - -void bch2_fs_io_exit(struct bch_fs *c) -{ - if (c->promote_table.tbl) - rhashtable_destroy(&c->promote_table); - mempool_exit(&c->bio_bounce_pages); - bioset_exit(&c->bio_write); - bioset_exit(&c->bio_read_split); - bioset_exit(&c->bio_read); -} - -int bch2_fs_io_init(struct bch_fs *c) -{ - if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), - BIOSET_NEED_BVECS)) - return -BCH_ERR_ENOMEM_bio_read_init; - - if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), - BIOSET_NEED_BVECS)) - return -BCH_ERR_ENOMEM_bio_read_split_init; - - if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), - BIOSET_NEED_BVECS)) - return -BCH_ERR_ENOMEM_bio_write_init; - - if (mempool_init_page_pool(&c->bio_bounce_pages, - max_t(unsigned, - c->opts.btree_node_size, - c->opts.encoded_extent_max) / - PAGE_SIZE, 0)) - return -BCH_ERR_ENOMEM_bio_bounce_pages_init; - - if (rhashtable_init(&c->promote_table, &bch_promote_params)) - return -BCH_ERR_ENOMEM_promote_table_init; - - return 0; -} diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h deleted file 
mode 100644 index 831e3f1b7e41..000000000000 --- a/fs/bcachefs/io.h +++ /dev/null @@ -1,202 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_IO_H -#define _BCACHEFS_IO_H - -#include "checksum.h" -#include "bkey_buf.h" -#include "io_types.h" - -#define to_wbio(_bio) \ - container_of((_bio), struct bch_write_bio, bio) - -#define to_rbio(_bio) \ - container_of((_bio), struct bch_read_bio, bio) - -void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); -void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); - -#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT -void bch2_latency_acct(struct bch_dev *, u64, int); -#else -static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {} -#endif - -void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, - enum bch_data_type, const struct bkey_i *, bool); - -#define BLK_STS_REMOVED ((__force blk_status_t)128) - -const char *bch2_blk_status_to_str(blk_status_t); - -#define BCH_WRITE_FLAGS() \ - x(ALLOC_NOWAIT) \ - x(CACHED) \ - x(DATA_ENCODED) \ - x(PAGES_STABLE) \ - x(PAGES_OWNED) \ - x(ONLY_SPECIFIED_DEVS) \ - x(WROTE_DATA_INLINE) \ - x(FROM_INTERNAL) \ - x(CHECK_ENOSPC) \ - x(SYNC) \ - x(MOVE) \ - x(IN_WORKER) \ - x(DONE) \ - x(IO_ERROR) \ - x(CONVERT_UNWRITTEN) - -enum __bch_write_flags { -#define x(f) __BCH_WRITE_##f, - BCH_WRITE_FLAGS() -#undef x -}; - -enum bch_write_flags { -#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f), - BCH_WRITE_FLAGS() -#undef x -}; - -static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) -{ - return op->watermark == BCH_WATERMARK_copygc - ? op->c->copygc_wq - : op->c->btree_update_wq; -} - -int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, - struct bkey_i *, bool *, s64 *, s64 *); -int bch2_extent_update(struct btree_trans *, subvol_inum, - struct btree_iter *, struct bkey_i *, - struct disk_reservation *, u64, s64 *, bool); -int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, - unsigned, struct bch_io_opts, s64 *, - struct write_point_specifier); - -int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, - subvol_inum, u64, s64 *); -int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); - -static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, - struct bch_io_opts opts) -{ - op->c = c; - op->end_io = NULL; - op->flags = 0; - op->written = 0; - op->error = 0; - op->csum_type = bch2_data_checksum_type(c, opts); - op->compression_opt = opts.compression; - op->nr_replicas = 0; - op->nr_replicas_required = c->opts.data_replicas_required; - op->watermark = BCH_WATERMARK_normal; - op->incompressible = 0; - op->open_buckets.nr = 0; - op->devs_have.nr = 0; - op->target = 0; - op->opts = opts; - op->subvol = 0; - op->pos = POS_MAX; - op->version = ZERO_VERSION; - op->write_point = (struct write_point_specifier) { 0 }; - op->res = (struct disk_reservation) { 0 }; - op->new_i_size = U64_MAX; - op->i_sectors_delta = 0; - op->devs_need_flush = NULL; -} - -void bch2_write(struct closure *); - -void bch2_write_point_do_index_updates(struct work_struct *); - -static inline struct bch_write_bio *wbio_init(struct bio *bio) -{ - struct bch_write_bio *wbio = to_wbio(bio); - - memset(&wbio->wbio, 0, sizeof(wbio->wbio)); - return wbio; -} - -void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *); - -struct bch_devs_mask; -struct cache_promote_op; -struct extent_ptr_decoded; - -int __bch2_read_indirect_extent(struct btree_trans *, unsigned 
*, - struct bkey_buf *); - -static inline int bch2_read_indirect_extent(struct btree_trans *trans, - enum btree_id *data_btree, - unsigned *offset_into_extent, - struct bkey_buf *k) -{ - if (k->k->k.type != KEY_TYPE_reflink_p) - return 0; - - *data_btree = BTREE_ID_reflink; - return __bch2_read_indirect_extent(trans, offset_into_extent, k); -} - -enum bch_read_flags { - BCH_READ_RETRY_IF_STALE = 1 << 0, - BCH_READ_MAY_PROMOTE = 1 << 1, - BCH_READ_USER_MAPPED = 1 << 2, - BCH_READ_NODECODE = 1 << 3, - BCH_READ_LAST_FRAGMENT = 1 << 4, - - /* internal: */ - BCH_READ_MUST_BOUNCE = 1 << 5, - BCH_READ_MUST_CLONE = 1 << 6, - BCH_READ_IN_RETRY = 1 << 7, -}; - -int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, - struct bvec_iter, struct bpos, enum btree_id, - struct bkey_s_c, unsigned, - struct bch_io_failures *, unsigned); - -static inline void bch2_read_extent(struct btree_trans *trans, - struct bch_read_bio *rbio, struct bpos read_pos, - enum btree_id data_btree, struct bkey_s_c k, - unsigned offset_into_extent, unsigned flags) -{ - __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, - data_btree, k, offset_into_extent, NULL, flags); -} - -void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, - subvol_inum, struct bch_io_failures *, unsigned flags); - -static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, - subvol_inum inum) -{ - struct bch_io_failures failed = { .nr = 0 }; - - BUG_ON(rbio->_state); - - rbio->c = c; - rbio->start_time = local_clock(); - rbio->subvol = inum.subvol; - - __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, - BCH_READ_RETRY_IF_STALE| - BCH_READ_MAY_PROMOTE| - BCH_READ_USER_MAPPED); -} - -static inline struct bch_read_bio *rbio_init(struct bio *bio, - struct bch_io_opts opts) -{ - struct bch_read_bio *rbio = to_rbio(bio); - - rbio->_state = 0; - rbio->promote = NULL; - rbio->opts = opts; - return rbio; -} - -void bch2_fs_io_exit(struct bch_fs *); -int bch2_fs_io_init(struct bch_fs *); - -#endif /* _BCACHEFS_IO_H */ diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c new file mode 100644 index 000000000000..c04e5dacfc8d --- /dev/null +++ b/fs/bcachefs/io_misc.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * io_misc.c - fallocate, fpunch, truncate: + */ + +#include "bcachefs.h" +#include "alloc_foreground.h" +#include "bkey_buf.h" +#include "btree_update.h" +#include "buckets.h" +#include "clock.h" +#include "extents.h" +#include "io_misc.h" +#include "io_write.h" +#include "subvolume.h" + +/* Overwrites whatever was present with zeroes: */ +int bch2_extent_fallocate(struct btree_trans *trans, + subvol_inum inum, + struct btree_iter *iter, + unsigned sectors, + struct bch_io_opts opts, + s64 *i_sectors_delta, + struct write_point_specifier write_point) +{ + struct bch_fs *c = trans->c; + struct disk_reservation disk_res = { 0 }; + struct closure cl; + struct open_buckets open_buckets = { 0 }; + struct bkey_s_c k; + struct bkey_buf old, new; + unsigned sectors_allocated = 0; + bool have_reservation = false; + bool unwritten = opts.nocow && + c->sb.version >= bcachefs_metadata_version_unwritten_extents; + int ret; + + bch2_bkey_buf_init(&old); + bch2_bkey_buf_init(&new); + closure_init_stack(&cl); + + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + return ret; + + sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); + + if (!have_reservation) { + unsigned new_replicas = + max(0, (int) opts.data_replicas - + (int) 
bch2_bkey_nr_ptrs_fully_allocated(k)); + /* + * Get a disk reservation before (in the nocow case) calling + * into the allocator: + */ + ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); + if (unlikely(ret)) + goto err; + + bch2_bkey_buf_reassemble(&old, c, k); + } + + if (have_reservation) { + if (!bch2_extents_match(k, bkey_i_to_s_c(old.k))) + goto err; + + bch2_key_resize(&new.k->k, sectors); + } else if (!unwritten) { + struct bkey_i_reservation *reservation; + + bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); + reservation = bkey_reservation_init(new.k); + reservation->k.p = iter->pos; + bch2_key_resize(&reservation->k, sectors); + reservation->v.nr_replicas = opts.data_replicas; + } else { + struct bkey_i_extent *e; + struct bch_devs_list devs_have; + struct write_point *wp; + struct bch_extent_ptr *ptr; + + devs_have.nr = 0; + + bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); + + e = bkey_extent_init(new.k); + e->k.p = iter->pos; + + ret = bch2_alloc_sectors_start_trans(trans, + opts.foreground_target, + false, + write_point, + &devs_have, + opts.data_replicas, + opts.data_replicas, + BCH_WATERMARK_normal, 0, &cl, &wp); + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) + ret = -BCH_ERR_transaction_restart_nested; + if (ret) + goto err; + + sectors = min(sectors, wp->sectors_free); + sectors_allocated = sectors; + + bch2_key_resize(&e->k, sectors); + + bch2_open_bucket_get(c, wp, &open_buckets); + bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); + bch2_alloc_sectors_done(c, wp); + + extent_for_each_ptr(extent_i_to_s(e), ptr) + ptr->unwritten = true; + } + + have_reservation = true; + + ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, + 0, i_sectors_delta, true); +err: + if (!ret && sectors_allocated) + bch2_increment_clock(c, sectors_allocated, WRITE); + + bch2_open_buckets_put(c, &open_buckets); + bch2_disk_reservation_put(c, &disk_res); + bch2_bkey_buf_exit(&new, c); + bch2_bkey_buf_exit(&old, c); + + if (closure_nr_remaining(&cl) != 1) { + bch2_trans_unlock(trans); + closure_sync(&cl); + } + + return ret; +} + +/* + * Returns -BCH_ERR_transaction_restart if we had to drop locks: + */ +int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, + subvol_inum inum, u64 end, + s64 *i_sectors_delta) +{ + struct bch_fs *c = trans->c; + unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); + struct bpos end_pos = POS(inum.inum, end); + struct bkey_s_c k; + int ret = 0, ret2 = 0; + u32 snapshot; + + while (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(c, 0); + struct bkey_i delete; + + if (ret) + ret2 = ret; + + bch2_trans_begin(trans); + + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + continue; + + bch2_btree_iter_set_snapshot(iter, snapshot); + + /* + * peek_upto() doesn't have ideal semantics for extents: + */ + k = bch2_btree_iter_peek_upto(iter, end_pos); + if (!k.k) + break; + + ret = bkey_err(k); + if (ret) + continue; + + bkey_init(&delete.k); + delete.k.p = iter->pos; + + /* create the biggest key we can */ + bch2_key_resize(&delete.k, max_sectors); + bch2_cut_back(end_pos, &delete); + + ret = bch2_extent_update(trans, inum, iter, &delete, + &disk_res, 0, i_sectors_delta, false); + bch2_disk_reservation_put(c, &disk_res); + } + + return ret ?: ret2; +} + +int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, + s64 *i_sectors_delta) +{ + struct 
btree_trans trans; + struct btree_iter iter; + int ret; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + POS(inum.inum, start), + BTREE_ITER_INTENT); + + ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); + + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + ret = 0; + + return ret; +} diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h new file mode 100644 index 000000000000..46e9ce3251d6 --- /dev/null +++ b/fs/bcachefs/io_misc.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_IO_MISC_H +#define _BCACHEFS_IO_MISC_H + +int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *, + unsigned, struct bch_io_opts, s64 *, + struct write_point_specifier); +int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, + subvol_inum, u64, s64 *); +int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); + +#endif /* _BCACHEFS_IO_MISC_H */ diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c new file mode 100644 index 000000000000..cd62bf730396 --- /dev/null +++ b/fs/bcachefs/io_read.c @@ -0,0 +1,1207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Some low level IO code, and hacks for various block layer limitations + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcachefs.h" +#include "alloc_background.h" +#include "alloc_foreground.h" +#include "btree_update.h" +#include "buckets.h" +#include "checksum.h" +#include "clock.h" +#include "compress.h" +#include "data_update.h" +#include "disk_groups.h" +#include "ec.h" +#include "error.h" +#include "io_read.h" +#include "io_misc.h" +#include "io_write.h" +#include "subvolume.h" +#include "trace.h" + +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT + +static bool bch2_target_congested(struct bch_fs *c, u16 target) +{ + const struct bch_devs_mask *devs; + unsigned d, nr = 0, total = 0; + u64 now = local_clock(), last; + s64 congested; + struct bch_dev *ca; + + if (!target) + return false; + + rcu_read_lock(); + devs = bch2_target_to_mask(c, target) ?: + &c->rw_devs[BCH_DATA_user]; + + for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { + ca = rcu_dereference(c->devs[d]); + if (!ca) + continue; + + congested = atomic_read(&ca->congested); + last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + total += max(congested, 0LL); + nr++; + } + rcu_read_unlock(); + + return bch2_rand_range(nr * CONGESTED_MAX) < total; +} + +#else + +static bool bch2_target_congested(struct bch_fs *c, u16 target) +{ + return false; +} + +#endif + +/* Cache promotion on read */ + +struct promote_op { + struct rcu_head rcu; + u64 start_time; + + struct rhash_head hash; + struct bpos pos; + + struct data_update write; + struct bio_vec bi_inline_vecs[0]; /* must be last */ +}; + +static const struct rhashtable_params bch_promote_params = { + .head_offset = offsetof(struct promote_op, hash), + .key_offset = offsetof(struct promote_op, pos), + .key_len = sizeof(struct bpos), +}; + +static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, + struct bpos pos, + struct bch_io_opts opts, + unsigned flags) +{ + if (!(flags & BCH_READ_MAY_PROMOTE)) + return false; + + if (!opts.promote_target) + return false; + + if (bch2_bkey_has_target(c, k, opts.promote_target)) + return false; + + if (bkey_extent_is_unwritten(k)) + return false; + + if (bch2_target_congested(c, 
opts.promote_target)) { + /* XXX trace this */ + return false; + } + + if (rhashtable_lookup_fast(&c->promote_table, &pos, + bch_promote_params)) + return false; + + return true; +} + +static void promote_free(struct bch_fs *c, struct promote_op *op) +{ + int ret; + + bch2_data_update_exit(&op->write); + + ret = rhashtable_remove_fast(&c->promote_table, &op->hash, + bch_promote_params); + BUG_ON(ret); + bch2_write_ref_put(c, BCH_WRITE_REF_promote); + kfree_rcu(op, rcu); +} + +static void promote_done(struct bch_write_op *wop) +{ + struct promote_op *op = + container_of(wop, struct promote_op, write.op); + struct bch_fs *c = op->write.op.c; + + bch2_time_stats_update(&c->times[BCH_TIME_data_promote], + op->start_time); + promote_free(c, op); +} + +static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) +{ + struct bio *bio = &op->write.op.wbio.bio; + + trace_and_count(op->write.op.c, read_promote, &rbio->bio); + + /* we now own pages: */ + BUG_ON(!rbio->bounce); + BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs); + + memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec, + sizeof(struct bio_vec) * rbio->bio.bi_vcnt); + swap(bio->bi_vcnt, rbio->bio.bi_vcnt); + + bch2_data_update_read_done(&op->write, rbio->pick.crc); +} + +static struct promote_op *__promote_alloc(struct btree_trans *trans, + enum btree_id btree_id, + struct bkey_s_c k, + struct bpos pos, + struct extent_ptr_decoded *pick, + struct bch_io_opts opts, + unsigned sectors, + struct bch_read_bio **rbio) +{ + struct bch_fs *c = trans->c; + struct promote_op *op = NULL; + struct bio *bio; + unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS); + int ret; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote)) + return NULL; + + op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS); + if (!op) + goto err; + + op->start_time = local_clock(); + op->pos = pos; + + /* + * We don't use the mempool here because extents that aren't + * checksummed or compressed can be too big for the mempool: + */ + *rbio = kzalloc(sizeof(struct bch_read_bio) + + sizeof(struct bio_vec) * pages, + GFP_NOFS); + if (!*rbio) + goto err; + + rbio_init(&(*rbio)->bio, opts); + bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0); + + if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, + GFP_NOFS)) + goto err; + + (*rbio)->bounce = true; + (*rbio)->split = true; + (*rbio)->kmalloc = true; + + if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash, + bch_promote_params)) + goto err; + + bio = &op->write.op.wbio.bio; + bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); + + ret = bch2_data_update_init(trans, NULL, &op->write, + writepoint_hashed((unsigned long) current), + opts, + (struct data_update_opts) { + .target = opts.promote_target, + .extra_replicas = 1, + .write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED, + }, + btree_id, k); + /* + * possible errors: -BCH_ERR_nocow_lock_blocked, + * -BCH_ERR_ENOSPC_disk_reservation: + */ + if (ret) { + ret = rhashtable_remove_fast(&c->promote_table, &op->hash, + bch_promote_params); + BUG_ON(ret); + goto err; + } + + op->write.op.end_io = promote_done; + + return op; +err: + if (*rbio) + bio_free_pages(&(*rbio)->bio); + kfree(*rbio); + *rbio = NULL; + kfree(op); + bch2_write_ref_put(c, BCH_WRITE_REF_promote); + return NULL; +} + +noinline +static struct promote_op *promote_alloc(struct btree_trans *trans, + struct bvec_iter iter, + struct bkey_s_c k, + struct extent_ptr_decoded *pick, + struct bch_io_opts opts, + unsigned flags, + struct bch_read_bio **rbio, + bool 
*bounce, + bool *read_full) +{ + struct bch_fs *c = trans->c; + bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents); + /* data might have to be decompressed in the write path: */ + unsigned sectors = promote_full + ? max(pick->crc.compressed_size, pick->crc.live_size) + : bvec_iter_sectors(iter); + struct bpos pos = promote_full + ? bkey_start_pos(k.k) + : POS(k.k->p.inode, iter.bi_sector); + struct promote_op *promote; + + if (!should_promote(c, k, pos, opts, flags)) + return NULL; + + promote = __promote_alloc(trans, + k.k->type == KEY_TYPE_reflink_v + ? BTREE_ID_reflink + : BTREE_ID_extents, + k, pos, pick, opts, sectors, rbio); + if (!promote) + return NULL; + + *bounce = true; + *read_full = promote_full; + return promote; +} + +/* Read */ + +#define READ_RETRY_AVOID 1 +#define READ_RETRY 2 +#define READ_ERR 3 + +enum rbio_context { + RBIO_CONTEXT_NULL, + RBIO_CONTEXT_HIGHPRI, + RBIO_CONTEXT_UNBOUND, +}; + +static inline struct bch_read_bio * +bch2_rbio_parent(struct bch_read_bio *rbio) +{ + return rbio->split ? rbio->parent : rbio; +} + +__always_inline +static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn, + enum rbio_context context, + struct workqueue_struct *wq) +{ + if (context <= rbio->context) { + fn(&rbio->work); + } else { + rbio->work.func = fn; + rbio->context = context; + queue_work(wq, &rbio->work); + } +} + +static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) +{ + BUG_ON(rbio->bounce && !rbio->split); + + if (rbio->promote) + promote_free(rbio->c, rbio->promote); + rbio->promote = NULL; + + if (rbio->bounce) + bch2_bio_free_pages_pool(rbio->c, &rbio->bio); + + if (rbio->split) { + struct bch_read_bio *parent = rbio->parent; + + if (rbio->kmalloc) + kfree(rbio); + else + bio_put(&rbio->bio); + + rbio = parent; + } + + return rbio; +} + +/* + * Only called on a top level bch_read_bio to complete an entire read request, + * not a split: + */ +static void bch2_rbio_done(struct bch_read_bio *rbio) +{ + if (rbio->start_time) + bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], + rbio->start_time); + bio_endio(&rbio->bio); +} + +static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio, + struct bvec_iter bvec_iter, + struct bch_io_failures *failed, + unsigned flags) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_buf sk; + struct bkey_s_c k; + int ret; + + flags &= ~BCH_READ_LAST_FRAGMENT; + flags |= BCH_READ_MUST_CLONE; + + bch2_bkey_buf_init(&sk); + bch2_trans_init(&trans, c, 0, 0); + + bch2_trans_iter_init(&trans, &iter, rbio->data_btree, + rbio->read_pos, BTREE_ITER_SLOTS); +retry: + rbio->bio.bi_status = 0; + + k = bch2_btree_iter_peek_slot(&iter); + if (bkey_err(k)) + goto err; + + bch2_bkey_buf_reassemble(&sk, c, k); + k = bkey_i_to_s_c(sk.k); + bch2_trans_unlock(&trans); + + if (!bch2_bkey_matches_ptr(c, k, + rbio->pick.ptr, + rbio->data_pos.offset - + rbio->pick.crc.offset)) { + /* extent we wanted to read no longer exists: */ + rbio->hole = true; + goto out; + } + + ret = __bch2_read_extent(&trans, rbio, bvec_iter, + rbio->read_pos, + rbio->data_btree, + k, 0, failed, flags); + if (ret == READ_RETRY) + goto retry; + if (ret) + goto err; +out: + bch2_rbio_done(rbio); + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); + return; +err: + rbio->bio.bi_status = BLK_STS_IOERR; + goto out; +} + +static void bch2_rbio_retry(struct work_struct *work) +{ + struct bch_read_bio *rbio = + container_of(work, struct 
bch_read_bio, work); + struct bch_fs *c = rbio->c; + struct bvec_iter iter = rbio->bvec_iter; + unsigned flags = rbio->flags; + subvol_inum inum = { + .subvol = rbio->subvol, + .inum = rbio->read_pos.inode, + }; + struct bch_io_failures failed = { .nr = 0 }; + + trace_and_count(c, read_retry, &rbio->bio); + + if (rbio->retry == READ_RETRY_AVOID) + bch2_mark_io_failure(&failed, &rbio->pick); + + rbio->bio.bi_status = 0; + + rbio = bch2_rbio_free(rbio); + + flags |= BCH_READ_IN_RETRY; + flags &= ~BCH_READ_MAY_PROMOTE; + + if (flags & BCH_READ_NODECODE) { + bch2_read_retry_nodecode(c, rbio, iter, &failed, flags); + } else { + flags &= ~BCH_READ_LAST_FRAGMENT; + flags |= BCH_READ_MUST_CLONE; + + __bch2_read(c, rbio, iter, inum, &failed, flags); + } +} + +static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, + blk_status_t error) +{ + rbio->retry = retry; + + if (rbio->flags & BCH_READ_IN_RETRY) + return; + + if (retry == READ_ERR) { + rbio = bch2_rbio_free(rbio); + + rbio->bio.bi_status = error; + bch2_rbio_done(rbio); + } else { + bch2_rbio_punt(rbio, bch2_rbio_retry, + RBIO_CONTEXT_UNBOUND, system_unbound_wq); + } +} + +static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, + struct bch_read_bio *rbio) +{ + struct bch_fs *c = rbio->c; + u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; + struct bch_extent_crc_unpacked new_crc; + struct btree_iter iter; + struct bkey_i *new; + struct bkey_s_c k; + int ret = 0; + + if (crc_is_compressed(rbio->pick.crc)) + return 0; + + k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos, + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + if ((ret = bkey_err(k))) + goto out; + + if (bversion_cmp(k.k->version, rbio->version) || + !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) + goto out; + + /* Extent was merged? 
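 + * If so - if the key now starts before the data we read, or ends past + * it - a narrowed crc entry wouldn't cover the whole extent, so the + * bounds check below bails out.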
*/ + if (bkey_start_offset(k.k) < data_offset || + k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) + goto out; + + if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, + rbio->pick.crc, NULL, &new_crc, + bkey_start_offset(k.k) - data_offset, k.k->size, + rbio->pick.crc.csum_type)) { + bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); + ret = 0; + goto out; + } + + /* + * going to be temporarily appending another checksum entry: + */ + new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + + sizeof(struct bch_extent_crc128)); + if ((ret = PTR_ERR_OR_ZERO(new))) + goto out; + + bkey_reassemble(new, k); + + if (!bch2_bkey_narrow_crcs(new, new_crc)) + goto out; + + ret = bch2_trans_update(trans, &iter, new, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); +out: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) +{ + bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, + __bch2_rbio_narrow_crcs(&trans, rbio)); +} + +/* Inner part that may run in process context */ +static void __bch2_read_endio(struct work_struct *work) +{ + struct bch_read_bio *rbio = + container_of(work, struct bch_read_bio, work); + struct bch_fs *c = rbio->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); + struct bio *src = &rbio->bio; + struct bio *dst = &bch2_rbio_parent(rbio)->bio; + struct bvec_iter dst_iter = rbio->bvec_iter; + struct bch_extent_crc_unpacked crc = rbio->pick.crc; + struct nonce nonce = extent_nonce(rbio->version, crc); + unsigned nofs_flags; + struct bch_csum csum; + int ret; + + nofs_flags = memalloc_nofs_save(); + + /* Reset iterator for checksumming and copying bounced data: */ + if (rbio->bounce) { + src->bi_iter.bi_size = crc.compressed_size << 9; + src->bi_iter.bi_idx = 0; + src->bi_iter.bi_bvec_done = 0; + } else { + src->bi_iter = rbio->bvec_iter; + } + + csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); + if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io) + goto csum_err; + + /* + * XXX + * We need to rework the narrow_crcs path to deliver the read completion + * first, and then punt to a different workqueue, otherwise we're + * holding up reads while doing btree updates which is bad for memory + * reclaim. 
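 + * + * One possible shape for that rework (a sketch only): complete the + * read first via bch2_rbio_done(), then punt __bch2_rbio_narrow_crcs() + * to a separate workqueue so the btree update runs off the completion + * path.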
+ */ + if (unlikely(rbio->narrow_crcs)) + bch2_rbio_narrow_crcs(rbio); + + if (rbio->flags & BCH_READ_NODECODE) + goto nodecode; + + /* Adjust crc to point to subset of data we want: */ + crc.offset += rbio->offset_into_extent; + crc.live_size = bvec_iter_sectors(rbio->bvec_iter); + + if (crc_is_compressed(crc)) { + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); + if (ret) + goto decrypt_err; + + if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) && + !c->opts.no_data_io) + goto decompression_err; + } else { + /* don't need to decrypt the entire bio: */ + nonce = nonce_add(nonce, crc.offset << 9); + bio_advance(src, crc.offset << 9); + + BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size); + src->bi_iter.bi_size = dst_iter.bi_size; + + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); + if (ret) + goto decrypt_err; + + if (rbio->bounce) { + struct bvec_iter src_iter = src->bi_iter; + + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); + } + } + + if (rbio->promote) { + /* + * Re encrypt data we decrypted, so it's consistent with + * rbio->crc: + */ + ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src); + if (ret) + goto decrypt_err; + + promote_start(rbio->promote, rbio); + rbio->promote = NULL; + } +nodecode: + if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) { + rbio = bch2_rbio_free(rbio); + bch2_rbio_done(rbio); + } +out: + memalloc_nofs_restore(nofs_flags); + return; +csum_err: + /* + * Checksum error: if the bio wasn't bounced, we may have been + * reading into buffers owned by userspace (that userspace can + * scribble over) - retry the read, bouncing it this time: + */ + if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) { + rbio->flags |= BCH_READ_MUST_BOUNCE; + bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR); + goto out; + } + + bch_err_inum_offset_ratelimited(ca, + rbio->read_pos.inode, + rbio->read_pos.offset << 9, + "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)", + rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, + csum.hi, csum.lo, bch2_csum_types[crc.csum_type]); + bch2_io_error(ca); + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + goto out; +decompression_err: + bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, + rbio->read_pos.offset << 9, + "decompression error"); + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + goto out; +decrypt_err: + bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, + rbio->read_pos.offset << 9, + "decrypt error"); + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + goto out; +} + +static void bch2_read_endio(struct bio *bio) +{ + struct bch_read_bio *rbio = + container_of(bio, struct bch_read_bio, bio); + struct bch_fs *c = rbio->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, rbio->pick.ptr.dev); + struct workqueue_struct *wq = NULL; + enum rbio_context context = RBIO_CONTEXT_NULL; + + if (rbio->have_ioref) { + bch2_latency_acct(ca, rbio->submit_time, READ); + percpu_ref_put(&ca->io_ref); + } + + if (!rbio->split) + rbio->bio.bi_end_io = rbio->end_io; + + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + rbio->read_pos.inode, + rbio->read_pos.offset, + "data read error: %s", + bch2_blk_status_to_str(bio->bi_status))) { + bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); + return; + } + + if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) || + ptr_stale(ca, &rbio->pick.ptr)) { + trace_and_count(c, read_reuse_race, &rbio->bio); + + if (rbio->flags & BCH_READ_RETRY_IF_STALE) + bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN); + else + bch2_rbio_error(rbio, READ_ERR, 
BLK_STS_AGAIN); + return; + } + + if (rbio->narrow_crcs || + rbio->promote || + crc_is_compressed(rbio->pick.crc) || + bch2_csum_type_is_encryption(rbio->pick.crc.csum_type)) + context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq; + else if (rbio->pick.crc.csum_type) + context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq; + + bch2_rbio_punt(rbio, __bch2_read_endio, context, wq); +} + +int __bch2_read_indirect_extent(struct btree_trans *trans, + unsigned *offset_into_extent, + struct bkey_buf *orig_k) +{ + struct btree_iter iter; + struct bkey_s_c k; + u64 reflink_offset; + int ret; + + reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) + + *offset_into_extent; + + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink, + POS(0, reflink_offset), 0); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type != KEY_TYPE_reflink_v && + k.k->type != KEY_TYPE_indirect_inline_data) { + bch_err_inum_offset_ratelimited(trans->c, + orig_k->k->k.p.inode, + orig_k->k->k.p.offset << 9, + "%llu len %u points to nonexistent indirect extent %llu", + orig_k->k->k.p.offset, + orig_k->k->k.size, + reflink_offset); + bch2_inconsistent_error(trans->c); + ret = -EIO; + goto err; + } + + *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); + bch2_bkey_buf_reassemble(orig_k, trans->c, k); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, + struct bkey_s_c k, + struct bch_extent_ptr ptr) +{ + struct bch_fs *c = trans->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); + struct btree_iter iter; + struct printbuf buf = PRINTBUF; + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + PTR_BUCKET_POS(c, &ptr), + BTREE_ITER_CACHED); + + prt_printf(&buf, "Attempting to read from stale dirty pointer:"); + printbuf_indent_add(&buf, 2); + prt_newline(&buf); + + bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); + + prt_printf(&buf, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); + + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + if (!ret) { + prt_newline(&buf); + bch2_bkey_val_to_text(&buf, c, k); + } + + bch2_fs_inconsistent(c, "%s", buf.buf); + + bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); +} + +int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, + struct bvec_iter iter, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + unsigned offset_into_extent, + struct bch_io_failures *failed, unsigned flags) +{ + struct bch_fs *c = trans->c; + struct extent_ptr_decoded pick; + struct bch_read_bio *rbio = NULL; + struct bch_dev *ca = NULL; + struct promote_op *promote = NULL; + bool bounce = false, read_full = false, narrow_crcs = false; + struct bpos data_pos = bkey_start_pos(k.k); + int pick_ret; + + if (bkey_extent_is_inline_data(k.k)) { + unsigned bytes = min_t(unsigned, iter.bi_size, + bkey_inline_data_bytes(k.k)); + + swap(iter.bi_size, bytes); + memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k)); + swap(iter.bi_size, bytes); + bio_advance_iter(&orig->bio, &iter, bytes); + zero_fill_bio_iter(&orig->bio, iter); + goto out_read_done; + } +retry_pick: + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); + + /* hole or reservation - just zero fill: */ + if (!pick_ret) + goto hole; + + if (pick_ret < 0) { + bch_err_inum_offset_ratelimited(c, + read_pos.inode, read_pos.offset << 9, + "no device to read from"); + goto err; + } + + ca = bch_dev_bkey_exists(c, pick.ptr.dev); + 
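 + /* + * At this point @pick describes the replica we chose: pick.ptr locates + * the data on @ca, pick.crc carries the checksum/compression parameters + * it was written with, and a nonzero pick.idx means an erasure coded + * read that may need reconstruction (see the submission path below). + */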
+ /* + * Stale dirty pointers are treated as IO errors, but @failed isn't + * allocated unless we're in the retry path - so if we're not in the + * retry path, don't check here, it'll be caught in bch2_read_endio() + * and we'll end up in the retry path: + */ + if ((flags & BCH_READ_IN_RETRY) && + !pick.ptr.cached && + unlikely(ptr_stale(ca, &pick.ptr))) { + read_from_stale_dirty_pointer(trans, k, pick.ptr); + bch2_mark_io_failure(failed, &pick); + goto retry_pick; + } + + /* + * Unlock the iterator while the btree node's lock is still in + * cache, before doing the IO: + */ + bch2_trans_unlock(trans); + + if (flags & BCH_READ_NODECODE) { + /* + * can happen if we retry, and the extent we were going to read + * has been merged in the meantime: + */ + if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) + goto hole; + + iter.bi_size = pick.crc.compressed_size << 9; + goto get_bio; + } + + if (!(flags & BCH_READ_LAST_FRAGMENT) || + bio_flagged(&orig->bio, BIO_CHAIN)) + flags |= BCH_READ_MUST_CLONE; + + narrow_crcs = !(flags & BCH_READ_IN_RETRY) && + bch2_can_narrow_extent_crcs(k, pick.crc); + + if (narrow_crcs && (flags & BCH_READ_USER_MAPPED)) + flags |= BCH_READ_MUST_BOUNCE; + + EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size); + + if (crc_is_compressed(pick.crc) || + (pick.crc.csum_type != BCH_CSUM_none && + (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || + (bch2_csum_type_is_encryption(pick.crc.csum_type) && + (flags & BCH_READ_USER_MAPPED)) || + (flags & BCH_READ_MUST_BOUNCE)))) { + read_full = true; + bounce = true; + } + + if (orig->opts.promote_target) + promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags, + &rbio, &bounce, &read_full); + + if (!read_full) { + EBUG_ON(crc_is_compressed(pick.crc)); + EBUG_ON(pick.crc.csum_type && + (bvec_iter_sectors(iter) != pick.crc.uncompressed_size || + bvec_iter_sectors(iter) != pick.crc.live_size || + pick.crc.offset || + offset_into_extent)); + + data_pos.offset += offset_into_extent; + pick.ptr.offset += pick.crc.offset + + offset_into_extent; + offset_into_extent = 0; + pick.crc.compressed_size = bvec_iter_sectors(iter); + pick.crc.uncompressed_size = bvec_iter_sectors(iter); + pick.crc.offset = 0; + pick.crc.live_size = bvec_iter_sectors(iter); + offset_into_extent = 0; + } +get_bio: + if (rbio) { + /* + * promote already allocated bounce rbio: + * promote needs to allocate a bio big enough for uncompressing + * data in the write path, but we're not going to use it all + * here: + */ + EBUG_ON(rbio->bio.bi_iter.bi_size < + pick.crc.compressed_size << 9); + rbio->bio.bi_iter.bi_size = + pick.crc.compressed_size << 9; + } else if (bounce) { + unsigned sectors = pick.crc.compressed_size; + + rbio = rbio_init(bio_alloc_bioset(NULL, + DIV_ROUND_UP(sectors, PAGE_SECTORS), + 0, + GFP_NOFS, + &c->bio_read_split), + orig->opts); + + bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); + rbio->bounce = true; + rbio->split = true; + } else if (flags & BCH_READ_MUST_CLONE) { + /* + * Have to clone if there were any splits, due to error + * reporting issues (if a split errored, and retrying didn't + * work, when it reports the error to its parent (us) we don't + * know if the error was from our bio, and we should retry, or + * from the whole bio, in which case we don't want to retry and + * lose the error) + */ + rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS, + &c->bio_read_split), + orig->opts); + rbio->bio.bi_iter = iter; + rbio->split = true; + } else { + rbio = orig; + 
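 + /* + * Direct read: no bounce buffer and no clone, we read straight into + * the bio we were handed. The branches above handled, in order: a + * bounce rbio already allocated by promote, bouncing for checksummed + * or compressed extents, and cloning when the read had to be split. + */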
rbio->bio.bi_iter = iter; + EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN)); + } + + EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size); + + rbio->c = c; + rbio->submit_time = local_clock(); + if (rbio->split) + rbio->parent = orig; + else + rbio->end_io = orig->bio.bi_end_io; + rbio->bvec_iter = iter; + rbio->offset_into_extent= offset_into_extent; + rbio->flags = flags; + rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ); + rbio->narrow_crcs = narrow_crcs; + rbio->hole = 0; + rbio->retry = 0; + rbio->context = 0; + /* XXX: only initialize this if needed */ + rbio->devs_have = bch2_bkey_devs(k); + rbio->pick = pick; + rbio->subvol = orig->subvol; + rbio->read_pos = read_pos; + rbio->data_btree = data_btree; + rbio->data_pos = data_pos; + rbio->version = k.k->version; + rbio->promote = promote; + INIT_WORK(&rbio->work, NULL); + + rbio->bio.bi_opf = orig->bio.bi_opf; + rbio->bio.bi_iter.bi_sector = pick.ptr.offset; + rbio->bio.bi_end_io = bch2_read_endio; + + if (rbio->bounce) + trace_and_count(c, read_bounce, &rbio->bio); + + this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); + bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); + + /* + * If it's being moved internally, we don't want to flag it as a cache + * hit: + */ + if (pick.ptr.cached && !(flags & BCH_READ_NODECODE)) + bch2_bucket_io_time_reset(trans, pick.ptr.dev, + PTR_BUCKET_NR(ca, &pick.ptr), READ); + + if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) { + bio_inc_remaining(&orig->bio); + trace_and_count(c, read_split, &orig->bio); + } + + if (!rbio->pick.idx) { + if (!rbio->have_ioref) { + bch_err_inum_offset_ratelimited(c, + read_pos.inode, + read_pos.offset << 9, + "no device to read from"); + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + goto out; + } + + this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user], + bio_sectors(&rbio->bio)); + bio_set_dev(&rbio->bio, ca->disk_sb.bdev); + + if (unlikely(c->opts.no_data_io)) { + if (likely(!(flags & BCH_READ_IN_RETRY))) + bio_endio(&rbio->bio); + } else { + if (likely(!(flags & BCH_READ_IN_RETRY))) + submit_bio(&rbio->bio); + else + submit_bio_wait(&rbio->bio); + } + + /* + * We just submitted IO which may block, we expect relock fail + * events and shouldn't count them: + */ + trans->notrace_relock_fail = true; + } else { + /* Attempting reconstruct read: */ + if (bch2_ec_read_extent(c, rbio)) { + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + goto out; + } + + if (likely(!(flags & BCH_READ_IN_RETRY))) + bio_endio(&rbio->bio); + } +out: + if (likely(!(flags & BCH_READ_IN_RETRY))) { + return 0; + } else { + int ret; + + rbio->context = RBIO_CONTEXT_UNBOUND; + bch2_read_endio(&rbio->bio); + + ret = rbio->retry; + rbio = bch2_rbio_free(rbio); + + if (ret == READ_RETRY_AVOID) { + bch2_mark_io_failure(failed, &pick); + ret = READ_RETRY; + } + + if (!ret) + goto out_read_done; + + return ret; + } + +err: + if (flags & BCH_READ_IN_RETRY) + return READ_ERR; + + orig->bio.bi_status = BLK_STS_IOERR; + goto out_read_done; + +hole: + /* + * won't normally happen in the BCH_READ_NODECODE + * (bch2_move_extent()) path, but if we retry and the extent we wanted + * to read no longer exists we have to signal that: + */ + if (flags & BCH_READ_NODECODE) + orig->hole = true; + + zero_fill_bio_iter(&orig->bio, iter); +out_read_done: + if (flags & BCH_READ_LAST_FRAGMENT) + bch2_rbio_done(orig); + return 0; +} + +void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + struct bvec_iter bvec_iter, subvol_inum inum, + 
struct bch_io_failures *failed, unsigned flags) +{ + struct btree_trans trans; + struct btree_iter iter; + struct bkey_buf sk; + struct bkey_s_c k; + u32 snapshot; + int ret; + + BUG_ON(flags & BCH_READ_NODECODE); + + bch2_bkey_buf_init(&sk); + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + iter = (struct btree_iter) { NULL }; + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + goto err; + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + SPOS(inum.inum, bvec_iter.bi_sector, snapshot), + BTREE_ITER_SLOTS); + while (1) { + unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; + + /* + * read_extent -> io_time_reset may cause a transaction restart + * without returning an error, we need to check for that here: + */ + ret = bch2_trans_relock(&trans); + if (ret) + break; + + bch2_btree_iter_set_pos(&iter, + POS(inum.inum, bvec_iter.bi_sector)); + + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + break; + + offset_into_extent = iter.pos.offset - + bkey_start_offset(k.k); + sectors = k.k->size - offset_into_extent; + + bch2_bkey_buf_reassemble(&sk, c, k); + + ret = bch2_read_indirect_extent(&trans, &data_btree, + &offset_into_extent, &sk); + if (ret) + break; + + k = bkey_i_to_s_c(sk.k); + + /* + * With indirect extents, the amount of data to read is the min + * of the original extent and the indirect extent: + */ + sectors = min(sectors, k.k->size - offset_into_extent); + + bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; + swap(bvec_iter.bi_size, bytes); + + if (bvec_iter.bi_size == bytes) + flags |= BCH_READ_LAST_FRAGMENT; + + ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter.pos, + data_btree, k, + offset_into_extent, failed, flags); + if (ret) + break; + + if (flags & BCH_READ_LAST_FRAGMENT) + break; + + swap(bvec_iter.bi_size, bytes); + bio_advance_iter(&rbio->bio, &bvec_iter, bytes); + + ret = btree_trans_too_many_iters(&trans); + if (ret) + break; + } +err: + bch2_trans_iter_exit(&trans, &iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + ret == READ_RETRY || + ret == READ_RETRY_AVOID) + goto retry; + + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); + + if (ret) { + bch_err_inum_offset_ratelimited(c, inum.inum, + bvec_iter.bi_sector << 9, + "read error %i from btree lookup", ret); + rbio->bio.bi_status = BLK_STS_IOERR; + bch2_rbio_done(rbio); + } +} + +void bch2_fs_io_read_exit(struct bch_fs *c) +{ + if (c->promote_table.tbl) + rhashtable_destroy(&c->promote_table); + bioset_exit(&c->bio_read_split); + bioset_exit(&c->bio_read); +} + +int bch2_fs_io_read_init(struct bch_fs *c) +{ + if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), + BIOSET_NEED_BVECS)) + return -BCH_ERR_ENOMEM_bio_read_init; + + if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio), + BIOSET_NEED_BVECS)) + return -BCH_ERR_ENOMEM_bio_read_split_init; + + if (rhashtable_init(&c->promote_table, &bch_promote_params)) + return -BCH_ERR_ENOMEM_promote_table_init; + + return 0; +} diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h new file mode 100644 index 000000000000..d9c18bb7d403 --- /dev/null +++ b/fs/bcachefs/io_read.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_IO_READ_H +#define _BCACHEFS_IO_READ_H + +#include "bkey_buf.h" + +struct bch_read_bio { + struct bch_fs *c; + u64 start_time; + u64 submit_time; + + /* + * Reads will often have to be split, and if the extent being read 
from + * was checksummed or compressed we'll also have to allocate bounce + * buffers and copy the data back into the original bio. + * + * If we didn't have to split, we have to save and restore the original + * bi_end_io - @split below indicates which: + */ + union { + struct bch_read_bio *parent; + bio_end_io_t *end_io; + }; + + /* + * Saved copy of bio->bi_iter, from submission time - allows us to + * resubmit on IO error, and also to copy data back to the original bio + * when we're bouncing: + */ + struct bvec_iter bvec_iter; + + unsigned offset_into_extent; + + u16 flags; + union { + struct { + u16 bounce:1, + split:1, + kmalloc:1, + have_ioref:1, + narrow_crcs:1, + hole:1, + retry:2, + context:2; + }; + u16 _state; + }; + + struct bch_devs_list devs_have; + + struct extent_ptr_decoded pick; + + /* + * pos we read from - different from data_pos for indirect extents: + */ + u32 subvol; + struct bpos read_pos; + + /* + * start pos of data we read (may not be pos of data we want) - for + * promote, narrow extents paths: + */ + enum btree_id data_btree; + struct bpos data_pos; + struct bversion version; + + struct promote_op *promote; + + struct bch_io_opts opts; + + struct work_struct work; + + struct bio bio; +}; + +#define to_rbio(_bio) container_of((_bio), struct bch_read_bio, bio) + +struct bch_devs_mask; +struct cache_promote_op; +struct extent_ptr_decoded; + +int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, + struct bkey_buf *); + +static inline int bch2_read_indirect_extent(struct btree_trans *trans, + enum btree_id *data_btree, + unsigned *offset_into_extent, + struct bkey_buf *k) +{ + if (k->k->k.type != KEY_TYPE_reflink_p) + return 0; + + *data_btree = BTREE_ID_reflink; + return __bch2_read_indirect_extent(trans, offset_into_extent, k); +} + +enum bch_read_flags { + BCH_READ_RETRY_IF_STALE = 1 << 0, + BCH_READ_MAY_PROMOTE = 1 << 1, + BCH_READ_USER_MAPPED = 1 << 2, + BCH_READ_NODECODE = 1 << 3, + BCH_READ_LAST_FRAGMENT = 1 << 4, + + /* internal: */ + BCH_READ_MUST_BOUNCE = 1 << 5, + BCH_READ_MUST_CLONE = 1 << 6, + BCH_READ_IN_RETRY = 1 << 7, +}; + +int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, + struct bvec_iter, struct bpos, enum btree_id, + struct bkey_s_c, unsigned, + struct bch_io_failures *, unsigned); + +static inline void bch2_read_extent(struct btree_trans *trans, + struct bch_read_bio *rbio, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + unsigned offset_into_extent, unsigned flags) +{ + __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, + data_btree, k, offset_into_extent, NULL, flags); +} + +void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, + subvol_inum, struct bch_io_failures *, unsigned flags); + +static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + subvol_inum inum) +{ + struct bch_io_failures failed = { .nr = 0 }; + + BUG_ON(rbio->_state); + + rbio->c = c; + rbio->start_time = local_clock(); + rbio->subvol = inum.subvol; + + __bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed, + BCH_READ_RETRY_IF_STALE| + BCH_READ_MAY_PROMOTE| + BCH_READ_USER_MAPPED); +} + +static inline struct bch_read_bio *rbio_init(struct bio *bio, + struct bch_io_opts opts) +{ + struct bch_read_bio *rbio = to_rbio(bio); + + rbio->_state = 0; + rbio->promote = NULL; + rbio->opts = opts; + return rbio; +} + +void bch2_fs_io_read_exit(struct bch_fs *); +int bch2_fs_io_read_init(struct bch_fs *); + +#endif /* _BCACHEFS_IO_READ_H */ diff --git a/fs/bcachefs/io_types.h 
b/fs/bcachefs/io_types.h deleted file mode 100644 index 737f16d78c48..000000000000 --- a/fs/bcachefs/io_types.h +++ /dev/null @@ -1,165 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BCACHEFS_IO_TYPES_H -#define _BCACHEFS_IO_TYPES_H - -#include "alloc_types.h" -#include "btree_types.h" -#include "buckets_types.h" -#include "extents_types.h" -#include "keylist_types.h" -#include "opts.h" -#include "super_types.h" - -#include -#include - -struct bch_read_bio { - struct bch_fs *c; - u64 start_time; - u64 submit_time; - - /* - * Reads will often have to be split, and if the extent being read from - * was checksummed or compressed we'll also have to allocate bounce - * buffers and copy the data back into the original bio. - * - * If we didn't have to split, we have to save and restore the original - * bi_end_io - @split below indicates which: - */ - union { - struct bch_read_bio *parent; - bio_end_io_t *end_io; - }; - - /* - * Saved copy of bio->bi_iter, from submission time - allows us to - * resubmit on IO error, and also to copy data back to the original bio - * when we're bouncing: - */ - struct bvec_iter bvec_iter; - - unsigned offset_into_extent; - - u16 flags; - union { - struct { - u16 bounce:1, - split:1, - kmalloc:1, - have_ioref:1, - narrow_crcs:1, - hole:1, - retry:2, - context:2; - }; - u16 _state; - }; - - struct bch_devs_list devs_have; - - struct extent_ptr_decoded pick; - - /* - * pos we read from - different from data_pos for indirect extents: - */ - u32 subvol; - struct bpos read_pos; - - /* - * start pos of data we read (may not be pos of data we want) - for - * promote, narrow extents paths: - */ - enum btree_id data_btree; - struct bpos data_pos; - struct bversion version; - - struct promote_op *promote; - - struct bch_io_opts opts; - - struct work_struct work; - - struct bio bio; -}; - -struct bch_write_bio { - struct_group(wbio, - struct bch_fs *c; - struct bch_write_bio *parent; - - u64 submit_time; - u64 inode_offset; - - struct bch_devs_list failed; - u8 dev; - - unsigned split:1, - bounce:1, - put_bio:1, - have_ioref:1, - nocow:1, - used_mempool:1, - first_btree_write:1; - ); - - struct bio bio; -}; - -struct bch_write_op { - struct closure cl; - struct bch_fs *c; - void (*end_io)(struct bch_write_op *); - u64 start_time; - - unsigned written; /* sectors */ - u16 flags; - s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ - - unsigned compression_opt:8; - unsigned csum_type:4; - unsigned nr_replicas:4; - unsigned nr_replicas_required:4; - unsigned watermark:3; - unsigned incompressible:1; - unsigned stripe_waited:1; - - struct bch_devs_list devs_have; - u16 target; - u16 nonce; - struct bch_io_opts opts; - - u32 subvol; - struct bpos pos; - struct bversion version; - - /* For BCH_WRITE_DATA_ENCODED: */ - struct bch_extent_crc_unpacked crc; - - struct write_point_specifier write_point; - - struct write_point *wp; - struct list_head wp_list; - - struct disk_reservation res; - - struct open_buckets open_buckets; - - u64 new_i_size; - s64 i_sectors_delta; - - struct bch_devs_mask failed; - - struct keylist insert_keys; - u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; - - /* - * Bitmask of devices that have had nocow writes issued to them since - * last flush: - */ - struct bch_devs_mask *devs_need_flush; - - /* Must be last: */ - struct bch_write_bio wbio; -}; - -#endif /* _BCACHEFS_IO_TYPES_H */ diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c new file mode 100644 index 000000000000..7f29fd2f05b1 --- /dev/null +++ b/fs/bcachefs/io_write.c @@ -0,0 +1,1670 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcachefs.h" +#include "alloc_foreground.h" +#include "bkey_buf.h" +#include "bset.h" +#include "btree_update.h" +#include "buckets.h" +#include "checksum.h" +#include "clock.h" +#include "compress.h" +#include "debug.h" +#include "ec.h" +#include "error.h" +#include "extent_update.h" +#include "inode.h" +#include "io_write.h" +#include "journal.h" +#include "keylist.h" +#include "move.h" +#include "nocow_locking.h" +#include "rebalance.h" +#include "subvolume.h" +#include "super.h" +#include "super-io.h" +#include "trace.h" + +#include +#include +#include +#include + +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT + +static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, + u64 now, int rw) +{ + u64 latency_capable = + ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; + /* ideally we'd be taking into account the device's variance here: */ + u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3); + s64 latency_over = io_latency - latency_threshold; + + if (latency_threshold && latency_over > 0) { + /* + * bump up congested by approximately latency_over * 4 / + * latency_threshold - we don't need much accuracy here so don't + * bother with the divide: + */ + if (atomic_read(&ca->congested) < CONGESTED_MAX) + atomic_add(latency_over >> + max_t(int, ilog2(latency_threshold) - 2, 0), + &ca->congested); + + ca->congested_last = now; + } else if (atomic_read(&ca->congested) > 0) { + atomic_dec(&ca->congested); + } +} + +void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) +{ + atomic64_t *latency = &ca->cur_latency[rw]; + u64 now = local_clock(); + u64 io_latency = time_after64(now, submit_time) + ? 
now - submit_time + : 0; + u64 old, new, v = atomic64_read(latency); + + do { + old = v; + + /* + * If the io latency was reasonably close to the current + * latency, skip doing the update and atomic operation - most of + * the time: + */ + if (abs((int) (old - io_latency)) < (old >> 1) && + now & ~(~0U << 5)) + break; + + new = ewma_add(old, io_latency, 5); + } while ((v = atomic64_cmpxchg(latency, old, new)) != old); + + bch2_congested_acct(ca, io_latency, now, rw); + + __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); +} + +#endif + +/* Allocate, free from mempool: */ + +void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) +{ + struct bvec_iter_all iter; + struct bio_vec *bv; + + bio_for_each_segment_all(bv, bio, iter) + if (bv->bv_page != ZERO_PAGE(0)) + mempool_free(bv->bv_page, &c->bio_bounce_pages); + bio->bi_vcnt = 0; +} + +static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) +{ + struct page *page; + + if (likely(!*using_mempool)) { + page = alloc_page(GFP_NOFS); + if (unlikely(!page)) { + mutex_lock(&c->bio_bounce_pages_lock); + *using_mempool = true; + goto pool_alloc; + + } + } else { +pool_alloc: + page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS); + } + + return page; +} + +void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, + size_t size) +{ + bool using_mempool = false; + + while (size) { + struct page *page = __bio_alloc_page_pool(c, &using_mempool); + unsigned len = min_t(size_t, PAGE_SIZE, size); + + BUG_ON(!bio_add_page(bio, page, len, 0)); + size -= len; + } + + if (using_mempool) + mutex_unlock(&c->bio_bounce_pages_lock); +} + +/* Extent update path: */ + +int bch2_sum_sector_overwrites(struct btree_trans *trans, + struct btree_iter *extent_iter, + struct bkey_i *new, + bool *usage_increasing, + s64 *i_sectors_delta, + s64 *disk_sectors_delta) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c old; + unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new)); + bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new)); + int ret = 0; + + *usage_increasing = false; + *i_sectors_delta = 0; + *disk_sectors_delta = 0; + + bch2_trans_copy_iter(&iter, extent_iter); + + for_each_btree_key_upto_continue_norestart(iter, + new->k.p, BTREE_ITER_SLOTS, old, ret) { + s64 sectors = min(new->k.p.offset, old.k->p.offset) - + max(bkey_start_offset(&new->k), + bkey_start_offset(old.k)); + + *i_sectors_delta += sectors * + (bkey_extent_is_allocation(&new->k) - + bkey_extent_is_allocation(old.k)); + + *disk_sectors_delta += sectors * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)); + *disk_sectors_delta -= new->k.p.snapshot == old.k->p.snapshot + ? 
sectors * bch2_bkey_nr_ptrs_fully_allocated(old) + : 0; + + if (!*usage_increasing && + (new->k.p.snapshot != old.k->p.snapshot || + new_replicas > bch2_bkey_replicas(c, old) || + (!new_compressed && bch2_bkey_sectors_compressed(old)))) + *usage_increasing = true; + + if (bkey_ge(old.k->p, new->k.p)) + break; + } + + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, + struct btree_iter *extent_iter, + u64 new_i_size, + s64 i_sectors_delta) +{ + struct btree_iter iter; + struct bkey_i *k; + struct bkey_i_inode_v3 *inode; + unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL; + int ret; + + k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes, + SPOS(0, + extent_iter->pos.inode, + extent_iter->snapshot), + BTREE_ITER_CACHED); + ret = PTR_ERR_OR_ZERO(k); + if (unlikely(ret)) + return ret; + + if (unlikely(k->k.type != KEY_TYPE_inode_v3)) { + k = bch2_inode_to_v3(trans, k); + ret = PTR_ERR_OR_ZERO(k); + if (unlikely(ret)) + goto err; + } + + inode = bkey_i_to_inode_v3(k); + + if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) && + new_i_size > le64_to_cpu(inode->v.bi_size)) { + inode->v.bi_size = cpu_to_le64(new_i_size); + inode_update_flags = 0; + } + + if (i_sectors_delta) { + le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); + inode_update_flags = 0; + } + + if (inode->k.p.snapshot != iter.snapshot) { + inode->k.p.snapshot = iter.snapshot; + inode_update_flags = 0; + } + + ret = bch2_trans_update(trans, &iter, &inode->k_i, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| + inode_update_flags); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_extent_update(struct btree_trans *trans, + subvol_inum inum, + struct btree_iter *iter, + struct bkey_i *k, + struct disk_reservation *disk_res, + u64 new_i_size, + s64 *i_sectors_delta_total, + bool check_enospc) +{ + struct bpos next_pos; + bool usage_increasing; + s64 i_sectors_delta = 0, disk_sectors_delta = 0; + int ret; + + /* + * This traverses us the iterator without changing iter->path->pos to + * search_key() (which is pos + 1 for extents): we want there to be a + * path already traversed at iter->pos because + * bch2_trans_extent_update() will use it to attempt extent merging + */ + ret = __bch2_btree_iter_traverse(iter); + if (ret) + return ret; + + ret = bch2_extent_trim_atomic(trans, iter, k); + if (ret) + return ret; + + next_pos = k->k.p; + + ret = bch2_sum_sector_overwrites(trans, iter, k, + &usage_increasing, + &i_sectors_delta, + &disk_sectors_delta); + if (ret) + return ret; + + if (disk_res && + disk_sectors_delta > (s64) disk_res->sectors) { + ret = bch2_disk_reservation_add(trans->c, disk_res, + disk_sectors_delta - disk_res->sectors, + !check_enospc || !usage_increasing + ? 
BCH_DISK_RESERVATION_NOFAIL : 0); + if (ret) + return ret; + } + + /* + * Note: + * We always have to do an inode update - even when i_size/i_sectors + * aren't changing - for fsync to work properly; fsync relies on + * inode->bi_journal_seq which is updated by the trigger code: + */ + ret = bch2_extent_update_i_size_sectors(trans, iter, + min(k->k.p.offset << 9, new_i_size), + i_sectors_delta) ?: + bch2_trans_update(trans, iter, k, 0) ?: + bch2_trans_commit(trans, disk_res, NULL, + BTREE_INSERT_NOCHECK_RW| + BTREE_INSERT_NOFAIL); + if (unlikely(ret)) + return ret; + + if (i_sectors_delta_total) + *i_sectors_delta_total += i_sectors_delta; + bch2_btree_iter_set_pos(iter, next_pos); + return 0; +} + +static int bch2_write_index_default(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + struct bkey_buf sk; + struct keylist *keys = &op->insert_keys; + struct bkey_i *k = bch2_keylist_front(keys); + struct btree_trans trans; + struct btree_iter iter; + subvol_inum inum = { + .subvol = op->subvol, + .inum = k->k.p.inode, + }; + int ret; + + BUG_ON(!inum.subvol); + + bch2_bkey_buf_init(&sk); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + + do { + bch2_trans_begin(&trans); + + k = bch2_keylist_front(keys); + bch2_bkey_buf_copy(&sk, c, k); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, + &sk.k->k.p.snapshot); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bkey_start_pos(&sk.k->k), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + + ret = bch2_extent_update(&trans, inum, &iter, sk.k, + &op->res, + op->new_i_size, &op->i_sectors_delta, + op->flags & BCH_WRITE_CHECK_ENOSPC); + bch2_trans_iter_exit(&trans, &iter); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + continue; + if (ret) + break; + + if (bkey_ge(iter.pos, k->k.p)) + bch2_keylist_pop_front(&op->insert_keys); + else + bch2_cut_front(iter.pos, k); + } while (!bch2_keylist_empty(keys)); + + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); + + return ret; +} + +/* Writes */ + +void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, + enum bch_data_type type, + const struct bkey_i *k, + bool nocow) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); + const struct bch_extent_ptr *ptr; + struct bch_write_bio *n; + struct bch_dev *ca; + + BUG_ON(c->opts.nochanges); + + bkey_for_each_ptr(ptrs, ptr) { + BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || + !c->devs[ptr->dev]); + + ca = bch_dev_bkey_exists(c, ptr->dev); + + if (to_entry(ptr + 1) < ptrs.end) { + n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, + GFP_NOFS, &ca->replica_set)); + + n->bio.bi_end_io = wbio->bio.bi_end_io; + n->bio.bi_private = wbio->bio.bi_private; + n->parent = wbio; + n->split = true; + n->bounce = false; + n->put_bio = true; + n->bio.bi_opf = wbio->bio.bi_opf; + bio_inc_remaining(&wbio->bio); + } else { + n = wbio; + n->split = false; + } + + n->c = c; + n->dev = ptr->dev; + n->have_ioref = nocow || bch2_dev_get_ioref(ca, + type == BCH_DATA_btree ? 
READ : WRITE); + n->nocow = nocow; + n->submit_time = local_clock(); + n->inode_offset = bkey_start_offset(&k->k); + n->bio.bi_iter.bi_sector = ptr->offset; + + if (likely(n->have_ioref)) { + this_cpu_add(ca->io_done->sectors[WRITE][type], + bio_sectors(&n->bio)); + + bio_set_dev(&n->bio, ca->disk_sb.bdev); + + if (type != BCH_DATA_btree && unlikely(c->opts.no_data_io)) { + bio_endio(&n->bio); + continue; + } + + submit_bio(&n->bio); + } else { + n->bio.bi_status = BLK_STS_REMOVED; + bio_endio(&n->bio); + } + } +} + +static void __bch2_write(struct bch_write_op *); + +static void bch2_write_done(struct closure *cl) +{ + struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + struct bch_fs *c = op->c; + + EBUG_ON(op->open_buckets.nr); + + bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time); + bch2_disk_reservation_put(c, &op->res); + + if (!(op->flags & BCH_WRITE_MOVE)) + bch2_write_ref_put(c, BCH_WRITE_REF_write); + bch2_keylist_free(&op->insert_keys, op->inline_keys); + + EBUG_ON(cl->parent); + closure_debug_destroy(cl); + if (op->end_io) + op->end_io(op); +} + +static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) +{ + struct keylist *keys = &op->insert_keys; + struct bch_extent_ptr *ptr; + struct bkey_i *src, *dst = keys->keys, *n; + + for (src = keys->keys; src != keys->top; src = n) { + n = bkey_next(src); + + if (bkey_extent_is_direct_data(&src->k)) { + bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, + test_bit(ptr->dev, op->failed.d)); + + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) + return -EIO; + } + + if (dst != src) + memmove_u64s_down(dst, src, src->k.u64s); + dst = bkey_next(dst); + } + + keys->top = dst; + return 0; +} + +/** + * bch_write_index - after a write, update index to point to new data + */ +static void __bch2_write_index(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + struct keylist *keys = &op->insert_keys; + struct bkey_i *k; + unsigned dev; + int ret = 0; + + if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { + ret = bch2_write_drop_io_error_ptrs(op); + if (ret) + goto err; + } + + /* + * probably not the ideal place to hook this in, but I don't + * particularly want to plumb io_opts all the way through the btree + * update stack right now + */ + for_each_keylist_key(keys, k) + bch2_rebalance_add_key(c, bkey_i_to_s_c(k), &op->opts); + + if (!bch2_keylist_empty(keys)) { + u64 sectors_start = keylist_sectors(keys); + + ret = !(op->flags & BCH_WRITE_MOVE) + ? 
bch2_write_index_default(op) + : bch2_data_update_index_update(op); + + BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); + BUG_ON(keylist_sectors(keys) && !ret); + + op->written += sectors_start - keylist_sectors(keys); + + if (ret && !bch2_err_matches(ret, EROFS)) { + struct bkey_i *k = bch2_keylist_front(&op->insert_keys); + + bch_err_inum_offset_ratelimited(c, + k->k.p.inode, k->k.p.offset << 9, + "write error while doing btree update: %s", + bch2_err_str(ret)); + } + + if (ret) + goto err; + } +out: + /* If a bucket wasn't written, we can't erasure code it: */ + for_each_set_bit(dev, op->failed.d, BCH_SB_MEMBERS_MAX) + bch2_open_bucket_write_error(c, &op->open_buckets, dev); + + bch2_open_buckets_put(c, &op->open_buckets); + return; +err: + keys->top = keys->keys; + op->error = ret; + op->flags |= BCH_WRITE_DONE; + goto out; +} + +static inline void __wp_update_state(struct write_point *wp, enum write_point_state state) +{ + if (state != wp->state) { + u64 now = ktime_get_ns(); + + if (wp->last_state_change && + time_after64(now, wp->last_state_change)) + wp->time[wp->state] += now - wp->last_state_change; + wp->state = state; + wp->last_state_change = now; + } +} + +static inline void wp_update_state(struct write_point *wp, bool running) +{ + enum write_point_state state; + + state = running ? WRITE_POINT_running : + !list_empty(&wp->writes) ? WRITE_POINT_waiting_io + : WRITE_POINT_stopped; + + __wp_update_state(wp, state); +} + +static void bch2_write_index(struct closure *cl) +{ + struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + struct write_point *wp = op->wp; + struct workqueue_struct *wq = index_update_wq(op); + unsigned long flags; + + if ((op->flags & BCH_WRITE_DONE) && + (op->flags & BCH_WRITE_MOVE)) + bch2_bio_free_pages_pool(op->c, &op->wbio.bio); + + spin_lock_irqsave(&wp->writes_lock, flags); + if (wp->state == WRITE_POINT_waiting_io) + __wp_update_state(wp, WRITE_POINT_waiting_work); + list_add_tail(&op->wp_list, &wp->writes); + spin_unlock_irqrestore(&wp->writes_lock, flags); + + queue_work(wq, &wp->index_update_work); +} + +static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp) +{ + op->wp = wp; + + if (wp->state == WRITE_POINT_stopped) { + spin_lock_irq(&wp->writes_lock); + __wp_update_state(wp, WRITE_POINT_waiting_io); + spin_unlock_irq(&wp->writes_lock); + } +} + +void bch2_write_point_do_index_updates(struct work_struct *work) +{ + struct write_point *wp = + container_of(work, struct write_point, index_update_work); + struct bch_write_op *op; + + while (1) { + spin_lock_irq(&wp->writes_lock); + op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list); + if (op) + list_del(&op->wp_list); + wp_update_state(wp, op != NULL); + spin_unlock_irq(&wp->writes_lock); + + if (!op) + break; + + op->flags |= BCH_WRITE_IN_WORKER; + + __bch2_write_index(op); + + if (!(op->flags & BCH_WRITE_DONE)) + __bch2_write(op); + else + bch2_write_done(&op->cl); + } +} + +static void bch2_write_endio(struct bio *bio) +{ + struct closure *cl = bio->bi_private; + struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + struct bch_write_bio *wbio = to_wbio(bio); + struct bch_write_bio *parent = wbio->split ? 
wbio->parent : NULL; + struct bch_fs *c = wbio->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, wbio->dev); + + if (bch2_dev_inum_io_err_on(bio->bi_status, ca, + op->pos.inode, + wbio->inode_offset << 9, + "data write error: %s", + bch2_blk_status_to_str(bio->bi_status))) { + set_bit(wbio->dev, op->failed.d); + op->flags |= BCH_WRITE_IO_ERROR; + } + + if (wbio->nocow) + set_bit(wbio->dev, op->devs_need_flush->d); + + if (wbio->have_ioref) { + bch2_latency_acct(ca, wbio->submit_time, WRITE); + percpu_ref_put(&ca->io_ref); + } + + if (wbio->bounce) + bch2_bio_free_pages_pool(c, bio); + + if (wbio->put_bio) + bio_put(bio); + + if (parent) + bio_endio(&parent->bio); + else + closure_put(cl); +} + +static void init_append_extent(struct bch_write_op *op, + struct write_point *wp, + struct bversion version, + struct bch_extent_crc_unpacked crc) +{ + struct bkey_i_extent *e; + + op->pos.offset += crc.uncompressed_size; + + e = bkey_extent_init(op->insert_keys.top); + e->k.p = op->pos; + e->k.size = crc.uncompressed_size; + e->k.version = version; + + if (crc.csum_type || + crc.compression_type || + crc.nonce) + bch2_extent_crc_append(&e->k_i, crc); + + bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size, + op->flags & BCH_WRITE_CACHED); + + bch2_keylist_push(&op->insert_keys); +} + +static struct bio *bch2_write_bio_alloc(struct bch_fs *c, + struct write_point *wp, + struct bio *src, + bool *page_alloc_failed, + void *buf) +{ + struct bch_write_bio *wbio; + struct bio *bio; + unsigned output_available = + min(wp->sectors_free << 9, src->bi_iter.bi_size); + unsigned pages = DIV_ROUND_UP(output_available + + (buf + ? ((unsigned long) buf & (PAGE_SIZE - 1)) + : 0), PAGE_SIZE); + + pages = min(pages, BIO_MAX_VECS); + + bio = bio_alloc_bioset(NULL, pages, 0, + GFP_NOFS, &c->bio_write); + wbio = wbio_init(bio); + wbio->put_bio = true; + /* copy WRITE_SYNC flag */ + wbio->bio.bi_opf = src->bi_opf; + + if (buf) { + bch2_bio_map(bio, buf, output_available); + return bio; + } + + wbio->bounce = true; + + /* + * We can't use mempool for more than c->sb.encoded_extent_max + * worth of pages, but we'd like to allocate more if we can: + */ + bch2_bio_alloc_pages_pool(c, bio, + min_t(unsigned, output_available, + c->opts.encoded_extent_max)); + + if (bio->bi_iter.bi_size < output_available) + *page_alloc_failed = + bch2_bio_alloc_pages(bio, + output_available - + bio->bi_iter.bi_size, + GFP_NOFS) != 0; + + return bio; +} + +static int bch2_write_rechecksum(struct bch_fs *c, + struct bch_write_op *op, + unsigned new_csum_type) +{ + struct bio *bio = &op->wbio.bio; + struct bch_extent_crc_unpacked new_crc; + int ret; + + /* bch2_rechecksum_bio() can't encrypt or decrypt data: */ + + if (bch2_csum_type_is_encryption(op->crc.csum_type) != + bch2_csum_type_is_encryption(new_csum_type)) + new_csum_type = op->crc.csum_type; + + ret = bch2_rechecksum_bio(c, bio, op->version, op->crc, + NULL, &new_crc, + op->crc.offset, op->crc.live_size, + new_csum_type); + if (ret) + return ret; + + bio_advance(bio, op->crc.offset << 9); + bio->bi_iter.bi_size = op->crc.live_size << 9; + op->crc = new_crc; + return 0; +} + +static int bch2_write_decrypt(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + struct nonce nonce = extent_nonce(op->version, op->crc); + struct bch_csum csum; + int ret; + + if (!bch2_csum_type_is_encryption(op->crc.csum_type)) + return 0; + + /* + * If we need to decrypt data in the write path, we'll no longer be able + * to verify the existing checksum (poly1305 mac, in this 
case) after + * it's decrypted - this is the last point we'll be able to reverify the + * checksum: + */ + csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); + if (bch2_crc_cmp(op->crc.csum, csum)) + return -EIO; + + ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio); + op->crc.csum_type = 0; + op->crc.csum = (struct bch_csum) { 0, 0 }; + return ret; +} + +static enum prep_encoded_ret { + PREP_ENCODED_OK, + PREP_ENCODED_ERR, + PREP_ENCODED_CHECKSUM_ERR, + PREP_ENCODED_DO_WRITE, +} bch2_write_prep_encoded_data(struct bch_write_op *op, struct write_point *wp) +{ + struct bch_fs *c = op->c; + struct bio *bio = &op->wbio.bio; + + if (!(op->flags & BCH_WRITE_DATA_ENCODED)) + return PREP_ENCODED_OK; + + BUG_ON(bio_sectors(bio) != op->crc.compressed_size); + + /* Can we just write the entire extent as is? */ + if (op->crc.uncompressed_size == op->crc.live_size && + op->crc.compressed_size <= wp->sectors_free && + (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) || + op->incompressible)) { + if (!crc_is_compressed(op->crc) && + op->csum_type != op->crc.csum_type && + bch2_write_rechecksum(c, op, op->csum_type) && + !c->opts.no_data_io) + return PREP_ENCODED_CHECKSUM_ERR; + + return PREP_ENCODED_DO_WRITE; + } + + /* + * If the data is compressed and we couldn't write the entire extent as + * is, we have to decompress it: + */ + if (crc_is_compressed(op->crc)) { + struct bch_csum csum; + + if (bch2_write_decrypt(op)) + return PREP_ENCODED_CHECKSUM_ERR; + + /* Last point we can still verify checksum: */ + csum = bch2_checksum_bio(c, op->crc.csum_type, + extent_nonce(op->version, op->crc), + bio); + if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io) + return PREP_ENCODED_CHECKSUM_ERR; + + if (bch2_bio_uncompress_inplace(c, bio, &op->crc)) + return PREP_ENCODED_ERR; + } + + /* + * No longer have compressed data after this point - data might be + * encrypted: + */ + + /* + * If the data is checksummed and we're only writing a subset, + * rechecksum and adjust bio to point to currently live data: + */ + if ((op->crc.live_size != op->crc.uncompressed_size || + op->crc.csum_type != op->csum_type) && + bch2_write_rechecksum(c, op, op->csum_type) && + !c->opts.no_data_io) + return PREP_ENCODED_CHECKSUM_ERR; + + /* + * If we want to compress the data, it has to be decrypted: + */ + if ((op->compression_opt || + bch2_csum_type_is_encryption(op->crc.csum_type) != + bch2_csum_type_is_encryption(op->csum_type)) && + bch2_write_decrypt(op)) + return PREP_ENCODED_CHECKSUM_ERR; + + return PREP_ENCODED_OK; +} + +static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp, + struct bio **_dst) +{ + struct bch_fs *c = op->c; + struct bio *src = &op->wbio.bio, *dst = src; + struct bvec_iter saved_iter; + void *ec_buf; + unsigned total_output = 0, total_input = 0; + bool bounce = false; + bool page_alloc_failed = false; + int ret, more = 0; + + BUG_ON(!bio_sectors(src)); + + ec_buf = bch2_writepoint_ec_buf(c, wp); + + switch (bch2_write_prep_encoded_data(op, wp)) { + case PREP_ENCODED_OK: + break; + case PREP_ENCODED_ERR: + ret = -EIO; + goto err; + case PREP_ENCODED_CHECKSUM_ERR: + goto csum_err; + case PREP_ENCODED_DO_WRITE: + /* XXX look for bug here */ + if (ec_buf) { + dst = bch2_write_bio_alloc(c, wp, src, + &page_alloc_failed, + ec_buf); + bio_copy_data(dst, src); + bounce = true; + } + init_append_extent(op, wp, op->version, op->crc); + goto do_write; + } + + if (ec_buf || + op->compression_opt || + (op->csum_type && + 
!(op->flags & BCH_WRITE_PAGES_STABLE)) || + (bch2_csum_type_is_encryption(op->csum_type) && + !(op->flags & BCH_WRITE_PAGES_OWNED))) { + dst = bch2_write_bio_alloc(c, wp, src, + &page_alloc_failed, + ec_buf); + bounce = true; + } + + saved_iter = dst->bi_iter; + + do { + struct bch_extent_crc_unpacked crc = { 0 }; + struct bversion version = op->version; + size_t dst_len, src_len; + + if (page_alloc_failed && + dst->bi_iter.bi_size < (wp->sectors_free << 9) && + dst->bi_iter.bi_size < c->opts.encoded_extent_max) + break; + + BUG_ON(op->compression_opt && + (op->flags & BCH_WRITE_DATA_ENCODED) && + bch2_csum_type_is_encryption(op->crc.csum_type)); + BUG_ON(op->compression_opt && !bounce); + + crc.compression_type = op->incompressible + ? BCH_COMPRESSION_TYPE_incompressible + : op->compression_opt + ? bch2_bio_compress(c, dst, &dst_len, src, &src_len, + op->compression_opt) + : 0; + if (!crc_is_compressed(crc)) { + dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size); + dst_len = min_t(unsigned, dst_len, wp->sectors_free << 9); + + if (op->csum_type) + dst_len = min_t(unsigned, dst_len, + c->opts.encoded_extent_max); + + if (bounce) { + swap(dst->bi_iter.bi_size, dst_len); + bio_copy_data(dst, src); + swap(dst->bi_iter.bi_size, dst_len); + } + + src_len = dst_len; + } + + BUG_ON(!src_len || !dst_len); + + if (bch2_csum_type_is_encryption(op->csum_type)) { + if (bversion_zero(version)) { + version.lo = atomic64_inc_return(&c->key_version); + } else { + crc.nonce = op->nonce; + op->nonce += src_len >> 9; + } + } + + if ((op->flags & BCH_WRITE_DATA_ENCODED) && + !crc_is_compressed(crc) && + bch2_csum_type_is_encryption(op->crc.csum_type) == + bch2_csum_type_is_encryption(op->csum_type)) { + u8 compression_type = crc.compression_type; + u16 nonce = crc.nonce; + /* + * Note: when we're using rechecksum(), we need to be + * checksumming @src because it has all the data our + * existing checksum covers - if we bounced (because we + * were trying to compress), @dst will only have the + * part of the data the new checksum will cover. + * + * But normally we want to be checksumming post bounce, + * because part of the reason for bouncing is so the + * data can't be modified (by userspace) while it's in + * flight. + */ + if (bch2_rechecksum_bio(c, src, version, op->crc, + &crc, &op->crc, + src_len >> 9, + bio_sectors(src) - (src_len >> 9), + op->csum_type)) + goto csum_err; + /* + * rchecksum_bio sets compression_type on crc from op->crc, + * this isn't always correct as sometimes we're changing + * an extent from uncompressed to incompressible. 
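+			 *
+			 * (editor's note, not part of the original patch:
+			 * "rchecksum_bio" above means bch2_rechecksum_bio();
+			 * the guard is the save/restore visible around the
+			 * call, sketched as:
+			 *
+			 *	u8  saved_type  = crc.compression_type;
+			 *	u16 saved_nonce = crc.nonce;
+			 *	bch2_rechecksum_bio(...);    /... may clobber both
+			 *	crc.compression_type = saved_type;
+			 *	crc.nonce = saved_nonce;
+			 *
+			 * which matches the assignments immediately below)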
+ */ + crc.compression_type = compression_type; + crc.nonce = nonce; + } else { + if ((op->flags & BCH_WRITE_DATA_ENCODED) && + bch2_rechecksum_bio(c, src, version, op->crc, + NULL, &op->crc, + src_len >> 9, + bio_sectors(src) - (src_len >> 9), + op->crc.csum_type)) + goto csum_err; + + crc.compressed_size = dst_len >> 9; + crc.uncompressed_size = src_len >> 9; + crc.live_size = src_len >> 9; + + swap(dst->bi_iter.bi_size, dst_len); + ret = bch2_encrypt_bio(c, op->csum_type, + extent_nonce(version, crc), dst); + if (ret) + goto err; + + crc.csum = bch2_checksum_bio(c, op->csum_type, + extent_nonce(version, crc), dst); + crc.csum_type = op->csum_type; + swap(dst->bi_iter.bi_size, dst_len); + } + + init_append_extent(op, wp, version, crc); + + if (dst != src) + bio_advance(dst, dst_len); + bio_advance(src, src_len); + total_output += dst_len; + total_input += src_len; + } while (dst->bi_iter.bi_size && + src->bi_iter.bi_size && + wp->sectors_free && + !bch2_keylist_realloc(&op->insert_keys, + op->inline_keys, + ARRAY_SIZE(op->inline_keys), + BKEY_EXTENT_U64s_MAX)); + + more = src->bi_iter.bi_size != 0; + + dst->bi_iter = saved_iter; + + if (dst == src && more) { + BUG_ON(total_output != total_input); + + dst = bio_split(src, total_input >> 9, + GFP_NOFS, &c->bio_write); + wbio_init(dst)->put_bio = true; + /* copy WRITE_SYNC flag */ + dst->bi_opf = src->bi_opf; + } + + dst->bi_iter.bi_size = total_output; +do_write: + *_dst = dst; + return more; +csum_err: + bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)"); + ret = -EIO; +err: + if (to_wbio(dst)->bounce) + bch2_bio_free_pages_pool(c, dst); + if (to_wbio(dst)->put_bio) + bio_put(dst); + + return ret; +} + +static bool bch2_extent_is_writeable(struct bch_write_op *op, + struct bkey_s_c k) +{ + struct bch_fs *c = op->c; + struct bkey_s_c_extent e; + struct extent_ptr_decoded p; + const union bch_extent_entry *entry; + unsigned replicas = 0; + + if (k.k->type != KEY_TYPE_extent) + return false; + + e = bkey_s_c_to_extent(k); + extent_for_each_ptr_decode(e, p, entry) { + if (p.crc.csum_type || + crc_is_compressed(p.crc) || + p.has_ec) + return false; + + replicas += bch2_extent_ptr_durability(c, &p); + } + + return replicas >= op->opts.data_replicas; +} + +static inline void bch2_nocow_write_unlock(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + const struct bch_extent_ptr *ptr; + struct bkey_i *k; + + for_each_keylist_key(&op->insert_keys, k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); + + bkey_for_each_ptr(ptrs, ptr) + bch2_bucket_nocow_unlock(&c->nocow_locks, + PTR_BUCKET_POS(c, ptr), + BUCKET_NOCOW_LOCK_UPDATE); + } +} + +static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_i *orig, + struct bkey_s_c k, + u64 new_i_size) +{ + struct bkey_i *new; + struct bkey_ptrs ptrs; + struct bch_extent_ptr *ptr; + int ret; + + if (!bch2_extents_match(bkey_i_to_s_c(orig), k)) { + /* trace this */ + return 0; + } + + new = bch2_bkey_make_mut_noupdate(trans, k); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + return ret; + + bch2_cut_front(bkey_start_pos(&orig->k), new); + bch2_cut_back(orig->k.p, new); + + ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); + bkey_for_each_ptr(ptrs, ptr) + ptr->unwritten = 0; + + /* + * Note that we're not calling bch2_subvol_get_snapshot() in this path - + * that was done when we kicked off the write, and here it's important + * that we update the extent that we wrote to - even if a snapshot has 
+ * since been created. The write is still outstanding, so we're ok + * w.r.t. snapshot atomicity: + */ + return bch2_extent_update_i_size_sectors(trans, iter, + min(new->k.p.offset << 9, new_i_size), 0) ?: + bch2_trans_update(trans, iter, new, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); +} + +static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + struct btree_trans trans; + struct btree_iter iter; + struct bkey_i *orig; + struct bkey_s_c k; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + + for_each_keylist_key(&op->insert_keys, orig) { + ret = for_each_btree_key_upto_commit(&trans, iter, BTREE_ID_extents, + bkey_start_pos(&orig->k), orig->k.p, + BTREE_ITER_INTENT, k, + NULL, NULL, BTREE_INSERT_NOFAIL, ({ + bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size); + })); + + if (ret && !bch2_err_matches(ret, EROFS)) { + struct bkey_i *k = bch2_keylist_front(&op->insert_keys); + + bch_err_inum_offset_ratelimited(c, + k->k.p.inode, k->k.p.offset << 9, + "write error while doing btree update: %s", + bch2_err_str(ret)); + } + + if (ret) { + op->error = ret; + break; + } + } + + bch2_trans_exit(&trans); +} + +static void __bch2_nocow_write_done(struct bch_write_op *op) +{ + bch2_nocow_write_unlock(op); + + if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) { + op->error = -EIO; + } else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN)) + bch2_nocow_write_convert_unwritten(op); +} + +static void bch2_nocow_write_done(struct closure *cl) +{ + struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + + __bch2_nocow_write_done(op); + bch2_write_done(cl); +} + +static void bch2_nocow_write(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_ptrs_c ptrs; + const struct bch_extent_ptr *ptr; + struct { + struct bpos b; + unsigned gen; + struct nocow_lock_bucket *l; + } buckets[BCH_REPLICAS_MAX]; + unsigned nr_buckets = 0; + u32 snapshot; + int ret, i; + + if (op->flags & BCH_WRITE_MOVE) + return; + + bch2_trans_init(&trans, c, 0, 0); +retry: + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, op->subvol, &snapshot); + if (unlikely(ret)) + goto err; + + bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + SPOS(op->pos.inode, op->pos.offset, snapshot), + BTREE_ITER_SLOTS); + while (1) { + struct bio *bio = &op->wbio.bio; + + nr_buckets = 0; + + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + break; + + /* fall back to normal cow write path? 
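+		 *
+		 * (editor's note, not part of the original patch: breaking
+		 * out of this loop without BCH_WRITE_DONE set is what makes
+		 * __bch2_write() retry the remainder through the regular
+		 * COW path)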
*/ + if (unlikely(k.k->p.snapshot != snapshot || + !bch2_extent_is_writeable(op, k))) + break; + + if (bch2_keylist_realloc(&op->insert_keys, + op->inline_keys, + ARRAY_SIZE(op->inline_keys), + k.k->u64s)) + break; + + /* Get iorefs before dropping btree locks: */ + ptrs = bch2_bkey_ptrs_c(k); + bkey_for_each_ptr(ptrs, ptr) { + buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); + buckets[nr_buckets].gen = ptr->gen; + buckets[nr_buckets].l = + bucket_nocow_lock(&c->nocow_locks, + bucket_to_u64(buckets[nr_buckets].b)); + + prefetch(buckets[nr_buckets].l); + + if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE))) + goto err_get_ioref; + + nr_buckets++; + + if (ptr->unwritten) + op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; + } + + /* Unlock before taking nocow locks, doing IO: */ + bkey_reassemble(op->insert_keys.top, k); + bch2_trans_unlock(&trans); + + bch2_cut_front(op->pos, op->insert_keys.top); + if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) + bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); + + for (i = 0; i < nr_buckets; i++) { + struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); + struct nocow_lock_bucket *l = buckets[i].l; + bool stale; + + __bch2_bucket_nocow_lock(&c->nocow_locks, l, + bucket_to_u64(buckets[i].b), + BUCKET_NOCOW_LOCK_UPDATE); + + rcu_read_lock(); + stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); + rcu_read_unlock(); + + if (unlikely(stale)) + goto err_bucket_stale; + } + + bio = &op->wbio.bio; + if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) { + bio = bio_split(bio, k.k->p.offset - op->pos.offset, + GFP_KERNEL, &c->bio_write); + wbio_init(bio)->put_bio = true; + bio->bi_opf = op->wbio.bio.bi_opf; + } else { + op->flags |= BCH_WRITE_DONE; + } + + op->pos.offset += bio_sectors(bio); + op->written += bio_sectors(bio); + + bio->bi_end_io = bch2_write_endio; + bio->bi_private = &op->cl; + bio->bi_opf |= REQ_OP_WRITE; + closure_get(&op->cl); + bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user, + op->insert_keys.top, true); + + bch2_keylist_push(&op->insert_keys); + if (op->flags & BCH_WRITE_DONE) + break; + bch2_btree_iter_advance(&iter); + } +out: + bch2_trans_iter_exit(&trans, &iter); +err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto retry; + + if (ret) { + bch_err_inum_offset_ratelimited(c, + op->pos.inode, + op->pos.offset << 9, + "%s: btree lookup error %s", + __func__, bch2_err_str(ret)); + op->error = ret; + op->flags |= BCH_WRITE_DONE; + } + + bch2_trans_exit(&trans); + + /* fallback to cow write path? 
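+	 *
+	 * (editor's note, not part of the original patch: three outcomes
+	 * below - the write wasn't fully submitted nocow, so we sync and let
+	 * the normal COW path in __bch2_write() pick up the remainder; it was
+	 * fully submitted and is synchronous, so it's finished inline; or it's
+	 * asynchronous and completion is punted to a workqueue, since
+	 * ei_quota_lock is a mutex that can't be taken in this context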
 */
+	if (!(op->flags & BCH_WRITE_DONE)) {
+		closure_sync(&op->cl);
+		__bch2_nocow_write_done(op);
+		op->insert_keys.top = op->insert_keys.keys;
+	} else if (op->flags & BCH_WRITE_SYNC) {
+		closure_sync(&op->cl);
+		bch2_nocow_write_done(&op->cl);
+	} else {
+		/*
+		 * XXX
+		 * needs to run out of process context because ei_quota_lock is
+		 * a mutex
+		 */
+		continue_at(&op->cl, bch2_nocow_write_done, index_update_wq(op));
+	}
+	return;
+err_get_ioref:
+	for (i = 0; i < nr_buckets; i++)
+		percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
+
+	/* Fall back to COW path: */
+	goto out;
+err_bucket_stale:
+	while (--i >= 0)
+		bch2_bucket_nocow_unlock(&c->nocow_locks,
+					 buckets[i].b,
+					 BUCKET_NOCOW_LOCK_UPDATE);
+	for (i = 0; i < nr_buckets; i++)
+		percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
+
+	/* We can retry this: */
+	ret = -BCH_ERR_transaction_restart;
+	goto out;
+}
+
+static void __bch2_write(struct bch_write_op *op)
+{
+	struct bch_fs *c = op->c;
+	struct write_point *wp = NULL;
+	struct bio *bio = NULL;
+	unsigned nofs_flags;
+	int ret;
+
+	nofs_flags = memalloc_nofs_save();
+
+	if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
+		bch2_nocow_write(op);
+		if (op->flags & BCH_WRITE_DONE)
+			goto out_nofs_restore;
+	}
+again:
+	memset(&op->failed, 0, sizeof(op->failed));
+
+	do {
+		struct bkey_i *key_to_write;
+		unsigned key_to_write_offset = op->insert_keys.top_p -
+			op->insert_keys.keys_p;
+
+		/* +1 for possible cache device: */
+		if (op->open_buckets.nr + op->nr_replicas + 1 >
+		    ARRAY_SIZE(op->open_buckets.v))
+			break;
+
+		if (bch2_keylist_realloc(&op->insert_keys,
+					op->inline_keys,
+					ARRAY_SIZE(op->inline_keys),
+					BKEY_EXTENT_U64s_MAX))
+			break;
+
+		/*
+		 * The copygc thread is now global, which means it's no longer
+		 * freeing up space on specific disks, which means that
+		 * allocations for specific disks may hang arbitrarily long:
+		 */
+		ret = bch2_trans_do(c, NULL, NULL, 0,
+			bch2_alloc_sectors_start_trans(&trans,
+				op->target,
+				op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
+				op->write_point,
+				&op->devs_have,
+				op->nr_replicas,
+				op->nr_replicas_required,
+				op->watermark,
+				op->flags,
+				(op->flags & (BCH_WRITE_ALLOC_NOWAIT|
+					      BCH_WRITE_ONLY_SPECIFIED_DEVS))
+				? NULL : &op->cl, &wp));
+		if (unlikely(ret)) {
+			if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+				break;
+
+			goto err;
+		}
+
+		EBUG_ON(!wp);
+
+		bch2_open_bucket_get(c, wp, &op->open_buckets);
+		ret = bch2_write_extent(op, wp, &bio);
+
+		bch2_alloc_sectors_done_inlined(c, wp);
+err:
+		if (ret <= 0) {
+			op->flags |= BCH_WRITE_DONE;
+
+			if (ret < 0) {
+				op->error = ret;
+				break;
+			}
+		}
+
+		bio->bi_end_io	= bch2_write_endio;
+		bio->bi_private	= &op->cl;
+		bio->bi_opf |= REQ_OP_WRITE;
+
+		closure_get(bio->bi_private);
+
+		key_to_write = (void *) (op->insert_keys.keys_p +
+					 key_to_write_offset);
+
+		bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
+					  key_to_write, false);
+	} while (ret);
+
+	/*
+	 * Sync or no?
+	 *
+	 * If we're running asynchronously, we may still want to block
+	 * synchronously here if we weren't able to submit all of the IO at
+	 * once, as that signals backpressure to the caller.
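+	 *
+	 * (editor's note, not in the original patch: concretely, the test
+	 * below blocks when BCH_WRITE_SYNC is set, or when the write isn't
+	 * done yet and we're not already running from the worker that would
+	 * otherwise complete it)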
+	 */
+	if ((op->flags & BCH_WRITE_SYNC) ||
+	    (!(op->flags & BCH_WRITE_DONE) &&
+	     !(op->flags & BCH_WRITE_IN_WORKER))) {
+		closure_sync(&op->cl);
+		__bch2_write_index(op);
+
+		if (!(op->flags & BCH_WRITE_DONE))
+			goto again;
+		bch2_write_done(&op->cl);
+	} else {
+		bch2_write_queue(op, wp);
+		continue_at(&op->cl, bch2_write_index, NULL);
+	}
+out_nofs_restore:
+	memalloc_nofs_restore(nofs_flags);
+}
+
+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
+{
+	struct bio *bio = &op->wbio.bio;
+	struct bvec_iter iter;
+	struct bkey_i_inline_data *id;
+	unsigned sectors;
+	int ret;
+
+	op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+	op->flags |= BCH_WRITE_DONE;
+
+	bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
+
+	ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
+				   ARRAY_SIZE(op->inline_keys),
+				   BKEY_U64s + DIV_ROUND_UP(data_len, 8));
+	if (ret) {
+		op->error = ret;
+		goto err;
+	}
+
+	sectors = bio_sectors(bio);
+	op->pos.offset += sectors;
+
+	id = bkey_inline_data_init(op->insert_keys.top);
+	id->k.p = op->pos;
+	id->k.version = op->version;
+	id->k.size = sectors;
+
+	iter = bio->bi_iter;
+	iter.bi_size = data_len;
+	memcpy_from_bio(id->v.data, bio, iter);
+
+	while (data_len & 7)
+		id->v.data[data_len++] = '\0';
+	set_bkey_val_bytes(&id->k, data_len);
+	bch2_keylist_push(&op->insert_keys);
+
+	__bch2_write_index(op);
+err:
+	bch2_write_done(&op->cl);
+}
+
+/**
+ * bch2_write - handle a write to a cache device or flash only volume
+ *
+ * This is the starting point for any data to end up in a cache device; it could
+ * be from a normal write, or a writeback write, or a write to a flash only
+ * volume - it's also used by the moving garbage collector to compact data in
+ * mostly empty buckets.
+ *
+ * It first writes the data to the cache, creating a list of keys to be inserted
+ * (if the data won't fit in a single open bucket, there will be multiple keys);
+ * after the data is written it calls bch_journal, and after the keys have been
+ * added to the next journal write they're inserted into the btree.
+ *
+ * If op->discard is true, instead of inserting the data it invalidates the
+ * region of the cache represented by op->bio and op->inode.
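+ *
+ * (Editor's addition, not part of the original patch - a hedged sketch of
+ * typical caller usage, assembled from the helpers declared in io_write.h;
+ * "my_end_io" is a hypothetical completion hook:
+ *
+ *	struct bch_write_op op;
+ *
+ *	bch2_write_op_init(&op, c, io_opts);
+ *	op.pos		= POS(inum, sector);
+ *	op.nr_replicas	= io_opts.data_replicas;
+ *	op.write_point	= writepoint_hashed((unsigned long) current);
+ *	op.end_io	= my_end_io;
+ *	closure_call(&op.cl, bch2_write, NULL, NULL);
+ * )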
+ */ +void bch2_write(struct closure *cl) +{ + struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + struct bio *bio = &op->wbio.bio; + struct bch_fs *c = op->c; + unsigned data_len; + + EBUG_ON(op->cl.parent); + BUG_ON(!op->nr_replicas); + BUG_ON(!op->write_point.v); + BUG_ON(bkey_eq(op->pos, POS_MAX)); + + op->start_time = local_clock(); + bch2_keylist_init(&op->insert_keys, op->inline_keys); + wbio_init(bio)->put_bio = false; + + if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { + bch_err_inum_offset_ratelimited(c, + op->pos.inode, + op->pos.offset << 9, + "misaligned write"); + op->error = -EIO; + goto err; + } + + if (c->opts.nochanges) { + op->error = -BCH_ERR_erofs_no_writes; + goto err; + } + + if (!(op->flags & BCH_WRITE_MOVE) && + !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) { + op->error = -BCH_ERR_erofs_no_writes; + goto err; + } + + this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio)); + bch2_increment_clock(c, bio_sectors(bio), WRITE); + + data_len = min_t(u64, bio->bi_iter.bi_size, + op->new_i_size - (op->pos.offset << 9)); + + if (c->opts.inline_data && + data_len <= min(block_bytes(c) / 2, 1024U)) { + bch2_write_data_inline(op, data_len); + return; + } + + __bch2_write(op); + return; +err: + bch2_disk_reservation_put(c, &op->res); + + closure_debug_destroy(&op->cl); + if (op->end_io) + op->end_io(op); +} + +static const char * const bch2_write_flags[] = { +#define x(f) #f, + BCH_WRITE_FLAGS() +#undef x + NULL +}; + +void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) +{ + prt_str(out, "pos: "); + bch2_bpos_to_text(out, op->pos); + prt_newline(out); + printbuf_indent_add(out, 2); + + prt_str(out, "started: "); + bch2_pr_time_units(out, local_clock() - op->start_time); + prt_newline(out); + + prt_str(out, "flags: "); + prt_bitflags(out, bch2_write_flags, op->flags); + prt_newline(out); + + prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl)); + prt_newline(out); + + printbuf_indent_sub(out, 2); +} + +void bch2_fs_io_write_exit(struct bch_fs *c) +{ + mempool_exit(&c->bio_bounce_pages); + bioset_exit(&c->bio_write); +} + +int bch2_fs_io_write_init(struct bch_fs *c) +{ + if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio), + BIOSET_NEED_BVECS)) + return -BCH_ERR_ENOMEM_bio_write_init; + + if (mempool_init_page_pool(&c->bio_bounce_pages, + max_t(unsigned, + c->opts.btree_node_size, + c->opts.encoded_extent_max) / + PAGE_SIZE, 0)) + return -BCH_ERR_ENOMEM_bio_bounce_pages_init; + + return 0; +} diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h new file mode 100644 index 000000000000..9323167229ee --- /dev/null +++ b/fs/bcachefs/io_write.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_IO_WRITE_H +#define _BCACHEFS_IO_WRITE_H + +#include "checksum.h" +#include "io_write_types.h" + +#define to_wbio(_bio) \ + container_of((_bio), struct bch_write_bio, bio) + +void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); +void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); + +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +void bch2_latency_acct(struct bch_dev *, u64, int); +#else +static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) {} +#endif + +void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, + enum bch_data_type, const struct bkey_i *, bool); + +#define BCH_WRITE_FLAGS() \ + x(ALLOC_NOWAIT) \ + x(CACHED) \ + x(DATA_ENCODED) \ + x(PAGES_STABLE) \ + x(PAGES_OWNED) \ + x(ONLY_SPECIFIED_DEVS) \ + 
x(WROTE_DATA_INLINE) \ + x(FROM_INTERNAL) \ + x(CHECK_ENOSPC) \ + x(SYNC) \ + x(MOVE) \ + x(IN_WORKER) \ + x(DONE) \ + x(IO_ERROR) \ + x(CONVERT_UNWRITTEN) + +enum __bch_write_flags { +#define x(f) __BCH_WRITE_##f, + BCH_WRITE_FLAGS() +#undef x +}; + +enum bch_write_flags { +#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f), + BCH_WRITE_FLAGS() +#undef x +}; + +static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) +{ + return op->watermark == BCH_WATERMARK_copygc + ? op->c->copygc_wq + : op->c->btree_update_wq; +} + +int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *, + struct bkey_i *, bool *, s64 *, s64 *); +int bch2_extent_update(struct btree_trans *, subvol_inum, + struct btree_iter *, struct bkey_i *, + struct disk_reservation *, u64, s64 *, bool); + +static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, + struct bch_io_opts opts) +{ + op->c = c; + op->end_io = NULL; + op->flags = 0; + op->written = 0; + op->error = 0; + op->csum_type = bch2_data_checksum_type(c, opts); + op->compression_opt = opts.compression; + op->nr_replicas = 0; + op->nr_replicas_required = c->opts.data_replicas_required; + op->watermark = BCH_WATERMARK_normal; + op->incompressible = 0; + op->open_buckets.nr = 0; + op->devs_have.nr = 0; + op->target = 0; + op->opts = opts; + op->subvol = 0; + op->pos = POS_MAX; + op->version = ZERO_VERSION; + op->write_point = (struct write_point_specifier) { 0 }; + op->res = (struct disk_reservation) { 0 }; + op->new_i_size = U64_MAX; + op->i_sectors_delta = 0; + op->devs_need_flush = NULL; +} + +void bch2_write(struct closure *); + +void bch2_write_point_do_index_updates(struct work_struct *); + +static inline struct bch_write_bio *wbio_init(struct bio *bio) +{ + struct bch_write_bio *wbio = to_wbio(bio); + + memset(&wbio->wbio, 0, sizeof(wbio->wbio)); + return wbio; +} + +void bch2_write_op_to_text(struct printbuf *, struct bch_write_op *); + +void bch2_fs_io_write_exit(struct bch_fs *); +int bch2_fs_io_write_init(struct bch_fs *); + +#endif /* _BCACHEFS_IO_WRITE_H */ diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h new file mode 100644 index 000000000000..c7f97c2c4805 --- /dev/null +++ b/fs/bcachefs/io_write_types.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_IO_WRITE_TYPES_H +#define _BCACHEFS_IO_WRITE_TYPES_H + +#include "alloc_types.h" +#include "btree_types.h" +#include "buckets_types.h" +#include "extents_types.h" +#include "keylist_types.h" +#include "opts.h" +#include "super_types.h" + +#include +#include + +struct bch_write_bio { + struct_group(wbio, + struct bch_fs *c; + struct bch_write_bio *parent; + + u64 submit_time; + u64 inode_offset; + + struct bch_devs_list failed; + u8 dev; + + unsigned split:1, + bounce:1, + put_bio:1, + have_ioref:1, + nocow:1, + used_mempool:1, + first_btree_write:1; + ); + + struct bio bio; +}; + +struct bch_write_op { + struct closure cl; + struct bch_fs *c; + void (*end_io)(struct bch_write_op *); + u64 start_time; + + unsigned written; /* sectors */ + u16 flags; + s16 error; /* dio write path expects it to hold -ERESTARTSYS... 
*/ + + unsigned compression_opt:8; + unsigned csum_type:4; + unsigned nr_replicas:4; + unsigned nr_replicas_required:4; + unsigned watermark:3; + unsigned incompressible:1; + unsigned stripe_waited:1; + + struct bch_devs_list devs_have; + u16 target; + u16 nonce; + struct bch_io_opts opts; + + u32 subvol; + struct bpos pos; + struct bversion version; + + /* For BCH_WRITE_DATA_ENCODED: */ + struct bch_extent_crc_unpacked crc; + + struct write_point_specifier write_point; + + struct write_point *wp; + struct list_head wp_list; + + struct disk_reservation res; + + struct open_buckets open_buckets; + + u64 new_i_size; + s64 i_sectors_delta; + + struct bch_devs_mask failed; + + struct keylist insert_keys; + u64 inline_keys[BKEY_EXTENT_U64s_MAX * 2]; + + /* + * Bitmask of devices that have had nocow writes issued to them since + * last flush: + */ + struct bch_devs_mask *devs_need_flush; + + /* Must be last: */ + struct bch_write_bio wbio; +}; + +#endif /* _BCACHEFS_IO_WRITE_TYPES_H */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 34740dca4b15..0e606009dc46 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -8,7 +8,6 @@ #include "checksum.h" #include "disk_groups.h" #include "error.h" -#include "io.h" #include "journal.h" #include "journal_io.h" #include "journal_reclaim.h" diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 3d7c5b919421..4746dfa7af97 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -10,7 +10,7 @@ #include "buckets.h" #include "errcode.h" #include "extents.h" -#include "io.h" +#include "io_write.h" #include "journal.h" #include "keylist.h" #include "migrate.h" diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index fb76a1dac74e..ac4df53bfde2 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -14,7 +14,8 @@ #include "errcode.h" #include "error.h" #include "inode.h" -#include "io.h" +#include "io_read.h" +#include "io_write.h" #include "journal_reclaim.h" #include "keylist.h" #include "move.h" diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index c3136abe8587..cbdd58db8782 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_MOVE_H #define _BCACHEFS_MOVE_H +#include "bcachefs_ioctl.h" #include "btree_iter.h" #include "buckets.h" #include "data_update.h" diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index ac658e99bf57..2371fd61ea58 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -13,25 +13,17 @@ #include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" -#include "disk_groups.h" #include "errcode.h" #include "error.h" -#include "extents.h" -#include "eytzinger.h" -#include "io.h" -#include "keylist.h" #include "lru.h" #include "move.h" #include "movinggc.h" -#include "super-io.h" #include "trace.h" -#include #include #include #include #include -#include #include struct buckets_in_flight { diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 016cf0834b3d..568f1e8e7507 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -8,8 +8,6 @@ #include "compress.h" #include "disk_groups.h" #include "errcode.h" -#include "extents.h" -#include "io.h" #include "move.h" #include "rebalance.h" #include "super-io.h" diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 39f711d5069e..f155428ff395 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -5,9 +5,11 @@ #include "buckets.h" #include "extents.h" #include "inode.h" -#include "io.h" +#include "io_misc.h" 
+#include "io_write.h" #include "reflink.h" #include "subvolume.h" +#include "super-io.h" #include diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index f01883e785a5..5a1115396edc 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -6,7 +6,6 @@ #include "disk_groups.h" #include "ec.h" #include "error.h" -#include "io.h" #include "journal.h" #include "journal_sb.h" #include "journal_seq_blacklist.h" diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7cfc04947717..55176023f15b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -35,7 +35,8 @@ #include "fs-io-direct.h" #include "fsck.h" #include "inode.h" -#include "io.h" +#include "io_read.h" +#include "io_write.h" #include "journal.h" #include "journal_reclaim.h" #include "journal_seq_blacklist.h" @@ -483,7 +484,8 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_fs_fsio_exit(c); bch2_fs_ec_exit(c); bch2_fs_encryption_exit(c); - bch2_fs_io_exit(c); + bch2_fs_io_write_exit(c); + bch2_fs_io_read_exit(c); bch2_fs_buckets_waiting_for_journal_exit(c); bch2_fs_btree_interior_update_exit(c); bch2_fs_btree_iter_exit(c); @@ -848,7 +850,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_buckets_waiting_for_journal_init(c) ?: bch2_fs_btree_write_buffer_init(c) ?: bch2_fs_subvolumes_init(c) ?: - bch2_fs_io_init(c) ?: + bch2_fs_io_read_init(c) ?: + bch2_fs_io_write_init(c) ?: bch2_fs_nocow_locking_init(c) ?: bch2_fs_encryption_init(c) ?: bch2_fs_compress_init(c) ?: -- cgit v1.2.3 From 5902cc283c060f0a006ee9b2f2a64855a09399b4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 4 Sep 2023 05:38:30 -0400 Subject: bcachefs: New io_misc.c helpers This pulls the non vfs specific parts of truncate and finsert/fcollapse out of fs-io.c, and moves them to io_misc.c. This is prep work for logging these operations, to make them atomic in the event of a crash. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 209 +++++----------------------------------------- fs/bcachefs/fs-io.h | 2 +- fs/bcachefs/fs.c | 2 +- fs/bcachefs/io_misc.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/io_misc.h | 3 + 5 files changed, 250 insertions(+), 192 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 0b0b3b0d6c7d..b36513eb3d16 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -391,33 +391,12 @@ static int bch2_extend(struct mnt_idmap *idmap, return bch2_setattr_nonsize(idmap, inode, iattr); } -static int bch2_truncate_finish_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY; - return 0; -} - -static int bch2_truncate_start_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, void *p) -{ - u64 *new_i_size = p; - - bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY; - bi->bi_size = *new_i_size; - return 0; -} - -int bch2_truncate(struct mnt_idmap *idmap, +int bchfs_truncate(struct mnt_idmap *idmap, struct bch_inode_info *inode, struct iattr *iattr) { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; struct bch_inode_unpacked inode_u; - u64 new_i_size = iattr->ia_size; s64 i_sectors_delta = 0; int ret = 0; @@ -466,6 +445,8 @@ int bch2_truncate(struct mnt_idmap *idmap, if (unlikely(ret < 0)) goto err; + truncate_setsize(&inode->v, iattr->ia_size); + /* * When extending, we're going to write the new i_size to disk * immediately so we need to flush anything above the current on disk @@ -487,32 +468,22 @@ int bch2_truncate(struct mnt_idmap *idmap, if (ret) goto err; - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_truncate_start_fn, - &new_i_size, 0); - mutex_unlock(&inode->ei_update_lock); + ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); - if (unlikely(ret)) + if (unlikely(ret)) { + /* + * If we error here, VFS caches are now inconsistent with btree + */ + set_bit(EI_INODE_ERROR, &inode->ei_flags); goto err; - - truncate_setsize(&inode->v, iattr->ia_size); - - ret = bch2_fpunch(c, inode_inum(inode), - round_up(iattr->ia_size, block_bytes(c)) >> 9, - U64_MAX, &i_sectors_delta); - bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + } bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && !bch2_journal_error(&c->journal), c, "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", inode->v.i_ino, (u64) inode->v.i_blocks, inode->ei_inode.bi_sectors); - if (unlikely(ret)) - goto err; - - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0); - mutex_unlock(&inode->ei_update_lock); ret = bch2_setattr_nonsize(idmap, inode, iattr); err: @@ -577,175 +548,33 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; - struct bkey_buf copy; - struct btree_trans trans; - struct btree_iter src, dst, del; - loff_t shift, new_size; - u64 src_start; + s64 i_sectors_delta = 0; int ret = 0; if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; if (insert) { - if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) - return -EFBIG; - if (offset >= inode->v.i_size) return -EINVAL; - - src_start = U64_MAX; - shift = len; } else { 
if (offset + len >= inode->v.i_size) return -EINVAL; - - src_start = offset + len; - shift = -len; } - new_size = inode->v.i_size + shift; - ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); if (ret) return ret; - if (insert) { - i_size_write(&inode->v, new_size); - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, new_size, - ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - } else { - s64 i_sectors_delta = 0; - - ret = bch2_fpunch(c, inode_inum(inode), - offset >> 9, (offset + len) >> 9, - &i_sectors_delta); - bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + if (insert) + i_size_write(&inode->v, inode->v.i_size + len); - if (ret) - return ret; - } - - bch2_bkey_buf_init(©); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, - POS(inode->v.i_ino, src_start >> 9), - BTREE_ITER_INTENT); - bch2_trans_copy_iter(&dst, &src); - bch2_trans_copy_iter(&del, &src); - - while (ret == 0 || - bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - struct disk_reservation disk_res = - bch2_disk_reservation_init(c, 0); - struct bkey_i delete; - struct bkey_s_c k; - struct bpos next_pos; - struct bpos move_pos = POS(inode->v.i_ino, offset >> 9); - struct bpos atomic_end; - unsigned trigger_flags = 0; - u32 snapshot; - - bch2_trans_begin(&trans); - - ret = bch2_subvolume_get_snapshot(&trans, - inode->ei_subvol, &snapshot); - if (ret) - continue; - - bch2_btree_iter_set_snapshot(&src, snapshot); - bch2_btree_iter_set_snapshot(&dst, snapshot); - bch2_btree_iter_set_snapshot(&del, snapshot); - - bch2_trans_begin(&trans); - - k = insert - ? bch2_btree_iter_peek_prev(&src) - : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX)); - if ((ret = bkey_err(k))) - continue; - - if (!k.k || k.k->p.inode != inode->v.i_ino) - break; - - if (insert && - bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9))) - break; -reassemble: - bch2_bkey_buf_reassemble(©, c, k); - - if (insert && - bkey_lt(bkey_start_pos(k.k), move_pos)) - bch2_cut_front(move_pos, copy.k); - - copy.k->k.p.offset += shift >> 9; - bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); - - ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); - if (ret) - continue; - - if (!bkey_eq(atomic_end, copy.k->k.p)) { - if (insert) { - move_pos = atomic_end; - move_pos.offset -= shift >> 9; - goto reassemble; - } else { - bch2_cut_back(atomic_end, copy.k); - } - } - - bkey_init(&delete.k); - delete.k.p = copy.k->k.p; - delete.k.size = copy.k->k.size; - delete.k.p.offset -= shift >> 9; - bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); - - next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; - - if (copy.k->k.size != k.k->size) { - /* We might end up splitting compressed extents: */ - unsigned nr_ptrs = - bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); - - ret = bch2_disk_reservation_get(c, &disk_res, - copy.k->k.size, nr_ptrs, - BCH_DISK_RESERVATION_NOFAIL); - BUG_ON(ret); - } - - ret = bch2_btree_iter_traverse(&del) ?: - bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: - bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: - bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_NOFAIL); - bch2_disk_reservation_put(c, &disk_res); - - if (!ret) - bch2_btree_iter_set_pos(&src, next_pos); - } - bch2_trans_iter_exit(&trans, &del); - bch2_trans_iter_exit(&trans, &dst); - bch2_trans_iter_exit(&trans, &src); - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(©, c); - - if (ret) - return ret; + ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9, + insert, &i_sectors_delta); + if (!ret && !insert) + i_size_write(&inode->v, inode->v.i_size - len); + bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); - mutex_lock(&inode->ei_update_lock); - if (!insert) { - i_size_write(&inode->v, new_size); - ret = bch2_write_inode_size(c, inode, new_size, - ATTR_MTIME|ATTR_CTIME); - } else { - /* We need an inode update to update bi_journal_seq for fsync: */ - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_MTIME|ATTR_CTIME); - } - mutex_unlock(&inode->ei_update_lock); return ret; } diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index bc6e8439d40b..ca70346e68dc 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -165,7 +165,7 @@ int __must_check bch2_write_inode_size(struct bch_fs *, int bch2_fsync(struct file *, loff_t, loff_t, int); -int bch2_truncate(struct mnt_idmap *, +int bchfs_truncate(struct mnt_idmap *, struct bch_inode_info *, struct iattr *); long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0648874d54f3..0def3a57bd6d 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -798,7 +798,7 @@ static int bch2_setattr(struct mnt_idmap *idmap, return ret; return iattr->ia_valid & ATTR_SIZE - ? bch2_truncate(idmap, inode, iattr) + ? 
bchfs_truncate(idmap, inode, iattr) : bch2_setattr_nonsize(idmap, inode, iattr); } diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index c04e5dacfc8d..1afea613df4a 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -9,7 +9,10 @@ #include "btree_update.h" #include "buckets.h" #include "clock.h" +#include "error.h" #include "extents.h" +#include "extent_update.h" +#include "inode.h" #include "io_misc.h" #include "io_write.h" #include "subvolume.h" @@ -213,3 +216,226 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, return ret; } + +static int truncate_set_isize(struct btree_trans *trans, + subvol_inum inum, + u64 new_i_size) +{ + struct btree_iter iter = { NULL }; + struct bch_inode_unpacked inode_u; + int ret; + + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?: + (inode_u.bi_size = new_i_size, 0) ?: + bch2_inode_write(trans, &iter, &inode_u); + + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) +{ + struct btree_trans trans; + struct btree_iter fpunch_iter; + int ret; + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + bch2_trans_iter_init(&trans, &fpunch_iter, BTREE_ID_extents, + POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), + BTREE_ITER_INTENT); + + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + truncate_set_isize(&trans, inum, new_i_size)); + if (ret) + goto err; + + ret = bch2_fpunch_at(&trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + ret = 0; + if (ret) + goto err; +err: + bch2_trans_iter_exit(&trans, &fpunch_iter); + bch2_trans_exit(&trans); + + bch2_fs_fatal_err_on(ret, c, "%s: error truncating %u:%llu: %s", + __func__, inum.subvol, inum.inum, bch2_err_str(ret)); + return ret; +} + +static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) +{ + struct btree_iter iter; + struct bch_inode_unpacked inode_u; + int ret; + + offset <<= 9; + len <<= 9; + + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT); + if (ret) + return ret; + + if (len > 0) { + if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { + ret = -EFBIG; + goto err; + } + + if (offset >= inode_u.bi_size) { + ret = -EINVAL; + goto err; + } + } + + inode_u.bi_size += len; + inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); + + ret = bch2_inode_write(trans, &iter, &inode_u); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, + u64 offset, u64 len, bool insert, + s64 *i_sectors_delta) +{ + struct bkey_buf copy; + struct btree_trans trans; + struct btree_iter src = { NULL }, dst = { NULL }, del = { NULL }; + s64 shift = insert ? 
len : -len; + int ret = 0; + + bch2_bkey_buf_init(©); + bch2_trans_init(&trans, c, 0, 1024); + + bch2_trans_iter_init(&trans, &src, BTREE_ID_extents, + POS(inum.inum, U64_MAX), + BTREE_ITER_INTENT); + bch2_trans_copy_iter(&dst, &src); + bch2_trans_copy_iter(&del, &src); + + if (insert) { + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + adjust_i_size(&trans, inum, offset, len)); + if (ret) + goto err; + } else { + bch2_btree_iter_set_pos(&src, POS(inum.inum, offset)); + + ret = bch2_fpunch_at(&trans, &src, inum, offset + len, i_sectors_delta); + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto err; + + bch2_btree_iter_set_pos(&src, POS(inum.inum, offset + len)); + } + + while (ret == 0 || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + struct disk_reservation disk_res = + bch2_disk_reservation_init(c, 0); + struct bkey_i delete; + struct bkey_s_c k; + struct bpos next_pos; + struct bpos move_pos = POS(inum.inum, offset); + struct bpos atomic_end; + unsigned trigger_flags = 0; + u32 snapshot; + + bch2_trans_begin(&trans); + + ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + if (ret) + continue; + + bch2_btree_iter_set_snapshot(&src, snapshot); + bch2_btree_iter_set_snapshot(&dst, snapshot); + bch2_btree_iter_set_snapshot(&del, snapshot); + + bch2_trans_begin(&trans); + + k = insert + ? bch2_btree_iter_peek_prev(&src) + : bch2_btree_iter_peek_upto(&src, POS(inum.inum, U64_MAX)); + if ((ret = bkey_err(k))) + continue; + + if (!k.k || k.k->p.inode != inum.inum) + break; + + if (insert && + bkey_le(k.k->p, POS(inum.inum, offset))) + break; +reassemble: + bch2_bkey_buf_reassemble(©, c, k); + + if (insert && + bkey_lt(bkey_start_pos(k.k), move_pos)) + bch2_cut_front(move_pos, copy.k); + + copy.k->k.p.offset += shift; + bch2_btree_iter_set_pos(&dst, bkey_start_pos(©.k->k)); + + ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end); + if (ret) + continue; + + if (!bkey_eq(atomic_end, copy.k->k.p)) { + if (insert) { + move_pos = atomic_end; + move_pos.offset -= shift; + goto reassemble; + } else { + bch2_cut_back(atomic_end, copy.k); + } + } + + bkey_init(&delete.k); + delete.k.p = copy.k->k.p; + delete.k.size = copy.k->k.size; + delete.k.p.offset -= shift; + bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k)); + + next_pos = insert ? 
bkey_start_pos(&delete.k) : delete.k.p; + + if (copy.k->k.size != k.k->size) { + /* We might end up splitting compressed extents: */ + unsigned nr_ptrs = + bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k)); + + ret = bch2_disk_reservation_get(c, &disk_res, + copy.k->k.size, nr_ptrs, + BCH_DISK_RESERVATION_NOFAIL); + BUG_ON(ret); + } + + ret = bch2_btree_iter_traverse(&del) ?: + bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: + bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: + bch2_trans_commit(&trans, &disk_res, NULL, + BTREE_INSERT_NOFAIL); + bch2_disk_reservation_put(c, &disk_res); + + if (!ret) + bch2_btree_iter_set_pos(&src, next_pos); + } + + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) + goto err; + + if (!insert) { + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + adjust_i_size(&trans, inum, offset, -len)); + } else { + /* We need an inode update to update bi_journal_seq for fsync: */ + ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, + adjust_i_size(&trans, inum, 0, 0)); + } +err: + bch2_trans_iter_exit(&trans, &del); + bch2_trans_iter_exit(&trans, &dst); + bch2_trans_iter_exit(&trans, &src); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(©, c); + return ret; +} diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h index 46e9ce3251d6..894a7a04ba4b 100644 --- a/fs/bcachefs/io_misc.h +++ b/fs/bcachefs/io_misc.h @@ -9,4 +9,7 @@ int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, subvol_inum, u64, s64 *); int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); +int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *); +int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *); + #endif /* _BCACHEFS_IO_MISC_H */ -- cgit v1.2.3 From 96dea3d599dbc31f59eb786af2ac5079122beb88 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Sep 2023 18:41:22 -0400 Subject: bcachefs: Fix W=12 build errors Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 17 ++++----- fs/bcachefs/alloc_foreground.c | 15 +++++--- fs/bcachefs/backpointers.c | 5 +-- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/bcachefs_format.h | 15 ++++---- fs/bcachefs/bkey.c | 31 ++++++++++----- fs/bcachefs/bkey_methods.c | 6 ++- fs/bcachefs/bset.c | 12 ++++-- fs/bcachefs/btree_cache.c | 15 ++++++-- fs/bcachefs/btree_gc.c | 26 ++++++------- fs/bcachefs/btree_io.c | 36 +++++------------- fs/bcachefs/btree_iter.c | 37 +++++++++++++----- fs/bcachefs/btree_iter.h | 54 +++++++++++++------------- fs/bcachefs/btree_key_cache.c | 2 - fs/bcachefs/btree_trans_commit.c | 10 ++--- fs/bcachefs/btree_types.h | 34 ++++++++--------- fs/bcachefs/btree_update.c | 20 ++++++---- fs/bcachefs/btree_update.h | 6 +-- fs/bcachefs/btree_update_interior.c | 58 +++++++++++++--------------- fs/bcachefs/checksum.c | 12 +++--- fs/bcachefs/checksum.h | 5 +-- fs/bcachefs/compress.c | 7 +--- fs/bcachefs/data_update.c | 4 -- fs/bcachefs/debug.c | 6 +-- fs/bcachefs/disk_groups.c | 12 +++--- fs/bcachefs/errcode.c | 2 - fs/bcachefs/fs-io-buffered.c | 75 ++++++++++++++++++------------------- fs/bcachefs/fs-io-pagecache.c | 23 +++++++----- fs/bcachefs/fs-ioctl.h | 6 +-- fs/bcachefs/fs.c | 2 +- fs/bcachefs/fsck.c | 21 +++++++---- fs/bcachefs/inode.c | 3 +- fs/bcachefs/io_write.c | 14 ++++--- fs/bcachefs/journal.c | 9 ++++- fs/bcachefs/journal_io.c | 27 +++++++------ fs/bcachefs/journal_reclaim.c | 11 ++++-- fs/bcachefs/move.c | 1 - fs/bcachefs/movinggc.c | 26 ++++++------- fs/bcachefs/opts.h | 2 +- fs/bcachefs/printbuf.c | 66 
++++++++++++++++++-------------- fs/bcachefs/recovery.c | 16 ++------ fs/bcachefs/reflink.c | 4 ++ fs/bcachefs/six.c | 1 - fs/bcachefs/snapshot.c | 10 ++--- fs/bcachefs/super-io.c | 4 +- fs/bcachefs/super.c | 2 +- fs/bcachefs/sysfs.c | 30 +-------------- fs/bcachefs/tests.c | 57 +++++++++++++--------------- fs/bcachefs/util.c | 21 +++++------ fs/bcachefs/util.h | 6 +-- fs/bcachefs/varint.c | 24 +++++++----- 51 files changed, 459 insertions(+), 451 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index ee21aeece39e..4eab7e59ae93 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1200,15 +1200,15 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, } if (need_update) { - struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(g)); + struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); - ret = PTR_ERR_OR_ZERO(k); + ret = PTR_ERR_OR_ZERO(u); if (ret) goto err; - memcpy(k, &g, sizeof(g)); + memcpy(u, &g, sizeof(g)); - ret = bch2_trans_update(trans, bucket_gens_iter, k, 0); + ret = bch2_trans_update(trans, bucket_gens_iter, u, 0); if (ret) goto err; } @@ -1354,15 +1354,14 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, } if (need_update) { - struct bkey_i *k; + struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); - k = bch2_trans_kmalloc(trans, sizeof(g)); - ret = PTR_ERR_OR_ZERO(k); + ret = PTR_ERR_OR_ZERO(u); if (ret) goto out; - memcpy(k, &g, sizeof(g)); - ret = bch2_trans_update(trans, iter, k, 0); + memcpy(u, &g, sizeof(g)); + ret = bch2_trans_update(trans, iter, u, 0); } out: fsck_err: diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 8e1888a89011..e73b6c82870a 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -502,9 +502,14 @@ again: } /** - * bch_bucket_alloc - allocate a single bucket from a specific device + * bch2_bucket_alloc_trans - allocate a single bucket from a specific device + * @trans: transaction object + * @ca: device to allocate from + * @watermark: how important is this allocation? + * @cl: if not NULL, closure to be used to wait if buckets not available + * @usage: for secondarily also returning the current device usage * - * Returns index of bucket on success, 0 on failure + * Returns: an open_bucket on success, or an ERR_PTR() on failure. 
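+ *
+ * (Editor's addition, not from this patch - hedged usage sketch; on
+ * success ob->bucket is assumed to be the freshly allocated bucket on @ca:
+ *
+ *	struct bch_dev_usage usage;
+ *	struct open_bucket *ob =
+ *		bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage);
+ *
+ *	if (IS_ERR(ob))
+ *		return PTR_ERR(ob);
+ * )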
*/ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct bch_dev *ca, @@ -775,7 +780,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, struct dev_alloc_list devs_sorted; struct ec_stripe_head *h; struct open_bucket *ob; - struct bch_dev *ca; unsigned i, ec_idx; int ret = 0; @@ -805,8 +809,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, } goto out_put_head; got_bucket: - ca = bch_dev_bkey_exists(c, ob->dev); - ob->ec_idx = ec_idx; ob->ec = h->s; ec_stripe_new_get(h->s, STRIPE_REF_io); @@ -1032,10 +1034,13 @@ static int open_bucket_add_buckets(struct btree_trans *trans, /** * should_drop_bucket - check if this is open_bucket should go away + * @ob: open_bucket to predicate on + * @c: filesystem handle * @ca: if set, we're killing buckets for a particular device * @ec: if true, we're shutting down erasure coding and killing all ec * open_buckets * otherwise, return true + * Returns: true if we should kill this open_bucket * * We're killing open_buckets because we're shutting down a device, erasure * coding, or the entire filesystem - check if this open_bucket matches: diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index bec62e5b21e5..82109585439b 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -351,7 +351,6 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ { struct bch_fs *c = trans->c; struct btree_iter alloc_iter = { NULL }; - struct bch_dev *ca; struct bkey_s_c alloc_k; struct printbuf buf = PRINTBUF; int ret = 0; @@ -363,8 +362,6 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_ goto out; } - ca = bch_dev_bkey_exists(c, k.k->p.inode); - alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bp_pos_to_bucket(c, k.k->p), 0); ret = bkey_err(alloc_k); @@ -629,7 +626,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; enum btree_id btree_id; - struct bpos_level last_flushed = { UINT_MAX }; + struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; int ret = 0; for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index e80fef1537c9..9fe3dac4a005 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -371,7 +371,7 @@ BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM #ifndef CONFIG_BCACHEFS_DEBUG -#define BCH_DEBUG_PARAM(name, description) static const bool bch2_##name; +#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name; BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM #endif diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c434202f351a..f0d130440baa 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -83,8 +83,8 @@ typedef uuid_t __uuid_t; #endif #define BITMASK(name, type, field, offset, end) \ -static const unsigned name##_OFFSET = offset; \ -static const unsigned name##_BITS = (end - offset); \ +static const __maybe_unused unsigned name##_OFFSET = offset; \ +static const __maybe_unused unsigned name##_BITS = (end - offset); \ \ static inline __u64 name(const type *k) \ { \ @@ -98,9 +98,9 @@ static inline void SET_##name(type *k, __u64 v) \ } #define LE_BITMASK(_bits, name, type, field, offset, end) \ -static const unsigned name##_OFFSET = offset; \ -static const unsigned name##_BITS = (end - offset); \ -static const __u##_bits name##_MAX = 
(1ULL << (end - offset)) - 1; \ +static const __maybe_unused unsigned name##_OFFSET = offset; \ +static const __maybe_unused unsigned name##_BITS = (end - offset); \ +static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\ \ static inline __u64 name(const type *k) \ { \ @@ -1668,7 +1668,8 @@ enum bcachefs_metadata_version { bcachefs_metadata_version_max }; -static const unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor; +static const __maybe_unused +unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_major_minor; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) @@ -1975,7 +1976,7 @@ enum bch_csum_type { BCH_CSUM_NR }; -static const unsigned bch_crc_bytes[] = { +static const __maybe_unused unsigned bch_crc_bytes[] = { [BCH_CSUM_none] = 0, [BCH_CSUM_crc32c_nonzero] = 4, [BCH_CSUM_crc32c] = 4, diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index a3abd9d2d176..abdb05507d16 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -308,9 +308,14 @@ struct bpos __bkey_unpack_pos(const struct bkey_format *format, /** * bch2_bkey_pack_key -- pack just the key, not the value + * @out: packed result + * @in: key to pack + * @format: format of packed result + * + * Returns: true on success, false on failure */ bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, - const struct bkey_format *format) + const struct bkey_format *format) { struct pack_state state = pack_state_init(format, out); u64 *w = out->_data; @@ -336,9 +341,12 @@ bool bch2_bkey_pack_key(struct bkey_packed *out, const struct bkey *in, /** * bch2_bkey_unpack -- unpack the key and the value + * @b: btree node of @src key (for packed format) + * @dst: unpacked result + * @src: packed input */ void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, - const struct bkey_packed *src) + const struct bkey_packed *src) { __bkey_unpack_key(b, &dst->k, src); @@ -349,19 +357,24 @@ void bch2_bkey_unpack(const struct btree *b, struct bkey_i *dst, /** * bch2_bkey_pack -- pack the key and the value + * @dst: packed result + * @src: unpacked input + * @format: format of packed result + * + * Returns: true on success, false on failure */ -bool bch2_bkey_pack(struct bkey_packed *out, const struct bkey_i *in, - const struct bkey_format *format) +bool bch2_bkey_pack(struct bkey_packed *dst, const struct bkey_i *src, + const struct bkey_format *format) { struct bkey_packed tmp; - if (!bch2_bkey_pack_key(&tmp, &in->k, format)) + if (!bch2_bkey_pack_key(&tmp, &src->k, format)) return false; - memmove_u64s((u64 *) out + format->key_u64s, - &in->v, - bkey_val_u64s(&in->k)); - memcpy_u64s_small(out, &tmp, format->key_u64s); + memmove_u64s((u64 *) dst + format->key_u64s, + &src->v, + bkey_val_u64s(&src->k)); + memcpy_u64s_small(dst, &tmp, format->key_u64s); return true; } diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 82f30ffbfb86..be9f012fc7be 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -369,7 +369,6 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, { const struct bkey_ops *ops; struct bkey uk; - struct bkey_s u; unsigned nr_compat = 5; int i; @@ -434,7 +433,9 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, } break; - case 4: + case 4: { + struct bkey_s u; + if (!bkey_packed(k)) { u = bkey_i_to_s(packed_to_bkey(k)); } else { @@ -451,6 +452,7 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, if 
(ops->compat) ops->compat(btree_id, version, big_endian, write, u); break; + } default: BUG(); } diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 685792137d2a..cff7486ef446 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -172,10 +172,10 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, printk(KERN_ERR "iter was:"); btree_node_iter_for_each(_iter, set) { - struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); - struct bset_tree *t = bch2_bkey_to_bset(b, k); + struct bkey_packed *k2 = __btree_node_offset_to_key(b, set->k); + struct bset_tree *t = bch2_bkey_to_bset(b, k2); printk(" [%zi %zi]", t - b->set, - k->_data - bset(b, t)->_data); + k2->_data - bset(b, t)->_data); } panic("\n"); } @@ -1269,9 +1269,13 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, } /** - * bch_btree_node_iter_init - initialize a btree node iterator, starting from a + * bch2_btree_node_iter_init - initialize a btree node iterator, starting from a * given position * + * @iter: iterator to initialize + * @b: btree node to search + * @search: search key + * * Main entry point to the lookup code for individual btree nodes: * * NOTE: diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 245ddd92b2d1..ef9492f7e937 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -885,7 +885,7 @@ retry: } if (unlikely(need_relock)) { - int ret = bch2_trans_relock(trans) ?: + ret = bch2_trans_relock(trans) ?: bch2_btree_path_relock_intent(trans, path); if (ret) { six_unlock_type(&b->c.lock, lock_type); @@ -916,11 +916,20 @@ retry: } /** - * bch_btree_node_get - find a btree node in the cache and lock it, reading it + * bch2_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * + * @trans: btree transaction object + * @path: btree_path being traversed + * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2) + * @level: level of btree node being looked up (0 == leaf node) + * @lock_type: SIX_LOCK_read or SIX_LOCK_intent + * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek()) + * * The btree node will have either a read or a write lock held, depending on * the @write parameter. 
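+ * (editor's note, not in the original patch: "@write" here is stale - the
+ * lock actually taken is the one given by @lock_type, i.e. SIX_LOCK_read
+ * or SIX_LOCK_intent as listed above)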
+ * + * Returns: btree node or ERR_PTR() */ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, @@ -979,7 +988,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path * * relock it specifically: */ if (trans) { - int ret = bch2_trans_relock(trans) ?: + ret = bch2_trans_relock(trans) ?: bch2_btree_path_relock_intent(trans, path); if (ret) { BUG_ON(!trans->restarted); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 3c8ffbbaef4f..9496ff16fc91 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -566,8 +566,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id struct bkey_s_c *k) { struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k); - const union bch_extent_entry *entry; + struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(*k); + const union bch_extent_entry *entry_c; struct extent_ptr_decoded p = { 0 }; bool do_update = false; struct printbuf buf = PRINTBUF; @@ -577,10 +577,10 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id * XXX * use check_bucket_ref here */ - bkey_for_each_ptr_decode(k->k, ptrs, p, entry) { + bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) { struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); - enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr); + enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr); if (!g->gen_valid && (c->opts.reconstruct_alloc || @@ -1217,14 +1217,6 @@ static int bch2_gc_done(struct bch_fs *c, fsck_err(c, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f))) \ dst->_f = src->_f -#define copy_stripe_field(_f, _msg, ...) \ - if (dst->_f != src->_f && \ - (!verify || \ - fsck_err(c, "stripe %zu has wrong "_msg \ - ": got %u, should be %u", \ - iter.pos, ##__VA_ARGS__, \ - dst->_f, src->_f))) \ - dst->_f = src->_f #define copy_dev_field(_f, _msg, ...) \ copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) #define copy_fs_field(_f, _msg, ...) \ @@ -1776,6 +1768,12 @@ static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only) /** * bch2_gc - walk _all_ references to buckets, and recompute them: * + * @c: filesystem object + * @initial: are we in recovery? + * @metadata_only: are we just checking metadata references, or everything? 
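+ *
+ * For example (illustrative only), a full mark-and-sweep run from recovery
+ * would be invoked as:
+ *
+ *	ret = bch2_gc(c, true, false);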
+ * + * Returns: 0 on success, or standard errcode on failure + * * Order matters here: * - Concurrent GC relies on the fact that we have a total ordering for * everything that GC walks - see gc_will_visit_node(), @@ -1985,11 +1983,9 @@ int bch2_gc_gens(struct bch_fs *c) for (i = 0; i < BTREE_ID_NR; i++) if (btree_type_has_ptrs(i)) { - struct btree_iter iter; - struct bkey_s_c k; - c->gc_gens_btree = i; c->gc_gens_pos = POS_MIN; + ret = for_each_btree_key_commit(&trans, iter, i, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 00f53cb5d44b..9fa9ed641300 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -336,7 +336,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, start_bset->journal_seq = cpu_to_le64(seq); if (sorting_entire_node) { - unsigned u64s = le16_to_cpu(out->keys.u64s); + u64s = le16_to_cpu(out->keys.u64s); BUG_ON(bytes != btree_bytes(c)); @@ -410,8 +410,6 @@ void bch2_btree_sort_into(struct bch_fs *c, bch2_verify_btree_nr_keys(dst); } -#define SORT_CRIT (4096 / sizeof(u64)) - /* * We're about to add another bset to the btree node, so if there's currently * too many bsets - sort some of them together: @@ -542,6 +540,7 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } +__printf(8, 9) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, @@ -622,9 +621,6 @@ __cold void bch2_btree_node_drop_keys_outside_node(struct btree *b) { struct bset_tree *t; - struct bkey_s_c k; - struct bkey unpacked; - struct btree_node_iter iter; for_each_bset(b, t) { struct bset *i = bset(b, t); @@ -660,6 +656,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); + struct bkey_s_c k; + struct bkey unpacked; + struct btree_node_iter iter; for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { BUG_ON(bpos_lt(k.k->p, b->data->min_key)); BUG_ON(bpos_gt(k.k->p, b->data->max_key)); @@ -908,7 +907,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; - unsigned blacklisted_written, nonblacklisted_written = 0; unsigned ptr_written = btree_ptr_sectors_written(&b->key); struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; @@ -1042,8 +1040,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sort_iter_add(iter, vstruct_idx(i, 0), vstruct_last(i)); - - nonblacklisted_written = b->written; } if (ptr_written) { @@ -1061,18 +1057,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, true), -BCH_ERR_btree_node_read_err_want_retry, c, ca, b, NULL, "found bset signature after last bset"); - - /* - * Blacklisted bsets are those that were written after the most recent - * (flush) journal write. 
Since there wasn't a flush, they may not have - * made it to all devices - which means we shouldn't write new bsets - * after them, as that could leave a gap and then reads from that device - * wouldn't find all the bsets in that btree node - which means it's - * important that we start writing new bsets after the most recent _non_ - * blacklisted bset: - */ - blacklisted_written = b->written; - b->written = nonblacklisted_written; } sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool); @@ -1140,9 +1124,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bch_dev *ca2 = bch_dev_bkey_exists(c, ptr->dev); - if (ca->mi.state != BCH_MEMBER_STATE_rw) + if (ca2->mi.state != BCH_MEMBER_STATE_rw) set_btree_node_need_rewrite(b); } @@ -1224,19 +1208,17 @@ start: bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); - printbuf_exit(&buf); if (saw_error && !btree_node_read_error(b)) { - struct printbuf buf = PRINTBUF; - + printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", __func__, bch2_btree_ids[b->c.btree_id], b->c.level, buf.buf); - printbuf_exit(&buf); bch2_btree_node_rewrite_async(c, b); } + printbuf_exit(&buf); clear_btree_node_read_in_flight(b); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8d089bbdb1e5..6c064e82c0c8 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -488,7 +488,6 @@ fixup_done: if (!bch2_btree_node_iter_end(node_iter) && iter_current_key_modified && b->c.level) { - struct bset_tree *t; struct bkey_packed *k, *k2, *p; k = bch2_btree_node_iter_peek_all(node_iter, b); @@ -2048,8 +2047,12 @@ out: } /** - * bch2_btree_iter_peek: returns first key greater than or equal to iterator's - * current position + * bch2_btree_iter_peek_upto() - returns first key greater than or equal to + * iterator's current position + * @iter: iterator to peek from + * @end: search limit: returns keys less than or equal to @end + * + * Returns: key if found, or an error extractable with bkey_err(). */ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) { @@ -2186,10 +2189,13 @@ end: } /** - * bch2_btree_iter_peek_all_levels: returns the first key greater than or equal - * to iterator's current position, returning keys from every level of the btree. - * For keys at different levels of the btree that compare equal, the key from - * the lower level (leaf) is returned first. + * bch2_btree_iter_peek_all_levels() - returns the first key greater than or + * equal to iterator's current position, returning keys from every level of the + * btree. For keys at different levels of the btree that compare equal, the key + * from the lower level (leaf) is returned first. + * @iter: iterator to peek from + * + * Returns: key if found, or an error extractable with bkey_err(). */ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter) { @@ -2280,8 +2286,11 @@ out_no_locked: } /** - * bch2_btree_iter_next: returns first key greater than iterator's current + * bch2_btree_iter_next() - returns first key greater than iterator's current * position + * @iter: iterator to peek from + * + * Returns: key if found, or an error extractable with bkey_err(). 
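+ *
+ * A minimal scan loop built on this (an illustrative sketch; process()
+ * stands in for caller code):
+ *
+ *	for (k = bch2_btree_iter_peek(&iter);
+ *	     k.k && !(ret = bkey_err(k));
+ *	     k = bch2_btree_iter_next(&iter))
+ *		process(k);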
*/ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) { @@ -2292,8 +2301,11 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter) } /** - * bch2_btree_iter_peek_prev: returns first key less than or equal to + * bch2_btree_iter_peek_prev() - returns first key less than or equal to * iterator's current position + * @iter: iterator to peek from + * + * Returns: key if found, or an error extractable with bkey_err(). */ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter) { @@ -2416,8 +2428,11 @@ out_no_locked: } /** - * bch2_btree_iter_prev: returns first key less than iterator's current + * bch2_btree_iter_prev() - returns first key less than iterator's current * position + * @iter: iterator to peek from + * + * Returns: key if found, or an error extractable with bkey_err(). */ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) { @@ -2832,6 +2847,8 @@ static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset * + * Returns: current restart counter, to be used with trans_was_restarted() + * * While iterating over nodes or updating nodes a attempt to lock a btree node * may return BCH_ERR_transaction_restart when the trylock fails. When this * occurs bch2_trans_begin() should be called and the transaction retried. diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index b885e4e210d4..360a26b58501 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -674,17 +674,17 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, #define lockrestart_do(_trans, _do) \ ({ \ u32 _restart_count; \ - int _ret; \ + int _ret2; \ \ do { \ _restart_count = bch2_trans_begin(_trans); \ - _ret = (_do); \ - } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \ + _ret2 = (_do); \ + } while (bch2_err_matches(_ret2, BCH_ERR_transaction_restart)); \ \ - if (!_ret) \ + if (!_ret2) \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ \ - _ret; \ + _ret2; \ }) /* @@ -699,23 +699,23 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, #define nested_lockrestart_do(_trans, _do) \ ({ \ u32 _restart_count, _orig_restart_count; \ - int _ret; \ + int _ret2; \ \ _restart_count = _orig_restart_count = (_trans)->restart_count; \ \ - while (bch2_err_matches(_ret = (_do), BCH_ERR_transaction_restart))\ + while (bch2_err_matches(_ret2 = (_do), BCH_ERR_transaction_restart))\ _restart_count = bch2_trans_begin(_trans); \ \ - if (!_ret) \ + if (!_ret2) \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ \ - _ret ?: trans_was_restarted(_trans, _restart_count); \ + _ret2 ?: trans_was_restarted(_trans, _restart_count); \ }) #define for_each_btree_key2(_trans, _iter, _btree_id, \ _start, _flags, _k, _do) \ ({ \ - int _ret = 0; \ + int _ret3 = 0; \ \ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ @@ -723,15 +723,15 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, while (1) { \ u32 _restart_count = bch2_trans_begin(_trans); \ \ - _ret = 0; \ + _ret3 = 0; \ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ if (!(_k).k) \ break; \ \ - _ret = bkey_err(_k) ?: (_do); \ - if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + _ret3 = bkey_err(_k) ?: (_do); \ + if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ continue; \ - if (_ret) \ + if (_ret3) \ break; \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ if 
(!bch2_btree_iter_advance(&(_iter))) \ @@ -739,13 +739,13 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, } \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ - _ret; \ + _ret3; \ }) #define for_each_btree_key2_upto(_trans, _iter, _btree_id, \ _start, _end, _flags, _k, _do) \ ({ \ - int _ret = 0; \ + int _ret3 = 0; \ \ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ @@ -753,15 +753,15 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, while (1) { \ u32 _restart_count = bch2_trans_begin(_trans); \ \ - _ret = 0; \ + _ret3 = 0; \ (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, (_flags));\ if (!(_k).k) \ break; \ \ - _ret = bkey_err(_k) ?: (_do); \ - if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + _ret3 = bkey_err(_k) ?: (_do); \ + if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ continue; \ - if (_ret) \ + if (_ret3) \ break; \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ if (!bch2_btree_iter_advance(&(_iter))) \ @@ -769,13 +769,13 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, } \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ - _ret; \ + _ret3; \ }) #define for_each_btree_key_reverse(_trans, _iter, _btree_id, \ _start, _flags, _k, _do) \ ({ \ - int _ret = 0; \ + int _ret3 = 0; \ \ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ (_start), (_flags)); \ @@ -784,14 +784,14 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, u32 _restart_count = bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ if (!(_k).k) { \ - _ret = 0; \ + _ret3 = 0; \ break; \ } \ \ - _ret = bkey_err(_k) ?: (_do); \ - if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\ + _ret3 = bkey_err(_k) ?: (_do); \ + if (bch2_err_matches(_ret3, BCH_ERR_transaction_restart))\ continue; \ - if (_ret) \ + if (_ret3) \ break; \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ if (!bch2_btree_iter_rewind(&(_iter))) \ @@ -799,7 +799,7 @@ __bch2_btree_iter_peek_upto_and_restart(struct btree_trans *trans, } \ \ bch2_trans_iter_exit((_trans), &(_iter)); \ - _ret; \ + _ret3; \ }) #define for_each_btree_key_commit(_trans, _iter, _btree_id, \ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index a74ee6d8a7cf..784f889340cd 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -242,8 +242,6 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, } if (ck) { - int ret; - ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent, _THIS_IP_); if (unlikely(ret)) { bkey_cached_move_to_freelist(bc, ck); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index eafb0388ef82..e3a0b101cbf8 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -214,7 +214,11 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c, } /** - * btree_insert_key - insert a key one key into a leaf node + * bch2_btree_insert_key_leaf() - insert a key one key into a leaf node + * @trans: btree transaction object + * @path: path pointing to @insert's pos + * @insert: key to insert + * @journal_seq: sequence number of journal reservation */ inline void bch2_btree_insert_key_leaf(struct btree_trans *trans, struct btree_path *path, @@ -555,7 +559,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct btree_write_buffered_key *wb; struct btree_trans_commit_hook *h; unsigned u64s = 0; - bool marking = false; int 
ret; if (race_fault()) { @@ -584,9 +587,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, *stopped_at = i; return ret; } - - if (btree_node_type_needs_gc(i->bkey_type)) - marking = true; } if (trans->nr_wb_updates && diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 70398aaa095e..96a03f414dd0 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -184,34 +184,34 @@ struct btree_node_iter { /* * Iterate over all possible positions, synthesizing deleted keys for holes: */ -static const u16 BTREE_ITER_SLOTS = 1 << 0; -static const u16 BTREE_ITER_ALL_LEVELS = 1 << 1; +static const __maybe_unused u16 BTREE_ITER_SLOTS = 1 << 0; +static const __maybe_unused u16 BTREE_ITER_ALL_LEVELS = 1 << 1; /* * Indicates that intent locks should be taken on leaf nodes, because we expect * to be doing updates: */ -static const u16 BTREE_ITER_INTENT = 1 << 2; +static const __maybe_unused u16 BTREE_ITER_INTENT = 1 << 2; /* * Causes the btree iterator code to prefetch additional btree nodes from disk: */ -static const u16 BTREE_ITER_PREFETCH = 1 << 3; +static const __maybe_unused u16 BTREE_ITER_PREFETCH = 1 << 3; /* * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for * @pos or the first key strictly greater than @pos */ -static const u16 BTREE_ITER_IS_EXTENTS = 1 << 4; -static const u16 BTREE_ITER_NOT_EXTENTS = 1 << 5; -static const u16 BTREE_ITER_CACHED = 1 << 6; -static const u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7; -static const u16 BTREE_ITER_WITH_UPDATES = 1 << 8; -static const u16 BTREE_ITER_WITH_JOURNAL = 1 << 9; -static const u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; -static const u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11; -static const u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12; -static const u16 BTREE_ITER_NOPRESERVE = 1 << 13; -static const u16 BTREE_ITER_CACHED_NOFILL = 1 << 14; -static const u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15; -#define __BTREE_ITER_FLAGS_END 16 +static const __maybe_unused u16 BTREE_ITER_IS_EXTENTS = 1 << 4; +static const __maybe_unused u16 BTREE_ITER_NOT_EXTENTS = 1 << 5; +static const __maybe_unused u16 BTREE_ITER_CACHED = 1 << 6; +static const __maybe_unused u16 BTREE_ITER_WITH_KEY_CACHE = 1 << 7; +static const __maybe_unused u16 BTREE_ITER_WITH_UPDATES = 1 << 8; +static const __maybe_unused u16 BTREE_ITER_WITH_JOURNAL = 1 << 9; +static const __maybe_unused u16 __BTREE_ITER_ALL_SNAPSHOTS = 1 << 10; +static const __maybe_unused u16 BTREE_ITER_ALL_SNAPSHOTS = 1 << 11; +static const __maybe_unused u16 BTREE_ITER_FILTER_SNAPSHOTS = 1 << 12; +static const __maybe_unused u16 BTREE_ITER_NOPRESERVE = 1 << 13; +static const __maybe_unused u16 BTREE_ITER_CACHED_NOFILL = 1 << 14; +static const __maybe_unused u16 BTREE_ITER_KEY_CACHE_FILL = 1 << 15; +#define __BTREE_ITER_FLAGS_END 16 enum btree_path_uptodate { BTREE_ITER_UPTODATE = 0, diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 823f0da2f502..3d126f043db0 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -681,15 +681,17 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, * bch2_btree_insert - insert keys into the extent btree * @c: pointer to struct bch_fs * @id: btree to insert into - * @insert_keys: list of keys to insert - * @hook: insert callback + * @k: key to insert + * @disk_res: must be non-NULL whenever inserting or potentially + * splitting data extents + * @flags: transaction commit flags + * + * Returns: 0 on success, error code on failure */ -int 
bch2_btree_insert(struct bch_fs *c, enum btree_id id, - struct bkey_i *k, - struct disk_reservation *disk_res, - u64 *journal_seq, int flags) +int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, + struct disk_reservation *disk_res, int flags) { - return bch2_trans_do(c, disk_res, journal_seq, flags, + return bch2_trans_do(c, disk_res, NULL, flags, bch2_btree_insert_trans(&trans, id, k, 0)); } @@ -847,6 +849,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, return bch2_trans_update_buffered(trans, btree, k); } +__printf(2, 0) static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args) { struct printbuf buf = PRINTBUF; @@ -883,6 +886,7 @@ err: return ret; } +__printf(3, 0) static int __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, va_list args) @@ -900,6 +904,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, return ret; } +__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...) { va_list args; @@ -915,6 +920,7 @@ int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...) * Use for logging messages during recovery to enable reserved space and avoid * blocking. */ +__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...) { va_list args; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 0596c5e73a3e..0be980d16007 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -66,7 +66,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *, enum btree_update_flags); int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, - struct disk_reservation *, u64 *, int flags); + struct disk_reservation *, int flags); int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, struct bpos, struct bpos, unsigned, u64 *); @@ -115,8 +115,8 @@ void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); int __bch2_trans_commit(struct btree_trans *, unsigned); -int bch2_fs_log_msg(struct bch_fs *, const char *, ...); -int bch2_journal_log_msg(struct bch_fs *, const char *, ...); +__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); +__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...); /** * bch2_trans_commit - insert keys at given iterator positions diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index c8d0942650f1..bac495b382bb 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -145,8 +145,13 @@ static size_t btree_node_u64s_with_format(struct btree *b, /** * bch2_btree_node_format_fits - check if we could rewrite node with a new format * - * This assumes all keys can pack with the new format -- it just checks if - * the re-packed keys would fit inside the node itself. + * @c: filesystem handle + * @b: btree node to rewrite + * @new_f: bkey format to translate keys to + * + * Returns: true if all re-packed keys will be able to fit in a new node. + * + * Assumes all keys will successfully pack with the new format. 
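+ *
+ * Sketch of intended use, assuming @new_f was produced by
+ * bch2_bkey_format_done() over this node's keys:
+ *
+ *	new_f = bch2_bkey_format_done(&format_state);
+ *	if (bch2_btree_node_format_fits(c, b, &new_f))
+ *		... rewrite @b with the denser @new_f ...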
*/ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, struct bkey_format *new_f) @@ -244,7 +249,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct write_point *wp; struct btree *b; BKEY_PADDED_ONSTACK(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct open_buckets ob = { .nr = 0 }; + struct open_buckets obs = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim @@ -257,7 +262,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct btree_alloc *a = &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - ob = a->ob; + obs = a->ob; bkey_copy(&tmp.k, &a->k); mutex_unlock(&c->btree_reserve_cache_lock); goto mem_alloc; @@ -292,7 +297,7 @@ retry: bkey_btree_ptr_v2_init(&tmp.k); bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, btree_sectors(c), false); - bch2_open_bucket_get(c, wp, &ob); + bch2_open_bucket_get(c, wp, &obs); bch2_alloc_sectors_done(c, wp); mem_alloc: b = bch2_btree_node_mem_alloc(trans, interior_node); @@ -304,7 +309,7 @@ mem_alloc: BUG_ON(b->ob.nr); bkey_copy(&b->key, &tmp.k); - b->ob = ob; + b->ob = obs; return b; } @@ -697,15 +702,15 @@ err: * btree_interior_update_lock: */ if (as->b == b) { - struct bset *i = btree_bset_last(b); - BUG_ON(!b->c.level); BUG_ON(!btree_node_dirty(b)); if (!ret) { - i->journal_seq = cpu_to_le64( + struct bset *last = btree_bset_last(b); + + last->journal_seq = cpu_to_le64( max(journal_seq, - le64_to_cpu(i->journal_seq))); + le64_to_cpu(last->journal_seq))); bch2_btree_add_journal_pin(c, b, journal_seq); } else { @@ -1216,18 +1221,6 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) bch2_recalc_btree_reserve(c); } -/** - * bch_btree_set_root - update the root in memory and on disk - * - * To ensure forward progress, the current task must not be holding any - * btree node write locks. However, you must hold an intent lock on the - * old root. - * - * Note: This allocates a journal entry but doesn't add any keys to - * it. All the btree roots are part of every journal write, so there - * is nothing new to be done. This just guarantees that there is a - * journal write. - */ static void bch2_btree_set_root(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1341,12 +1334,12 @@ __bch2_btree_insert_keys_interior(struct btree_update *as, ; while (!bch2_keylist_empty(keys)) { - struct bkey_i *k = bch2_keylist_front(keys); + insert = bch2_keylist_front(keys); - if (bpos_gt(k->k.p, b->key.k.p)) + if (bpos_gt(insert->k.p, b->key.k.p)) break; - bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, k); + bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); bch2_keylist_pop_front(keys); } } @@ -1661,12 +1654,16 @@ bch2_btree_insert_keys_interior(struct btree_update *as, } /** - * bch_btree_insert_node - insert bkeys into a given btree node + * bch2_btree_insert_node - insert bkeys into a given btree node * - * @iter: btree iterator + * @as: btree_update object + * @trans: btree_trans object + * @path: path that points to current node + * @b: node to insert keys into * @keys: list of keys to insert - * @hook: insert callback - * @persistent: if not null, @persistent will wait on journal write + * @flags: transaction commit flags + * + * Returns: 0 on success, typically transaction restart error on failure * * Inserts as many keys as it can into a given btree node, splitting it if full. 
* If a split occurred, this function will return early. This can only happen @@ -1934,9 +1931,6 @@ err_free_update: goto out; } -/** - * bch_btree_node_rewrite - Rewrite/move a btree node - */ int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_iter *iter, struct btree *b, diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index ff0c3cd39ee2..3f385d499026 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -366,11 +366,11 @@ struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, BUG_ON(!bch2_checksum_mergeable(type)); while (b_len) { - unsigned b = min_t(unsigned, b_len, PAGE_SIZE); + unsigned page_len = min_t(unsigned, b_len, PAGE_SIZE); bch2_checksum_update(&state, - page_address(ZERO_PAGE(0)), b); - b_len -= b; + page_address(ZERO_PAGE(0)), page_len); + b_len -= page_len; } a.lo = (__le64 __force) bch2_checksum_final(&state); a.lo ^= b.lo; @@ -395,9 +395,9 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, unsigned csum_type; struct bch_csum csum; } splits[3] = { - { crc_a, len_a, new_csum_type }, - { crc_b, len_b, new_csum_type }, - { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type }, + { crc_a, len_a, new_csum_type, { 0 }}, + { crc_b, len_b, new_csum_type, { 0 } }, + { NULL, bio_sectors(bio) - len_a - len_b, new_csum_type, { 0 } }, }, *i; bool mergeable = crc_old.csum_type == new_csum_type && bch2_checksum_mergeable(new_csum_type); diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index c7b1a8fca685..779f175029a8 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -40,10 +40,9 @@ struct bch_csum bch2_checksum(struct bch_fs *, unsigned, struct nonce, */ #define csum_vstruct(_c, _type, _nonce, _i) \ ({ \ - const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \ - const void *end = vstruct_end(_i); \ + const void *_start = ((const void *) (_i)) + sizeof((_i)->csum);\ \ - bch2_checksum(_c, _type, _nonce, start, end - start); \ + bch2_checksum(_c, _type, _nonce, _start, vstruct_end(_i) - _start);\ }) int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index f1651807c2b7..1480b64547b0 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -570,7 +570,6 @@ void bch2_fs_compress_exit(struct bch_fs *c) static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) { size_t decompress_workspace_size = 0; - bool decompress_workspace_needed; ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), c->opts.encoded_extent_max); struct { @@ -580,7 +579,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) size_t decompress_workspace; } compression_types[] = { { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, - max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) }, + max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS), + 0 }, { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip, zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), zlib_inflate_workspacesize(), }, @@ -619,9 +619,6 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) if (!(features & (1 << i->feature))) continue; - if (i->decompress_workspace) - decompress_workspace_needed = true; - if (mempool_initialized(&c->compress_workspace[i->type])) continue; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 29576c4c109d..84ca128a59a3 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -49,10 +49,6 @@ static void trace_move_extent_fail2(struct data_update *m, if 
(insert) { i = 0; bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) { - struct bkey_s new_s; - new_s.k = (void *) new.k; - new_s.v = (void *) new.v; - if (((1U << i) & m->data_opts.rewrite_ptrs) && (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && !ptr->cached) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 5f3e65f9069e..7593ba04dfb2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -153,10 +153,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) BUG_ON(b->nsets != 1); for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k)) - if (k->type == KEY_TYPE_btree_ptr_v2) { - struct bch_btree_ptr_v2 *v = (void *) bkeyp_val(&b->format, k); - v->mem_ptr = 0; - } + if (k->type == KEY_TYPE_btree_ptr_v2) + ((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0; v = c->verify_data; bkey_copy(&v->key, &b->key); diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 9fa8d7d49f3e..b292dbef7992 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -32,21 +32,21 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, for (i = 0; i < sb->nr_devices; i++) { struct bch_member *m = mi->members + i; - unsigned g; + unsigned group_id; if (!BCH_MEMBER_GROUP(m)) continue; - g = BCH_MEMBER_GROUP(m) - 1; + group_id = BCH_MEMBER_GROUP(m) - 1; - if (g >= nr_groups) { + if (group_id >= nr_groups) { prt_printf(err, "disk %u has invalid label %u (have %u)", - i, g, nr_groups); + i, group_id, nr_groups); return -BCH_ERR_invalid_sb_disk_groups; } - if (BCH_GROUP_DELETED(&groups->entries[g])) { - prt_printf(err, "disk %u has deleted label %u", i, g); + if (BCH_GROUP_DELETED(&groups->entries[group_id])) { + prt_printf(err, "disk %u has deleted label %u", i, group_id); return -BCH_ERR_invalid_sb_disk_groups; } } diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c index 8d58f2cca260..d260ff9bbfeb 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c @@ -12,8 +12,6 @@ static const char * const bch2_errcode_strs[] = { NULL }; -#define BCH_ERR_0 0 - static unsigned bch2_errcode_parents[] = { #define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = class, BCH_ERRCODES() diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 2034d635c718..7650d8b3122a 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -695,12 +695,12 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, if (IS_ERR_OR_NULL(folio)) goto err_unlock; - if (folio_test_uptodate(folio)) - goto out; - offset = pos - folio_pos(folio); len = min_t(size_t, len, folio_end_pos(folio) - pos); + if (folio_test_uptodate(folio)) + goto out; + /* If we're writing entire folio, don't need to read it in first: */ if (!offset && len == folio_size(folio)) goto out; @@ -801,10 +801,10 @@ int bch2_write_end(struct file *file, struct address_space *mapping, return copied; } -static noinline void folios_trunc(folios *folios, struct folio **fi) +static noinline void folios_trunc(folios *fs, struct folio **fi) { - while (folios->data + folios->nr > fi) { - struct folio *f = darray_pop(folios); + while (fs->data + fs->nr > fi) { + struct folio *f = darray_pop(fs); folio_unlock(f); folio_put(f); @@ -818,35 +818,35 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation res; - folios folios; + folios fs; struct folio **fi, *f; - unsigned copied = 0, f_offset; - u64 end = pos + len, f_pos; 
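+	/*
+	 * Per-folio cursors (f_offset, f_len, f_copied) are declared at
+	 * function scope so the reservation, copy and dirtying passes below
+	 * share one set rather than redeclaring them in each loop:
+	 */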
+ unsigned copied = 0, f_offset, f_copied; + u64 end = pos + len, f_pos, f_len; loff_t last_folio_pos = inode->v.i_size; int ret = 0; BUG_ON(!len); bch2_folio_reservation_init(c, inode, &res); - darray_init(&folios); + darray_init(&fs); ret = bch2_filemap_get_contig_folios_d(mapping, pos, end, FGP_LOCK|FGP_WRITE|FGP_STABLE|FGP_CREAT, mapping_gfp_mask(mapping), - &folios); + &fs); if (ret) goto out; - BUG_ON(!folios.nr); + BUG_ON(!fs.nr); - f = darray_first(folios); + f = darray_first(fs); if (pos != folio_pos(f) && !folio_test_uptodate(f)) { ret = bch2_read_single_folio(f, mapping); if (ret) goto out; } - f = darray_last(folios); + f = darray_last(fs); end = min(end, folio_end_pos(f)); last_folio_pos = folio_pos(f); if (end != folio_end_pos(f) && !folio_test_uptodate(f)) { @@ -859,15 +859,15 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, } } - ret = bch2_folio_set(c, inode_inum(inode), folios.data, folios.nr); + ret = bch2_folio_set(c, inode_inum(inode), fs.data, fs.nr); if (ret) goto out; f_pos = pos; - f_offset = pos - folio_pos(darray_first(folios)); - darray_for_each(folios, fi) { - struct folio *f = *fi; - u64 f_len = min(end, folio_end_pos(f)) - f_pos; + f_offset = pos - folio_pos(darray_first(fs)); + darray_for_each(fs, fi) { + f = *fi; + f_len = min(end, folio_end_pos(f)) - f_pos; /* * XXX: per POSIX and fstests generic/275, on -ENOSPC we're @@ -879,11 +879,11 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, */ ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len); if (unlikely(ret)) { - folios_trunc(&folios, fi); - if (!folios.nr) + folios_trunc(&fs, fi); + if (!fs.nr) goto out; - end = min(end, folio_end_pos(darray_last(folios))); + end = min(end, folio_end_pos(darray_last(fs))); break; } @@ -892,18 +892,17 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, } if (mapping_writably_mapped(mapping)) - darray_for_each(folios, fi) + darray_for_each(fs, fi) flush_dcache_folio(*fi); f_pos = pos; - f_offset = pos - folio_pos(darray_first(folios)); - darray_for_each(folios, fi) { - struct folio *f = *fi; - u64 f_len = min(end, folio_end_pos(f)) - f_pos; - unsigned f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); - + f_offset = pos - folio_pos(darray_first(fs)); + darray_for_each(fs, fi) { + f = *fi; + f_len = min(end, folio_end_pos(f)) - f_pos; + f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); if (!f_copied) { - folios_trunc(&folios, fi); + folios_trunc(&fs, fi); break; } @@ -912,7 +911,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, pos + copied + f_copied < inode->v.i_size) { iov_iter_revert(iter, f_copied); folio_zero_range(f, 0, folio_size(f)); - folios_trunc(&folios, fi); + folios_trunc(&fs, fi); break; } @@ -920,7 +919,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, copied += f_copied; if (f_copied != f_len) { - folios_trunc(&folios, fi + 1); + folios_trunc(&fs, fi + 1); break; } @@ -939,10 +938,10 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, spin_unlock(&inode->v.i_lock); f_pos = pos; - f_offset = pos - folio_pos(darray_first(folios)); - darray_for_each(folios, fi) { - struct folio *f = *fi; - u64 f_len = min(end, folio_end_pos(f)) - f_pos; + f_offset = pos - folio_pos(darray_first(fs)); + darray_for_each(fs, fi) { + f = *fi; + f_len = min(end, folio_end_pos(f)) - f_pos; if (!folio_test_uptodate(f)) folio_mark_uptodate(f); @@ -955,7 +954,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, 
inode->ei_last_dirtied = (unsigned long) current; out: - darray_for_each(folios, fi) { + darray_for_each(fs, fi) { folio_unlock(*fi); folio_put(*fi); } @@ -968,7 +967,7 @@ out: if (last_folio_pos >= inode->v.i_size) truncate_pagecache(&inode->v, inode->v.i_size); - darray_exit(&folios); + darray_exit(&fs); bch2_folio_reservation_put(c, inode, &res); return copied ?: ret; diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index 1e60eead2981..4d1612ede484 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -14,7 +14,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, loff_t start, u64 end, int fgp_flags, gfp_t gfp, - folios *folios) + folios *fs) { struct folio *f; u64 pos = start; @@ -24,7 +24,7 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, if ((u64) pos >= (u64) start + (1ULL << 20)) fgp_flags &= ~FGP_CREAT; - ret = darray_make_room_gfp(folios, 1, gfp & GFP_KERNEL); + ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL); if (ret) break; @@ -32,16 +32,16 @@ int bch2_filemap_get_contig_folios_d(struct address_space *mapping, if (IS_ERR_OR_NULL(f)) break; - BUG_ON(folios->nr && folio_pos(f) != pos); + BUG_ON(fs->nr && folio_pos(f) != pos); pos = folio_end_pos(f); - darray_push(folios, f); + darray_push(fs, f); } - if (!folios->nr && !ret && (fgp_flags & FGP_CREAT)) + if (!fs->nr && !ret && (fgp_flags & FGP_CREAT)) ret = -ENOMEM; - return folios->nr ? 0 : ret; + return fs->nr ? 0 : ret; } /* pagecache_block must be held */ @@ -73,12 +73,15 @@ int bch2_write_invalidate_inode_pages_range(struct address_space *mapping, return ret; } +#if 0 +/* Useful for debug tracing: */ static const char * const bch2_folio_sector_states[] = { #define x(n) #n, BCH_FOLIO_SECTOR_STATE() #undef x NULL }; +#endif static inline enum bch_folio_sector_state folio_sector_dirty(enum bch_folio_sector_state state) @@ -177,20 +180,20 @@ static void __bch2_folio_set(struct folio *folio, * extents btree: */ int bch2_folio_set(struct bch_fs *c, subvol_inum inum, - struct folio **folios, unsigned nr_folios) + struct folio **fs, unsigned nr_folios) { struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; struct bch_folio *s; - u64 offset = folio_sector(folios[0]); + u64 offset = folio_sector(fs[0]); unsigned folio_idx; u32 snapshot; bool need_set = false; int ret; for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) { - s = bch2_folio_create(folios[folio_idx], GFP_KERNEL); + s = bch2_folio_create(fs[folio_idx], GFP_KERNEL); if (!s) return -ENOMEM; @@ -216,7 +219,7 @@ retry: unsigned state = bkey_to_sector_state(k); while (folio_idx < nr_folios) { - struct folio *folio = folios[folio_idx]; + struct folio *folio = fs[folio_idx]; u64 folio_start = folio_sector(folio); u64 folio_end = folio_end_sector(folio); unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) - diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index f201980ef2c3..54a9c21a3b83 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -5,7 +5,7 @@ /* Inode flags: */ /* bcachefs inode flags -> vfs inode flags: */ -static const unsigned bch_flags_to_vfs[] = { +static const __maybe_unused unsigned bch_flags_to_vfs[] = { [__BCH_INODE_SYNC] = S_SYNC, [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE, [__BCH_INODE_APPEND] = S_APPEND, @@ -13,7 +13,7 @@ static const unsigned bch_flags_to_vfs[] = { }; /* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -static const unsigned bch_flags_to_uflags[] = { +static const __maybe_unused unsigned 
bch_flags_to_uflags[] = { [__BCH_INODE_SYNC] = FS_SYNC_FL, [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL, [__BCH_INODE_APPEND] = FS_APPEND_FL, @@ -22,7 +22,7 @@ static const unsigned bch_flags_to_uflags[] = { }; /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -static const unsigned bch_flags_to_xflags[] = { +static const __maybe_unused unsigned bch_flags_to_xflags[] = { [__BCH_INODE_SYNC] = FS_XFLAG_SYNC, [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE, [__BCH_INODE_APPEND] = FS_XFLAG_APPEND, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0def3a57bd6d..f814e9e0a741 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1661,7 +1661,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) up_write(&c->state_lock); } - if (opts.errors >= 0) + if (opt_defined(opts, errors)) c->opts.errors = opts.errors; err: return bch2_err_class(ret); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index e8cb4448bf2d..b9c9ece63175 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -471,7 +471,12 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, * and @ancestor hasn't been overwritten in @seen * - * That is, returns whether key in @ancestor snapshot is visible in @id snapshot + * @c: filesystem handle + * @seen: list of snapshot ids already seen at current position + * @id: descendent snapshot id + * @ancestor: ancestor snapshot id + * + * Returns: whether key in @ancestor snapshot is visible in @id snapshot */ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, u32 id, u32 ancestor) @@ -516,14 +521,16 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see * snapshot id @dst, test whether there is some snapshot in which @dst is * visible. * - * This assumes we're visiting @src keys in natural key order. + * @c: filesystem handle + * @s: list of snapshot IDs already seen at @src + * @src: snapshot ID of src key + * @dst: snapshot ID of dst key + * Returns: true if there is some snapshot in which @dst is visible * - * @s - list of snapshot IDs already seen at @src - * @src - snapshot ID of src key - * @dst - snapshot ID of dst key + * Assumes we're visiting @src keys in natural key order */ -static int ref_visible(struct bch_fs *c, struct snapshots_seen *s, - u32 src, u32 dst) +static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, + u32 src, u32 dst) { return dst <= src ? 
key_visible_in_snapshot(c, s, dst, src) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 4548de6e97b2..81ff2720835b 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -120,8 +120,7 @@ static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed, if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { struct bch_inode_unpacked unpacked; - int ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), - &unpacked); + ret = bch2_inode_unpack(bkey_i_to_s_c(&packed->inode.k_i), &unpacked); BUG_ON(ret); BUG_ON(unpacked.bi_inum != inode->bi_inum); BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 7f29fd2f05b1..3439e9553325 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -489,7 +489,8 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op) } /** - * bch_write_index - after a write, update index to point to new data + * __bch2_write_index - after a write, update index to point to new data + * @op: bch_write_op to process */ static void __bch2_write_index(struct bch_write_op *op) { @@ -526,10 +527,10 @@ static void __bch2_write_index(struct bch_write_op *op) op->written += sectors_start - keylist_sectors(keys); if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *k = bch2_keylist_front(&op->insert_keys); + struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); bch_err_inum_offset_ratelimited(c, - k->k.p.inode, k->k.p.offset << 9, + insert->k.p.inode, insert->k.p.offset << 9, "write error while doing btree update: %s", bch2_err_str(ret)); } @@ -1179,10 +1180,10 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) })); if (ret && !bch2_err_matches(ret, EROFS)) { - struct bkey_i *k = bch2_keylist_front(&op->insert_keys); + struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); bch_err_inum_offset_ratelimited(c, - k->k.p.inode, k->k.p.offset << 9, + insert->k.p.inode, insert->k.p.offset << 9, "write error while doing btree update: %s", bch2_err_str(ret)); } @@ -1546,7 +1547,8 @@ err: } /** - * bch_write - handle a write to a cache device or flash only volume + * bch2_write() - handle a write to a cache device or flash only volume + * @cl: &bch_write_op->cl * * This is the starting point for any data to end up in a cache device; it could * be from a normal write, or a writeback write, or a write to a flash only diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 4b9295a15837..40455e892112 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -588,8 +588,13 @@ out: /** * bch2_journal_flush_seq_async - wait for a journal entry to be written + * @j: journal object + * @seq: seq to flush + * @parent: closure object to wait with + * Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed, + * -EIO if @seq will never be flushed * - * like bch2_journal_wait_on_seq, except that it triggers a write immediately if + * Like bch2_journal_wait_on_seq, except that it triggers a write immediately if * necessary */ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, @@ -944,7 +949,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, goto unlock; while (ja->nr < nr) { - struct disk_reservation disk_res = { 0, 0 }; + struct disk_reservation disk_res = { 0, 0, 0 }; /* * note: journal buckets aren't really counted as _sectors_ used yet, so diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 0e606009dc46..269c8e8a1d95 100644 --- a/fs/bcachefs/journal_io.c +++ 
b/fs/bcachefs/journal_io.c @@ -237,17 +237,17 @@ static void journal_entry_err_msg(struct printbuf *out, #define journal_entry_err(c, version, jset, entry, msg, ...) \ ({ \ - struct printbuf buf = PRINTBUF; \ + struct printbuf _buf = PRINTBUF; \ \ - journal_entry_err_msg(&buf, version, jset, entry); \ - prt_printf(&buf, msg, ##__VA_ARGS__); \ + journal_entry_err_msg(&_buf, version, jset, entry); \ + prt_printf(&_buf, msg, ##__VA_ARGS__); \ \ switch (flags & BKEY_INVALID_WRITE) { \ case READ: \ - mustfix_fsck_err(c, "%s", buf.buf); \ + mustfix_fsck_err(c, "%s", _buf.buf); \ break; \ case WRITE: \ - bch_err(c, "corrupt metadata before write: %s\n", buf.buf);\ + bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\ if (bch2_fs_inconsistent(c)) { \ ret = -BCH_ERR_fsck_errors_not_fixed; \ goto fsck_err; \ @@ -255,7 +255,7 @@ static void journal_entry_err_msg(struct printbuf *out, break; \ } \ \ - printbuf_exit(&buf); \ + printbuf_exit(&_buf); \ true; \ }) @@ -1281,7 +1281,7 @@ int bch2_journal_read(struct bch_fs *c, continue; for (ptr = 0; ptr < i->nr_ptrs; ptr++) { - struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); + ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev); if (!i->ptrs[ptr].csum_good) bch_err_dev_offset(ca, i->ptrs[ptr].sector, @@ -1379,16 +1379,21 @@ static void __journal_write_alloc(struct journal *j, } /** - * journal_next_bucket - move on to the next journal bucket if possible + * journal_write_alloc - decide where to write next journal entry + * + * @j: journal object + * @w: journal buf (entry to be written) + * + * Returns: 0 on success, or -EROFS on failure */ -static int journal_write_alloc(struct journal *j, struct journal_buf *w, - unsigned sectors) +static int journal_write_alloc(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_devs_mask devs; struct journal_device *ja; struct bch_dev *ca; struct dev_alloc_list devs_sorted; + unsigned sectors = vstruct_sectors(w->data, c->block_bits); unsigned target = c->opts.metadata_target ?: c->opts.foreground_target; unsigned i, replicas = 0, replicas_want = @@ -1812,7 +1817,7 @@ void bch2_journal_write(struct closure *cl) retry_alloc: spin_lock(&j->lock); - ret = journal_write_alloc(j, w, sectors); + ret = journal_write_alloc(j, w); if (ret && j->can_discard) { spin_unlock(&j->lock); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 73d135a8f37a..1f3d5890ff11 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -292,7 +292,6 @@ void bch2_journal_do_discards(struct journal *j) static void bch2_journal_reclaim_fast(struct journal *j) { - struct journal_entry_pin_list temp; bool popped = false; lockdep_assert_held(&j->lock); @@ -303,7 +302,7 @@ static void bch2_journal_reclaim_fast(struct journal *j) */ while (!fifo_empty(&j->pin) && !atomic_read(&fifo_peek_front(&j->pin).count)) { - fifo_pop(&j->pin, temp); + j->pin.front++; popped = true; } @@ -419,6 +418,8 @@ void bch2_journal_pin_set(struct journal *j, u64 seq, /** * bch2_journal_pin_flush: ensure journal pin callback is no longer running + * @j: journal object + * @pin: pin to flush */ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin) { @@ -579,7 +580,11 @@ static u64 journal_seq_to_flush(struct journal *j) } /** - * bch2_journal_reclaim - free up journal buckets + * __bch2_journal_reclaim - free up journal buckets + * @j: journal object + * @direct: direct or background reclaim? 
+ * @kicked: requested to run since we last ran?
+ * Returns: 0 on success, or -EIO if the journal has been shut down
 *
 * Background journal reclaim writes out btree nodes. It should be run
 * early enough so that we never completely run out of journal buckets.
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index ac4df53bfde2..d62b757536a3 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -724,7 +724,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
 
 		if (!bp.level) {
 			const struct bch_extent_ptr *ptr;
-			struct bkey_s_c k;
 			unsigned i = 0;
 
 			k = bch2_backpointer_get_key(trans, &iter, bp_pos, bp, 0);
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 83ebb56a3fae..874c9324ab66 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -164,7 +164,7 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
 		lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
 		0, k, ({
 			struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
-			int ret = 0;
+			int ret2 = 0;
 
 			saw++;
 
@@ -173,11 +173,11 @@ static int bch2_copygc_get_buckets(struct btree_trans *trans,
 			else if (bucket_in_flight(buckets_in_flight, b.k))
 				in_flight++;
 			else {
-				ret = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
-				if (ret >= 0)
+				ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
+				if (ret2 >= 0)
 					sectors += b.sectors;
 			}
-			ret;
+			ret2;
 		}));
 
 	pr_debug("have: %zu (%zu) saw %zu in flight %zu not movable %zu got %zu (%zu)/%zu buckets ret %i",
@@ -304,13 +304,13 @@ static int bch2_copygc_thread(void *arg)
 	struct moving_context ctxt;
 	struct bch_move_stats move_stats;
 	struct io_clock *clock = &c->io_clock[WRITE];
-	struct buckets_in_flight move_buckets;
+	struct buckets_in_flight buckets;
 	u64 last, wait;
 	int ret = 0;
 
-	memset(&move_buckets, 0, sizeof(move_buckets));
+	memset(&buckets, 0, sizeof(buckets));
 
-	ret = rhashtable_init(&move_buckets.table, &bch_move_bucket_params);
+	ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
 	if (ret) {
 		bch_err_msg(c, ret, "allocating copygc buckets in flight");
 		return ret;
@@ -329,12 +329,12 @@ static int bch2_copygc_thread(void *arg)
 		cond_resched();
 
 		if (!c->copy_gc_enabled) {
-			move_buckets_wait(&trans, &ctxt, &move_buckets, true);
+			move_buckets_wait(&trans, &ctxt, &buckets, true);
 			kthread_wait_freezable(c->copy_gc_enabled);
 		}
 
 		if (unlikely(freezing(current))) {
-			move_buckets_wait(&trans, &ctxt, &move_buckets, true);
+			move_buckets_wait(&trans, &ctxt, &buckets, true);
 			__refrigerator(false);
 			continue;
 		}
@@ -345,7 +345,7 @@ static int bch2_copygc_thread(void *arg)
 		if (wait > clock->max_slop) {
 			c->copygc_wait_at = last;
 			c->copygc_wait = last + wait;
-			move_buckets_wait(&trans, &ctxt, &move_buckets, true);
+			move_buckets_wait(&trans, &ctxt, &buckets, true);
 			trace_and_count(c, copygc_wait, c, wait, last + wait);
 			bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT);
@@ -355,14 +355,14 @@ static int bch2_copygc_thread(void *arg)
 		c->copygc_wait = 0;
 
 		c->copygc_running = true;
-		ret = bch2_copygc(&trans, &ctxt, &move_buckets);
+		ret = bch2_copygc(&trans, &ctxt, &buckets);
 		c->copygc_running = false;
 
 		wake_up(&c->copygc_running_wq);
 	}
 
-	move_buckets_wait(&trans, &ctxt, &move_buckets, true);
-	rhashtable_destroy(&move_buckets.table);
+	move_buckets_wait(&trans, &ctxt, &buckets, true);
+	rhashtable_destroy(&buckets.table);
 	bch2_trans_exit(&trans);
 	bch2_moving_ctxt_exit(&ctxt);
 
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 8a9db110d64f..c21c258e4018 100644
--- a/fs/bcachefs/opts.h
+++ 
b/fs/bcachefs/opts.h @@ -469,7 +469,7 @@ struct bch_opts { #undef x }; -static const struct bch_opts bch2_opts_default = { +static const __maybe_unused struct bch_opts bch2_opts_default = { #define x(_name, _bits, _mode, _type, _sb_opt, _default, ...) \ ._name##_defined = true, \ ._name = _default, \ diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index c41daa180682..de41f9a14492 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -81,8 +81,10 @@ void bch2_prt_printf(struct printbuf *out, const char *fmt, ...) } /** - * printbuf_str - returns printbuf's buf as a C string, guaranteed to be null - * terminated + * bch2_printbuf_str() - returns printbuf's buf as a C string, guaranteed to be + * null terminated + * @buf: printbuf to terminate + * Returns: Printbuf contents, as a nul terminated C string */ const char *bch2_printbuf_str(const struct printbuf *buf) { @@ -97,8 +99,9 @@ const char *bch2_printbuf_str(const struct printbuf *buf) } /** - * printbuf_exit - exit a printbuf, freeing memory it owns and poisoning it + * bch2_printbuf_exit() - exit a printbuf, freeing memory it owns and poisoning it * against accidental use. + * @buf: printbuf to exit */ void bch2_printbuf_exit(struct printbuf *buf) { @@ -120,7 +123,7 @@ void bch2_printbuf_tabstop_pop(struct printbuf *buf) } /* - * printbuf_tabstop_set - add a tabstop, n spaces from the previous tabstop + * bch2_printbuf_tabstop_set() - add a tabstop, n spaces from the previous tabstop * * @buf: printbuf to control * @spaces: number of spaces from previous tabpstop @@ -144,7 +147,7 @@ int bch2_printbuf_tabstop_push(struct printbuf *buf, unsigned spaces) } /** - * printbuf_indent_add - add to the current indent level + * bch2_printbuf_indent_add() - add to the current indent level * * @buf: printbuf to control * @spaces: number of spaces to add to the current indent level @@ -164,7 +167,7 @@ void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces) } /** - * printbuf_indent_sub - subtract from the current indent level + * bch2_printbuf_indent_sub() - subtract from the current indent level * * @buf: printbuf to control * @spaces: number of spaces to subtract from the current indent level @@ -227,9 +230,8 @@ static void __prt_tab(struct printbuf *out) } /** - * prt_tab - Advance printbuf to the next tabstop - * - * @buf: printbuf to control + * bch2_prt_tab() - Advance printbuf to the next tabstop + * @out: printbuf to control * * Advance output to the next tabstop by printing spaces. 
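+ *
+ * Example (an illustrative sketch, assuming an initialized printbuf with a
+ * tabstop pushed):
+ *
+ *	bch2_printbuf_tabstop_push(&buf, 24);
+ *	prt_printf(&buf, "compression:");
+ *	bch2_prt_tab(&buf);
+ *	prt_printf(&buf, "%s", "lz4");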
*/ @@ -267,7 +269,7 @@ static void __prt_tab_rjust(struct printbuf *buf) } /** - * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying + * bch2_prt_tab_rjust - Advance printbuf to the next tabstop, right justifying * previous output * * @buf: printbuf to control @@ -284,11 +286,11 @@ void bch2_prt_tab_rjust(struct printbuf *buf) } /** - * prt_bytes_indented - Print an array of chars, handling embedded control characters + * bch2_prt_bytes_indented() - Print an array of chars, handling embedded control characters * - * @out: printbuf to output to - * @str: string to print - * @count: number of bytes to print + * @out: output printbuf + * @str: string to print + * @count: number of bytes to print * * The following contol characters are handled as so: * \n: prt_newline newline that obeys current indent level @@ -335,32 +337,38 @@ void bch2_prt_bytes_indented(struct printbuf *out, const char *str, unsigned cou } /** - * prt_human_readable_u64 - Print out a u64 in human readable units + * bch2_prt_human_readable_u64() - Print out a u64 in human readable units + * @out: output printbuf + * @v: integer to print * - * Units of 2^10 (default) or 10^3 are controlled via @buf->si_units + * Units of 2^10 (default) or 10^3 are controlled via @out->si_units */ -void bch2_prt_human_readable_u64(struct printbuf *buf, u64 v) +void bch2_prt_human_readable_u64(struct printbuf *out, u64 v) { - bch2_printbuf_make_room(buf, 10); - buf->pos += string_get_size(v, 1, !buf->si_units, - buf->buf + buf->pos, - printbuf_remaining_size(buf)); + bch2_printbuf_make_room(out, 10); + out->pos += string_get_size(v, 1, !out->si_units, + out->buf + out->pos, + printbuf_remaining_size(out)); } /** - * prt_human_readable_s64 - Print out a s64 in human readable units + * bch2_prt_human_readable_s64() - Print out a s64 in human readable units + * @out: output printbuf + * @v: integer to print * - * Units of 2^10 (default) or 10^3 are controlled via @buf->si_units + * Units of 2^10 (default) or 10^3 are controlled via @out->si_units */ -void bch2_prt_human_readable_s64(struct printbuf *buf, s64 v) +void bch2_prt_human_readable_s64(struct printbuf *out, s64 v) { if (v < 0) - prt_char(buf, '-'); - bch2_prt_human_readable_u64(buf, abs(v)); + prt_char(out, '-'); + bch2_prt_human_readable_u64(out, abs(v)); } /** - * prt_units_u64 - Print out a u64 according to printbuf unit options + * bch2_prt_units_u64() - Print out a u64 according to printbuf unit options + * @out: output printbuf + * @v: integer to print * * Units are either raw (default), or human reabable units (controlled via * @buf->human_readable_units) @@ -374,7 +382,9 @@ void bch2_prt_units_u64(struct printbuf *out, u64 v) } /** - * prt_units_s64 - Print out a s64 according to printbuf unit options + * bch2_prt_units_s64() - Print out a s64 according to printbuf unit options + * @out: output printbuf + * @v: integer to print * * Units are either raw (default), or human reabable units (controlled via * @buf->human_readable_units) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b9dd858fc299..f566c94260d6 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -423,15 +423,9 @@ static int bch2_initialize_subvolumes(struct bch_fs *c) root_volume.v.snapshot = cpu_to_le32(U32_MAX); root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); - ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, - &root_tree.k_i, - NULL, NULL, 0) ?: - bch2_btree_insert(c, BTREE_ID_snapshots, - &root_snapshot.k_i, - NULL, NULL, 0) ?: - bch2_btree_insert(c, 
BTREE_ID_subvolumes, - &root_volume.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: + bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: + bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); if (ret) bch_err_fn(c, ret); return ret; @@ -1010,9 +1004,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_pack(&packed_inode, &root_inode); packed_inode.inode.k.p.snapshot = U32_MAX; - ret = bch2_btree_insert(c, BTREE_ID_inodes, - &packed_inode.inode.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0); if (ret) { bch_err_msg(c, ret, "creating root directory"); goto err; diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index f155428ff395..fb605b25b067 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -91,6 +91,9 @@ void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } +#if 0 +Currently disabled, needs to be debugged: + bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r) { struct bkey_s_reflink_v l = bkey_s_to_reflink_v(_l); @@ -98,6 +101,7 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r return l.v->refcount == r.v->refcount && bch2_extent_merge(c, _l, _r); } +#endif int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 7faa27310de4..b684b9f00c1b 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -29,7 +29,6 @@ static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type); #define SIX_LOCK_HELD_intent (1U << 26) #define SIX_LOCK_HELD_write (1U << 27) #define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read)) -#define SIX_LOCK_WAITING_intent (1U << (28 + SIX_LOCK_intent)) #define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write)) #define SIX_LOCK_NOSPIN (1U << 31) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index cfd70d6dea62..73fca04011ad 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -507,18 +507,18 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, bch2_trans_iter_exit(trans, &iter); if (!ret && !found) { - struct bkey_i_subvolume *s; + struct bkey_i_subvolume *u; *subvol_id = bch2_snapshot_tree_oldest_subvol(c, snapshot_root); - s = bch2_bkey_get_mut_typed(trans, &iter, + u = bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_subvolumes, POS(0, *subvol_id), 0, subvolume); - ret = PTR_ERR_OR_ZERO(s); + ret = PTR_ERR_OR_ZERO(u); if (ret) return ret; - SET_BCH_SUBVOLUME_SNAP(&s->v, false); + SET_BCH_SUBVOLUME_SNAP(&u->v, false); } return ret; @@ -930,7 +930,7 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s) swap(s->children[0], s->children[1]); } -int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) +static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) { struct bch_fs *c = trans->c; struct btree_iter iter, p_iter = (struct btree_iter) { NULL }; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d908b62c19f7..e0bd50983bb2 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -246,9 +246,9 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb, /* XXX: we're not checking that offline devices have enough space */ for_each_online_member(ca, c, i) { - struct bch_sb_handle *sb = &ca->disk_sb; + struct bch_sb_handle *dev_sb =
&ca->disk_sb; - if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) { + if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { percpu_ref_put(&ca->ref); return NULL; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ef11cede1dba..332951b794b4 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -435,7 +435,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) #ifndef BCH_WRITE_REF_DEBUG percpu_ref_reinit(&c->writes); #else - for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) { + for (i = 0; i < BCH_WRITE_REF_NR; i++) { BUG_ON(atomic_long_read(&c->writes[i])); atomic_long_inc(&c->writes[i]); } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 41c6900c34c1..1e26c2645ce4 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -113,10 +113,6 @@ do { \ prt_human_readable_s64(out, val); \ } while (0) -#define var_printf(_var, fmt) sysfs_printf(_var, fmt, var(_var)) -#define var_print(_var) sysfs_print(_var, var(_var)) -#define var_hprint(_var) sysfs_hprint(_var, var(_var)) - #define sysfs_strtoul(file, var) \ do { \ if (attr == &sysfs_ ## file) \ @@ -139,30 +135,6 @@ do { \ _v; \ }) -#define strtoul_restrict_or_return(cp, min, max) \ -({ \ - unsigned long __v = 0; \ - int _r = strtoul_safe_restrict(cp, __v, min, max); \ - if (_r) \ - return _r; \ - __v; \ -}) - -#define strtoi_h_or_return(cp) \ -({ \ - u64 _v; \ - int _r = strtoi_h(cp, &_v); \ - if (_r) \ - return _r; \ - _v; \ -}) - -#define sysfs_hatoi(file, var) \ -do { \ - if (attr == &sysfs_ ## file) \ - return strtoi_h(buf, &var) ?: (ssize_t) size; \ -} while (0) - write_attribute(trigger_gc); write_attribute(trigger_discards); write_attribute(trigger_invalidates); @@ -291,7 +263,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c incompressible_sectors = 0, compressed_sectors_compressed = 0, compressed_sectors_uncompressed = 0; - int ret; + int ret = 0; if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 0187c81e32ad..18ccb37b5a26 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -128,14 +128,13 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("inserting test keys"); for (i = 0; i < nr; i++) { - struct bkey_i_cookie k; + struct bkey_i_cookie ck; - bkey_cookie_init(&k.k_i); - k.k.p.offset = i; - k.k.p.snapshot = U32_MAX; + bkey_cookie_init(&ck.k_i); + ck.k.p.offset = i; + ck.k.p.snapshot = U32_MAX; - ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0); if (ret) { bch_err_msg(c, ret, "insert error"); goto err; @@ -194,15 +193,14 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("inserting test extents"); for (i = 0; i < nr; i += 8) { - struct bkey_i_cookie k; + struct bkey_i_cookie ck; - bkey_cookie_init(&k.k_i); - k.k.p.offset = i + 8; - k.k.p.snapshot = U32_MAX; - k.k.size = 8; + bkey_cookie_init(&ck.k_i); + ck.k.p.offset = i + 8; + ck.k.p.snapshot = U32_MAX; + ck.k.size = 8; - ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0); if (ret) { bch_err_msg(c, ret, "insert error"); goto err; @@ -263,14 +261,13 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) pr_info("inserting test keys"); for (i = 0; i < nr; i++) { - struct bkey_i_cookie k; + struct bkey_i_cookie ck; - bkey_cookie_init(&k.k_i); - k.k.p.offset = i * 2; - k.k.p.snapshot = U32_MAX; + 
bkey_cookie_init(&ck.k_i); + ck.k.p.offset = i * 2; + ck.k.p.snapshot = U32_MAX; - ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_xattrs, &ck.k_i, NULL, 0); if (ret) { bch_err_msg(c, ret, "insert error"); goto err; @@ -336,15 +333,14 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) pr_info("inserting test keys"); for (i = 0; i < nr; i += 16) { - struct bkey_i_cookie k; + struct bkey_i_cookie ck; - bkey_cookie_init(&k.k_i); - k.k.p.offset = i + 16; - k.k.p.snapshot = U32_MAX; - k.k.size = 8; + bkey_cookie_init(&ck.k_i); + ck.k.p.offset = i + 16; + ck.k.p.snapshot = U32_MAX; + ck.k.size = 8; - ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_extents, &ck.k_i, NULL, 0); if (ret) { bch_err_msg(c, ret, "insert error"); goto err; @@ -458,8 +454,7 @@ static int insert_test_extent(struct bch_fs *c, k.k_i.k.size = end - start; k.k_i.k.version.lo = test_version++; - ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0); if (ret) bch_err_fn(c, ret); return ret; @@ -546,8 +541,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) bkey_cookie_init(&cookie.k_i); cookie.k.p.snapshot = snapid_hi; - ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0); if (ret) return ret; @@ -572,8 +566,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr) bkey_cookie_init(&cookie.k_i); cookie.k.p.snapshot = U32_MAX; - ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, - NULL, NULL, 0); + ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0); if (ret) return ret; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 1ab7e247cca6..08bac0ba8d0b 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -112,10 +112,10 @@ got_unit: #define parse_or_ret(cp, _f) \ do { \ - int ret = _f; \ - if (ret < 0) \ - return ret; \ - cp += ret; \ + int _ret = _f; \ + if (_ret < 0) \ + return _ret; \ + cp += _ret; \ } while (0) static int __bch2_strtou64_h(const char *cp, u64 *res) @@ -605,11 +605,9 @@ void bch2_time_stats_init(struct bch2_time_stats *stats) /** * bch2_ratelimit_delay() - return how long to delay until the next time to do - * some work - * - * @d - the struct bch_ratelimit to update - * - * Returns the amount of time to delay by, in jiffies + * some work + * @d: the struct bch_ratelimit to update + * Returns: the amount of time to delay by, in jiffies */ u64 bch2_ratelimit_delay(struct bch_ratelimit *d) { @@ -622,9 +620,8 @@ u64 bch2_ratelimit_delay(struct bch_ratelimit *d) /** * bch2_ratelimit_increment() - increment @d by the amount of work done - * - * @d - the struct bch_ratelimit to update - * @done - the amount of work done, in arbitrary units + * @d: the struct bch_ratelimit to update + * @done: the amount of work done, in arbitrary units */ void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) { diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index d34423352f60..849a37ae497c 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -776,12 +776,12 @@ static inline void __move_gap(void *array, size_t element_size, #define bubble_sort(_base, _nr, _cmp) \ do { \ - ssize_t _i, _end; \ + ssize_t _i, _last; \ bool _swapped = true; \ \ - for (_end = (ssize_t) (_nr) - 1; _end > 0 && _swapped; --_end) {\ + for (_last = (ssize_t) (_nr) - 1; _last
> 0 && _swapped; --_last) {\ _swapped = false; \ - for (_i = 0; _i < _end; _i++) \ + for (_i = 0; _i < _last; _i++) \ if (_cmp((_base)[_i], (_base)[_i + 1]) > 0) { \ swap((_base)[_i], (_base)[_i + 1]); \ _swapped = true; \ diff --git a/fs/bcachefs/varint.c b/fs/bcachefs/varint.c index 2a2ab86ed6e1..cb4f33ed9ab3 100644 --- a/fs/bcachefs/varint.c +++ b/fs/bcachefs/varint.c @@ -13,10 +13,9 @@ /** * bch2_varint_encode - encode a variable length integer - * @out - destination to encode to - * @v - unsigned integer to encode - * - * Returns the size in bytes of the encoded integer - at most 9 bytes + * @out: destination to encode to + * @v: unsigned integer to encode + * Returns: size in bytes of the encoded integer - at most 9 bytes */ int bch2_varint_encode(u8 *out, u64 v) { @@ -40,11 +39,10 @@ int bch2_varint_encode(u8 *out, u64 v) /** * bch2_varint_decode - decode a variable length integer - * @in - varint to decode - * @end - end of buffer to decode from - * @out - on success, decoded integer - * - * Returns the size in bytes of the decoded integer - or -1 on failure (would + * @in: varint to decode + * @end: end of buffer to decode from + * @out: on success, decoded integer + * Returns: size in bytes of the decoded integer - or -1 on failure (would * have read past the end of the buffer) */ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out) @@ -73,6 +71,9 @@ int bch2_varint_decode(const u8 *in, const u8 *end, u64 *out) /** * bch2_varint_encode_fast - fast version of bch2_varint_encode + * @out: destination to encode to + * @v: unsigned integer to encode + * Returns: size in bytes of the encoded integer - at most 9 bytes * * This version assumes it's always safe to write 8 bytes to @out, even if the * encoded integer would be smaller. @@ -96,6 +97,11 @@ int bch2_varint_encode_fast(u8 *out, u64 v) /** * bch2_varint_decode_fast - fast version of bch2_varint_decode + * @in: varint to decode + * @end: end of buffer to decode from + * @out: on success, decoded integer + * Returns: size in bytes of the decoded integer - or -1 on failure (would + * have read past the end of the buffer) * * This version assumes that it is safe to read at most 8 bytes past the end of * @end (we still return an error if the varint extends past @end). -- cgit v1.2.3 From 6bd68ec266ad71827ef940151067b67b62fb8fed Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Sep 2023 17:16:02 -0400 Subject: bcachefs: Heap allocate btree_trans We're using more stack than we'd like in a number of functions, and btree_trans is the biggest object that we stack allocate. But we have to do a heap allocation to initialize it anyway, so there's no real downside to heap allocating the entire thing.
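As a rough sketch of the conversion pattern (illustrative only: some_update() is a stand-in for whatever btree update a caller performs, and error handling is elided), code that used to pair a stack-allocated transaction with bch2_trans_init()/bch2_trans_exit() now takes a pointer from bch2_trans_get() and releases it with bch2_trans_put():

  /* before: transaction lived on the caller's stack */
  struct btree_trans trans;

  bch2_trans_init(&trans, c, 0, 0);
  ret = commit_do(&trans, NULL, NULL, 0, some_update(&trans));
  bch2_trans_exit(&trans);

  /* after: transaction comes from the btree_trans mempool */
  struct btree_trans *trans = bch2_trans_get(c);

  ret = commit_do(trans, NULL, NULL, 0, some_update(trans));
  bch2_trans_put(trans);

bch2_trans_put() parks the freed transaction in a per-cpu buffer when it can, so the common case avoids a mempool round trip.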
Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 33 +++--- fs/bcachefs/alloc_background.c | 133 +++++++++++------------ fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/backpointers.c | 22 ++-- fs/bcachefs/bcachefs.h | 8 +- fs/bcachefs/btree_gc.c | 95 ++++++++--------- fs/bcachefs/btree_io.c | 13 +-- fs/bcachefs/btree_iter.c | 82 ++++++++------- fs/bcachefs/btree_iter.h | 14 +-- fs/bcachefs/btree_key_cache.c | 12 +-- fs/bcachefs/btree_trans_commit.c | 8 +- fs/bcachefs/btree_types.h | 4 +- fs/bcachefs/btree_update.c | 6 +- fs/bcachefs/btree_update.h | 25 ++--- fs/bcachefs/btree_update_interior.c | 35 +++--- fs/bcachefs/btree_write_buffer.c | 2 +- fs/bcachefs/buckets.c | 2 +- fs/bcachefs/data_update.c | 2 +- fs/bcachefs/debug.c | 34 +++--- fs/bcachefs/dirent.c | 31 +++--- fs/bcachefs/ec.c | 34 +++--- fs/bcachefs/fs-io-buffered.c | 16 +-- fs/bcachefs/fs-io-direct.c | 14 ++- fs/bcachefs/fs-io-pagecache.c | 14 +-- fs/bcachefs/fs-io.c | 84 +++++++-------- fs/bcachefs/fs.c | 166 ++++++++++++++--------------- fs/bcachefs/fsck.c | 96 +++++++---------- fs/bcachefs/inode.c | 46 ++++---- fs/bcachefs/io_misc.c | 19 ++-- fs/bcachefs/io_read.c | 36 +++---- fs/bcachefs/io_write.c | 43 ++++---- fs/bcachefs/journal.c | 4 +- fs/bcachefs/journal_seq_blacklist.c | 12 +-- fs/bcachefs/logged_ops.c | 4 +- fs/bcachefs/lru.c | 4 +- fs/bcachefs/migrate.c | 24 ++--- fs/bcachefs/move.c | 39 ++++--- fs/bcachefs/movinggc.c | 18 ++-- fs/bcachefs/quota.c | 14 +-- fs/bcachefs/recovery.c | 6 +- fs/bcachefs/reflink.c | 36 +++---- fs/bcachefs/snapshot.c | 58 +++++----- fs/bcachefs/subvolume.c | 6 +- fs/bcachefs/super.c | 7 -- fs/bcachefs/sysfs.c | 10 +- fs/bcachefs/tests.c | 205 ++++++++++++++++-------------------- fs/bcachefs/xattr.c | 30 +++--- 47 files changed, 731 insertions(+), 877 deletions(-) (limited to 'fs/bcachefs/fs.c') diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index ae2036b0fcc4..9653401957b3 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -279,18 +279,16 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; struct bkey_s_c k; int ret; - - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_hash_lookup(&trans, &iter, bch2_xattr_hash_desc, + ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc, &hash, inode_inum(inode), &search, 0); if (ret) { if (!bch2_err_matches(ret, ENOENT)) @@ -306,7 +304,7 @@ retry: } xattr = bkey_s_c_to_xattr(k); - acl = bch2_acl_from_disk(&trans, xattr_val(xattr.v), + acl = bch2_acl_from_disk(trans, xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); if (!IS_ERR(acl)) @@ -315,8 +313,8 @@ out: if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart)) goto retry; - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return acl; } @@ -356,7 +354,7 @@ int bch2_set_acl(struct mnt_idmap *idmap, { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter inode_iter = { NULL }; struct bch_inode_unpacked inode_u; struct 
posix_acl *acl; @@ -364,12 +362,11 @@ int bch2_set_acl(struct mnt_idmap *idmap, int ret; mutex_lock(&inode->ei_update_lock); - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); acl = _acl; - ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode), + ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT); if (ret) goto btree_err; @@ -382,30 +379,30 @@ retry: goto btree_err; } - ret = bch2_set_acl_trans(&trans, inode_inum(inode), &inode_u, acl, type); + ret = bch2_set_acl_trans(trans, inode_inum(inode), &inode_u, acl, type); if (ret) goto btree_err; inode_u.bi_ctime = bch2_current_time(c); inode_u.bi_mode = mode; - ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, NULL, 0); + ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: + bch2_trans_commit(trans, NULL, NULL, 0); btree_err: - bch2_trans_iter_exit(&trans, &inode_iter); + bch2_trans_iter_exit(trans, &inode_iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err; - bch2_inode_update_after_write(&trans, inode, &inode_u, + bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME|ATTR_MODE); set_cached_acl(&inode->v, type, acl); err: - bch2_trans_exit(&trans); mutex_unlock(&inode->ei_update_lock); + bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 4eab7e59ae93..19ef7a444c23 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -548,7 +548,7 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke int bch2_bucket_gens_init(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_alloc_v4 a; @@ -559,9 +559,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) u8 gen; int ret; - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { /* * Not a fsck error because this is checked/repaired by @@ -574,10 +572,10 @@ int bch2_bucket_gens_init(struct bch_fs *c) pos = alloc_gens_pos(iter.pos, &offset); if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { - ret = commit_do(&trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, - bch2_btree_insert_trans(&trans, BTREE_ID_bucket_gens, &g.k_i, 0)); + bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); if (ret) break; have_bucket_gens_key = false; @@ -591,15 +589,15 @@ int bch2_bucket_gens_init(struct bch_fs *c) g.v.gens[offset] = gen; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (have_bucket_gens_key && !ret) - ret = commit_do(&trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, - bch2_btree_insert_trans(&trans, BTREE_ID_bucket_gens, &g.k_i, 0)); + bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); @@ -608,20 +606,19 @@ int bch2_bucket_gens_init(struct bch_fs *c) int bch2_alloc_read(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_dev *ca; int ret; down_read(&c->gc_lock); - bch2_trans_init(&trans, c, 0, 0); if 
(c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) { const struct bch_bucket_gens *g; u64 b; - for_each_btree_key(&trans, iter, BTREE_ID_bucket_gens, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; @@ -645,11 +642,11 @@ int bch2_alloc_read(struct bch_fs *c) b++) *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); } else { struct bch_alloc_v4 a; - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { /* * Not a fsck error because this is checked/repaired by @@ -662,10 +659,10 @@ int bch2_alloc_read(struct bch_fs *c) *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); up_read(&c->gc_lock); if (ret) @@ -1371,27 +1368,25 @@ fsck_err: int bch2_check_alloc_info(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; struct bkey hole; struct bkey_s_c k; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, POS_MIN, + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH); - bch2_trans_iter_init(&trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, + bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_PREFETCH); - bch2_trans_iter_init(&trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, + bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_PREFETCH); - bch2_trans_iter_init(&trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN, + bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH); while (1) { struct bpos next; - bch2_trans_begin(&trans); + bch2_trans_begin(trans); k = bch2_get_key_or_real_bucket_hole(&iter, &hole); ret = bkey_err(k); @@ -1404,7 +1399,7 @@ int bch2_check_alloc_info(struct bch_fs *c) if (k.k->type) { next = bpos_nosnap_successor(k.k->p); - ret = bch2_check_alloc_key(&trans, + ret = bch2_check_alloc_key(trans, k, &iter, &discard_iter, &freespace_iter, @@ -1414,11 +1409,11 @@ int bch2_check_alloc_info(struct bch_fs *c) } else { next = k.k->p; - ret = bch2_check_alloc_hole_freespace(&trans, + ret = bch2_check_alloc_hole_freespace(trans, bkey_start_pos(k.k), &next, &freespace_iter) ?: - bch2_check_alloc_hole_bucket_gens(&trans, + bch2_check_alloc_hole_bucket_gens(trans, bkey_start_pos(k.k), &next, &bucket_gens_iter); @@ -1426,7 +1421,7 @@ int bch2_check_alloc_info(struct bch_fs *c) goto bkey_err; } - ret = bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW); if (ret) @@ -1439,29 +1434,29 @@ bkey_err: if (ret) break; } - bch2_trans_iter_exit(&trans, &bucket_gens_iter); - bch2_trans_iter_exit(&trans, &freespace_iter); - bch2_trans_iter_exit(&trans, &discard_iter); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &bucket_gens_iter); + bch2_trans_iter_exit(trans, &freespace_iter); + bch2_trans_iter_exit(trans, &discard_iter); + bch2_trans_iter_exit(trans, &iter); if 
(ret < 0) goto err; - ret = for_each_btree_key2(&trans, iter, + ret = for_each_btree_key2(trans, iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_PREFETCH, k, - bch2_check_discard_freespace_key(&trans, &iter, k.k->p)) ?: - for_each_btree_key2(&trans, iter, + bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?: + for_each_btree_key2(trans, iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_PREFETCH, k, - bch2_check_discard_freespace_key(&trans, &iter, k.k->p)) ?: - for_each_btree_key_commit(&trans, iter, + bch2_check_discard_freespace_key(trans, &iter, k.k->p)) ?: + for_each_btree_key_commit(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, - bch2_check_bucket_gens_key(&trans, &iter, k)); + bch2_check_bucket_gens_key(trans, &iter, k)); err: - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); return ret; @@ -1547,10 +1542,10 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) int ret = 0; ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, - bch2_check_alloc_to_lru_ref(&trans, &iter))); + bch2_check_alloc_to_lru_ref(trans, &iter))); if (ret) bch_err_fn(c, ret); return ret; @@ -1675,29 +1670,25 @@ out: static void bch2_do_discards_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, discard_work); - struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; struct bpos discard_pos_done = POS_MAX; int ret; - bch2_trans_init(&trans, c, 0, 0); - /* * We're doing the commit in bch2_discard_one_bucket instead of using * for_each_btree_key_commit() so that we can increment counters after * successful commit: */ - ret = for_each_btree_key2(&trans, iter, - BTREE_ID_need_discard, POS_MIN, 0, k, - bch2_discard_one_bucket(&trans, &iter, &discard_pos_done, - &seen, - &open, - &need_journal_commit, - &discarded)); - - bch2_trans_exit(&trans); + ret = bch2_trans_run(c, + for_each_btree_key2(trans, iter, + BTREE_ID_need_discard, POS_MIN, 0, k, + bch2_discard_one_bucket(trans, &iter, &discard_pos_done, + &seen, + &open, + &need_journal_commit, + &discarded))); if (need_journal_commit * 2 > seen) bch2_journal_flush_async(&c->journal, NULL); @@ -1803,15 +1794,13 @@ static void bch2_do_invalidates_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work); struct bch_dev *ca; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; unsigned i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - - ret = bch2_btree_write_buffer_flush(&trans); + ret = bch2_btree_write_buffer_flush(trans); if (ret) goto err; @@ -1819,11 +1808,11 @@ static void bch2_do_invalidates_work(struct work_struct *work) s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_lru, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, lru_pos(ca->dev_idx, 0, 0), lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), BTREE_ITER_INTENT, k, - invalidate_one_bucket(&trans, &iter, k, &nr_to_invalidate)); + invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate)); if (ret < 0) { percpu_ref_put(&ca->ref); @@ -1831,7 +1820,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) } } 
err: - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } @@ -1845,7 +1834,7 @@ void bch2_do_invalidates(struct bch_fs *c) static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, unsigned long *last_updated) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bkey hole; @@ -1853,9 +1842,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, struct bch_member *m; int ret; - bch2_trans_init(&trans, c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, ca->mi.first_bucket), BTREE_ITER_PREFETCH); /* @@ -1869,7 +1856,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, *last_updated = jiffies; } - bch2_trans_begin(&trans); + bch2_trans_begin(trans); if (bkey_ge(iter.pos, end)) { ret = 0; @@ -1889,8 +1876,8 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert); - ret = bch2_bucket_do_index(&trans, k, a, true) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_bucket_do_index(trans, k, a, true) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL); if (ret) @@ -1900,7 +1887,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, } else { struct bkey_i *freespace; - freespace = bch2_trans_kmalloc(&trans, sizeof(*freespace)); + freespace = bch2_trans_kmalloc(trans, sizeof(*freespace)); ret = PTR_ERR_OR_ZERO(freespace); if (ret) goto bkey_err; @@ -1910,8 +1897,8 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, freespace->k.p = k.k->p; freespace->k.size = k.k->size; - ret = bch2_btree_insert_trans(&trans, BTREE_ID_freespace, freespace, 0) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL); if (ret) @@ -1926,8 +1913,8 @@ bkey_err: break; } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); if (ret < 0) { bch_err_msg(ca, ret, "initializing free space"); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index e73b6c82870a..3bc4abd3d7d5 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -602,7 +602,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, struct open_bucket *ob; bch2_trans_do(c, NULL, NULL, 0, - PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, watermark, + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark, cl, &usage))); return ob; } diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 82109585439b..43defeaa1718 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -390,10 +390,10 @@ int bch2_check_btree_backpointers(struct bch_fs *c) int ret; ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, + for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - bch2_check_btree_backpointer(&trans, &iter, k))); + bch2_check_btree_backpointer(trans, &iter, k))); if (ret) bch_err_fn(c, ret); return ret; @@ -723,13 +723,12 @@ static int bch2_get_alloc_in_memory_pos(struct 
btree_trans *trans, int bch2_check_extents_to_backpointers(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct bpos start = POS_MIN, end; int ret; - bch2_trans_init(&trans, c, 0, 0); while (1) { - ret = bch2_get_alloc_in_memory_pos(&trans, start, &end); + ret = bch2_get_alloc_in_memory_pos(trans, start, &end); if (ret) break; @@ -749,13 +748,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) printbuf_exit(&buf); } - ret = bch2_check_extents_to_backpointers_pass(&trans, start, end); + ret = bch2_check_extents_to_backpointers_pass(trans, start, end); if (ret || bpos_eq(end, SPOS_MAX)) break; start = bpos_successor(end); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); @@ -824,13 +823,12 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, int bch2_check_backpointers_to_extents(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; int ret; - bch2_trans_init(&trans, c, 0, 0); while (1) { - ret = bch2_get_btree_in_memory_pos(&trans, + ret = bch2_get_btree_in_memory_pos(trans, (1U << BTREE_ID_extents)| (1U << BTREE_ID_reflink), ~0, @@ -856,13 +854,13 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) printbuf_exit(&buf); } - ret = bch2_check_backpointers_to_extents_pass(&trans, start, end); + ret = bch2_check_backpointers_to_extents_pass(trans, start, end); if (ret || !bbpos_cmp(end, BBPOS_MAX)) break; start = bbpos_successor(end); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 9fe3dac4a005..ad18f3b10af0 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -627,8 +627,8 @@ struct journal_keys { size_t size; }; -struct btree_path_buf { - struct btree_path *path; +struct btree_trans_buf { + struct btree_trans *trans; }; #define REPLICAS_DELTA_LIST_MAX (1U << 16) @@ -787,9 +787,9 @@ struct bch_fs { /* btree_iter.c: */ struct seqmutex btree_trans_lock; struct list_head btree_trans_list; - mempool_t btree_paths_pool; + mempool_t btree_trans_pool; mempool_t btree_trans_mem_pool; - struct btree_path_buf __percpu *btree_paths_bufs; + struct btree_trans_buf __percpu *btree_trans_bufs; struct srcu_struct btree_trans_barrier; bool btree_trans_barrier_initialized; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 9496ff16fc91..693ed067b1a7 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -529,13 +529,11 @@ fsck_err: int bch2_check_topology(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree *b; unsigned i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) { struct btree_root *r = bch2_btree_id_root(c, i); @@ -546,8 +544,8 @@ int bch2_check_topology(struct bch_fs *c) if (btree_node_fake(b)) continue; - btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); - ret = bch2_btree_repair_topology_recurse(&trans, b); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); + ret = bch2_btree_repair_topology_recurse(trans, b); six_unlock_read(&b->c.lock); if (ret == DROP_THIS_NODE) { @@ -556,7 +554,7 @@ int bch2_check_topology(struct bch_fs *c) } } - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -1068,35 +1066,33 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum 
btree_id r) static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); enum btree_id ids[BTREE_ID_NR]; unsigned i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - for (i = 0; i < BTREE_ID_NR; i++) ids[i] = i; bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); for (i = 0; i < BTREE_ID_NR && !ret; i++) ret = initial - ? bch2_gc_btree_init(&trans, ids[i], metadata_only) - : bch2_gc_btree(&trans, ids[i], initial, metadata_only); + ? bch2_gc_btree_init(trans, ids[i], metadata_only) + : bch2_gc_btree(trans, ids[i], initial, metadata_only); for (i = BTREE_ID_NR; i < btree_id_nr_alive(c) && !ret; i++) { if (!bch2_btree_id_root(c, i)->alive) continue; ret = initial - ? bch2_gc_btree_init(&trans, i, metadata_only) - : bch2_gc_btree(&trans, i, initial, metadata_only); + ? bch2_gc_btree_init(trans, i, metadata_only) + : bch2_gc_btree(trans, i, initial, metadata_only); } if (ret < 0) bch_err_fn(c, ret); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -1458,21 +1454,19 @@ fsck_err: static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_dev *ca; unsigned i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - for_each_member_device(ca, c, i) { - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, ca->mi.first_bucket), BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW, - bch2_alloc_write_key(&trans, &iter, k, metadata_only)); + bch2_alloc_write_key(trans, &iter, k, metadata_only)); if (ret < 0) { bch_err_fn(c, ret); @@ -1481,14 +1475,14 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only) } } - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret < 0 ? 
ret : 0; } static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) { struct bch_dev *ca; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bucket *g; @@ -1504,7 +1498,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) if (!buckets) { percpu_ref_put(&ca->ref); bch_err(c, "error allocating ca->buckets[gc]"); - return -BCH_ERR_ENOMEM_gc_alloc_start; + ret = -BCH_ERR_ENOMEM_gc_alloc_start; + goto err; } buckets->first_bucket = ca->mi.first_bucket; @@ -1512,9 +1507,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) rcu_assign_pointer(ca->buckets_gc, buckets); } - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { ca = bch_dev_bkey_exists(c, k.k->p.inode); g = gc_bucket(ca, k.k->p.offset); @@ -1535,13 +1528,11 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) g->stripe_redundancy = a->stripe_redundancy; } } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - + bch2_trans_iter_exit(trans, &iter); +err: + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); - return ret; } @@ -1616,7 +1607,7 @@ fsck_err: static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; size_t idx = 0; @@ -1625,23 +1616,23 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only) if (metadata_only) return 0; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); - ret = for_each_btree_key_commit(&trans, iter, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_gc_write_reflink_key(&trans, &iter, k, &idx)); + bch2_gc_write_reflink_key(trans, &iter, k, &idx)); c->reflink_gc_nr = 0; - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } static int bch2_gc_reflink_start(struct bch_fs *c, bool metadata_only) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct reflink_gc *r; @@ -1650,10 +1641,10 @@ static int bch2_gc_reflink_start(struct bch_fs *c, if (metadata_only) return 0; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); c->reflink_gc_nr = 0; - for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { const __le64 *refcount = bkey_refcount_c(k); @@ -1671,9 +1662,9 @@ static int bch2_gc_reflink_start(struct bch_fs *c, r->size = k.k->size; r->refcount = 0; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -1740,7 +1731,7 @@ fsck_err: static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; int ret = 0; @@ -1748,15 +1739,15 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only) if (metadata_only) return 0; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); - ret = for_each_btree_key_commit(&trans, iter, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_gc_write_stripes_key(&trans, &iter, k)); + 
bch2_gc_write_stripes_key(trans, &iter, k)); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -1942,7 +1933,7 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i int bch2_gc_gens(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bch_dev *ca; @@ -1960,7 +1951,7 @@ int bch2_gc_gens(struct bch_fs *c) trace_and_count(c, gc_gens_start, c); down_read(&c->gc_lock); - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); for_each_member_device(ca, c, i) { struct bucket_gens *gens; @@ -1986,26 +1977,26 @@ int bch2_gc_gens(struct bch_fs *c) c->gc_gens_btree = i; c->gc_gens_pos = POS_MIN; - ret = for_each_btree_key_commit(&trans, iter, i, + ret = for_each_btree_key_commit(trans, iter, i, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_NOFAIL, - gc_btree_gens_key(&trans, &iter, k)); + gc_btree_gens_key(trans, &iter, k)); if (ret && !bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); if (ret) goto err; } - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_alloc_write_oldest_gen(&trans, &iter, k)); + bch2_alloc_write_oldest_gen(trans, &iter, k)); if (ret && !bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); if (ret) @@ -2024,7 +2015,7 @@ err: ca->oldest_gen = NULL; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); up_read(&c->gc_lock); mutex_unlock(&c->gc_gens_lock); return ret; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 9fa9ed641300..a869cf6ac7c6 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1628,8 +1628,7 @@ err: int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, const struct bkey_i *k, unsigned level) { - return bch2_trans_run(c, __bch2_btree_root_read(&trans, id, k, level)); - + return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); } void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, @@ -1691,15 +1690,13 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b) static void btree_node_write_done(struct bch_fs *c, struct btree *b) { - struct btree_trans trans; - - bch2_trans_init(&trans, c, 0, 0); + struct btree_trans *trans = bch2_trans_get(c); - btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); __btree_node_write_done(c, b); six_unlock_read(&b->c.lock); - bch2_trans_exit(&trans); + bch2_trans_put(trans); } static void btree_node_write_work(struct work_struct *work) @@ -1728,7 +1725,7 @@ static void btree_node_write_work(struct work_struct *work) } } else { ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_btree_node_update_key_get_iter(&trans, b, &wbio->key, + bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_reclaim| BTREE_INSERT_JOURNAL_RECLAIM| BTREE_INSERT_NOFAIL| diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6c064e82c0c8..1d79514754d7 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2906,28 +2906,23 @@ u32 bch2_trans_begin(struct btree_trans *trans) return trans->restart_count; } -static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) +static struct btree_trans *bch2_trans_alloc(struct bch_fs *c) { - size_t paths_bytes = sizeof(struct btree_path) * BTREE_ITER_MAX; - size_t updates_bytes = sizeof(struct 
btree_insert_entry) * BTREE_ITER_MAX; - void *p = NULL; + struct btree_trans *trans; - BUG_ON(trans->used_mempool); - -#ifdef __KERNEL__ - p = this_cpu_xchg(c->btree_paths_bufs->path, NULL); -#endif - if (!p) { - p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); - /* - * paths need to be zeroed, bch2_check_for_deadlock looks at - * paths in other threads - */ - memset(p, 0, paths_bytes); + if (IS_ENABLED(__KERNEL__)) { + trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL); + if (trans) + return trans; } - trans->paths = p; p += paths_bytes; - trans->updates = p; p += updates_bytes; + trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS); + /* + * paths need to be zeroed, bch2_check_for_deadlock looks at + * paths in other threads + */ + memset(&trans->paths, 0, sizeof(trans->paths)); + return trans; } const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR]; @@ -2947,11 +2942,14 @@ unsigned bch2_trans_get_fn_idx(const char *fn) return i; } -void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx) +struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) __acquires(&c->btree_trans_barrier) { + struct btree_trans *trans; struct btree_transaction_stats *s; + trans = bch2_trans_alloc(c); + memset(trans, 0, sizeof(*trans)); trans->c = c; trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns) @@ -2963,8 +2961,6 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_ !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags); closure_init_stack(&trans->ref); - bch2_trans_alloc_paths(trans, c); - s = btree_trans_stats(trans); if (s && s->max_mem) { unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem); @@ -3010,6 +3006,8 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_ list_add_done: seqmutex_unlock(&c->btree_trans_lock); } + + return trans; } static void check_btree_paths_leaked(struct btree_trans *trans) @@ -3034,7 +3032,7 @@ leaked: #endif } -void bch2_trans_exit(struct btree_trans *trans) +void bch2_trans_put(struct btree_trans *trans) __releases(&c->btree_trans_barrier) { struct btree_insert_entry *i; @@ -3080,18 +3078,11 @@ void bch2_trans_exit(struct btree_trans *trans) else kfree(trans->mem); -#ifdef __KERNEL__ - /* - * Userspace doesn't have a real percpu implementation: - */ - trans->paths = this_cpu_xchg(c->btree_paths_bufs->path, trans->paths); -#endif - - if (trans->paths) - mempool_free(trans->paths, &c->btree_paths_pool); - - trans->mem = (void *) 0x1; - trans->paths = (void *) 0x1; + /* Userspace doesn't have a real percpu implementation: */ + if (IS_ENABLED(__KERNEL__)) + trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans); + if (trans) + mempool_free(trans, &c->btree_trans_pool); } static void __maybe_unused @@ -3169,6 +3160,17 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) void bch2_fs_btree_iter_exit(struct bch_fs *c) { struct btree_transaction_stats *s; + struct btree_trans *trans; + int cpu; + + trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list); + if (trans) + panic("%s leaked btree_trans\n", trans->fn); + + if (c->btree_trans_bufs) + for_each_possible_cpu(cpu) + kfree(per_cpu_ptr(c->btree_trans_bufs, cpu)->trans); + free_percpu(c->btree_trans_bufs); for (s = c->btree_transaction_stats; s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); @@ -3180,13 +3182,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) if (c->btree_trans_barrier_initialized) 
cleanup_srcu_struct(&c->btree_trans_barrier); mempool_exit(&c->btree_trans_mem_pool); - mempool_exit(&c->btree_paths_pool); + mempool_exit(&c->btree_trans_pool); } int bch2_fs_btree_iter_init(struct bch_fs *c) { struct btree_transaction_stats *s; - unsigned nr = BTREE_ITER_MAX; int ret; for (s = c->btree_transaction_stats; @@ -3199,9 +3200,12 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) INIT_LIST_HEAD(&c->btree_trans_list); seqmutex_init(&c->btree_trans_lock); - ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1, - sizeof(struct btree_path) * nr + - sizeof(struct btree_insert_entry) * nr) ?: + c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf); + if (!c->btree_trans_bufs) + return -ENOMEM; + + ret = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1, + sizeof(struct btree_trans)) ?: mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1, BTREE_TRANS_MEM_MAX) ?: init_srcu_struct(&c->btree_trans_barrier); diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 360a26b58501..fbe273453db3 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -915,21 +915,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); -void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned); -void bch2_trans_exit(struct btree_trans *); + +struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned); +void bch2_trans_put(struct btree_trans *); extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR]; unsigned bch2_trans_get_fn_idx(const char *); -#define bch2_trans_init(_trans, _c, _nr_iters, _mem) \ -do { \ +#define bch2_trans_get(_c) \ +({ \ static unsigned trans_fn_idx; \ \ if (unlikely(!trans_fn_idx)) \ trans_fn_idx = bch2_trans_get_fn_idx(__func__); \ - \ - __bch2_trans_init(_trans, _c, trans_fn_idx); \ -} while (0) + __bch2_trans_get(_c, trans_fn_idx); \ +}) void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 784f889340cd..29a0b566a4fe 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -704,13 +704,11 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - - btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read); + btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); key = ck->key; if (ck->journal.seq != seq || @@ -727,13 +725,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, } six_unlock_read(&ck->c.lock); - ret = commit_do(&trans, NULL, NULL, 0, - btree_key_cache_flush_pos(&trans, key, seq, + ret = commit_do(trans, NULL, NULL, 0, + btree_key_cache_flush_pos(trans, key, seq, BTREE_INSERT_JOURNAL_RECLAIM, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index e3a0b101cbf8..183db5d67a26 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -163,13 +163,11 @@ static int __btree_node_flush(struct journal 
*j, struct journal_entry_pin *pin, struct bch_fs *c = container_of(j, struct bch_fs, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[i]); - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); unsigned long old, new, v; unsigned idx = w - b->writes; - bch2_trans_init(&trans, c, 0, 0); - - btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); v = READ_ONCE(b->flags); do { @@ -188,7 +186,7 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, btree_node_write_if_need(c, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return 0; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 96a03f414dd0..c9a38e254949 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -452,8 +452,8 @@ struct btree_trans { void *mem; u8 sorted[BTREE_ITER_MAX + 8]; - struct btree_path *paths; - struct btree_insert_entry *updates; + struct btree_path paths[BTREE_ITER_MAX]; + struct btree_insert_entry updates[BTREE_ITER_MAX]; struct btree_write_buffered_key *wb_updates; /* update path: */ diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 3d126f043db0..3342718de45d 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -692,7 +692,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, struct disk_reservation *disk_res, int flags) { return bch2_trans_do(c, disk_res, NULL, flags, - bch2_btree_insert_trans(&trans, id, k, 0)); + bch2_btree_insert_trans(trans, id, k, 0)); } int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter, @@ -824,7 +824,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, u64 *journal_seq) { int ret = bch2_trans_run(c, - bch2_btree_delete_range_trans(&trans, id, start, end, + bch2_btree_delete_range_trans(trans, id, start, end, update_flags, journal_seq)); if (ret == -BCH_ERR_transaction_restart_nested) ret = 0; @@ -898,7 +898,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, } else { ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW|commit_flags, - __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args)); + __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args)); } return ret; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 0be980d16007..4bfe602ce8e3 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -146,30 +146,17 @@ static inline int bch2_trans_commit(struct btree_trans *trans, nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) -#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ -({ \ - struct btree_trans trans; \ - int _ret; \ - \ - bch2_trans_init(&trans, (_c), 0, 0); \ - _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \ - bch2_trans_exit(&trans); \ - \ - _ret; \ -}) - #define bch2_trans_run(_c, _do) \ ({ \ - struct btree_trans trans; \ - int _ret; \ - \ - bch2_trans_init(&trans, (_c), 0, 0); \ - _ret = (_do); \ - bch2_trans_exit(&trans); \ - \ + struct btree_trans *trans = bch2_trans_get(_c); \ + int _ret = (_do); \ + bch2_trans_put(trans); \ _ret; \ }) +#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ + bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) + 
#define trans_for_each_update(_trans, _i) \ for ((_i) = (_trans)->updates; \ (_i) < (_trans)->updates + (_trans)->nr_updates; \ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index bac495b382bb..7dbf6b6c7f34 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -597,12 +597,11 @@ static void btree_update_nodes_written(struct btree_update *as) { struct bch_fs *c = as->c; struct btree *b; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); u64 journal_seq = 0; unsigned i; int ret; - bch2_trans_init(&trans, c, 0, 512); /* * If we're already in an error state, it might be because a btree node * was never written, and we might be trying to free that same btree @@ -623,7 +622,7 @@ static void btree_update_nodes_written(struct btree_update *as) b = as->old_nodes[i]; - btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); seq = b->data ? b->data->keys.seq : 0; six_unlock_read(&b->c.lock); @@ -645,13 +644,13 @@ static void btree_update_nodes_written(struct btree_update *as) * journal reclaim does btree updates when flushing bkey_cached entries, * which may require allocations as well. */ - ret = commit_do(&trans, &as->disk_res, &journal_seq, + ret = commit_do(trans, &as->disk_res, &journal_seq, BCH_WATERMARK_reclaim| BTREE_INSERT_NOFAIL| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM, - btree_update_nodes_written_trans(&trans, as)); - bch2_trans_unlock(&trans); + btree_update_nodes_written_trans(trans, as)); + bch2_trans_unlock(trans); bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, "%s(): error %s", __func__, bch2_err_str(ret)); @@ -660,7 +659,7 @@ err: struct btree_path *path; b = as->b; - path = get_unlocked_mut_path(&trans, as->btree_id, b->c.level, b->key.k.p); + path = get_unlocked_mut_path(trans, as->btree_id, b->c.level, b->key.k.p); /* * @b is the node we did the final insert into: * @@ -683,13 +682,13 @@ err: * we may rarely end up with a locked path besides the one we * have here: */ - bch2_trans_unlock(&trans); - btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent); - mark_btree_node_locked(&trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED); + bch2_trans_unlock(trans); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); + mark_btree_node_locked(trans, path, b->c.level, BTREE_NODE_INTENT_LOCKED); path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock); path->l[b->c.level].b = b; - bch2_btree_node_lock_write_nofail(&trans, path, &b->c); + bch2_btree_node_lock_write_nofail(trans, path, &b->c); mutex_lock(&c->btree_interior_update_lock); @@ -729,8 +728,8 @@ err: six_unlock_write(&b->c.lock); btree_node_write_if_need(c, b, SIX_LOCK_intent); - btree_node_unlock(&trans, path, b->c.level); - bch2_path_put(&trans, path, true); + btree_node_unlock(trans, path, b->c.level); + bch2_path_put(trans, path, true); } bch2_journal_pin_drop(&c->journal, &as->journal); @@ -750,7 +749,7 @@ err: for (i = 0; i < as->nr_new_nodes; i++) { b = as->new_nodes[i]; - btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read); + btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); btree_node_write_if_need(c, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); } @@ -758,8 +757,8 @@ err: for (i = 0; i < as->nr_open_buckets; i++) bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); - bch2_btree_update_free(as, &trans); - bch2_trans_exit(&trans); + bch2_btree_update_free(as, trans); + 
bch2_trans_put(trans); } static void btree_interior_update_work(struct work_struct *work) @@ -2049,7 +2048,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work) int ret; ret = bch2_trans_do(c, NULL, NULL, 0, - async_btree_node_rewrite_trans(&trans, a)); + async_btree_node_rewrite_trans(trans, a)); if (ret) bch_err_fn(c, ret); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); @@ -2365,7 +2364,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id) void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) { - bch2_trans_run(c, __bch2_btree_root_alloc(&trans, id)); + bch2_trans_run(c, __bch2_btree_root_alloc(trans, id)); } void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 6d2d43b6ff6a..4e6241db518b 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -296,7 +296,7 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, mutex_lock(&wb->flush_lock); return bch2_trans_run(c, - __bch2_btree_write_buffer_flush(&trans, BTREE_INSERT_NOCHECK_RW, true)); + __bch2_btree_write_buffer_flush(trans, BTREE_INSERT_NOCHECK_RW, true)); } static inline u64 btree_write_buffer_ref(int idx) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 78139f73636c..994129142d39 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1923,7 +1923,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca) { - int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca)); + int ret = bch2_trans_run(c, __bch2_trans_mark_dev_sb(trans, ca)); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 84ca128a59a3..899ff46de8e0 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -303,7 +303,7 @@ out: int bch2_data_update_index_update(struct bch_write_op *op) { - return bch2_trans_run(op->c, __bch2_data_update_index_update(&trans, op)); + return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op)); } void bch2_data_update_read_done(struct data_update *m, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 7593ba04dfb2..404148bd348a 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -366,7 +366,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; ssize_t ret; @@ -379,17 +379,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (ret) return ret; - bch2_trans_init(&trans, i->c, 0, 0); - ret = for_each_btree_key2(&trans, iter, i->id, i->from, + trans = bch2_trans_get(i->c); + ret = for_each_btree_key2(trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ bch2_bkey_val_to_text(&i->buf, i->c, k); prt_newline(&i->buf); - drop_locks_do(&trans, flush_buf(i)); + drop_locks_do(trans, flush_buf(i)); })); i->from = iter.pos; - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (!ret) ret = flush_buf(i); @@ -408,7 +408,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct btree *b; ssize_t ret; @@ -424,26 +424,26 @@ 
static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - bch2_trans_init(&trans, i->c, 0, 0); + trans = bch2_trans_get(i->c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) { + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { bch2_btree_node_to_text(&i->buf, i->c, b); i->from = !bpos_eq(SPOS_MAX, b->key.k.p) ? bpos_successor(b->key.k.p) : b->key.k.p; - ret = drop_locks_do(&trans, flush_buf(i)); + ret = drop_locks_do(trans, flush_buf(i)); if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (!ret) ret = flush_buf(i); @@ -462,7 +462,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; ssize_t ret; @@ -475,9 +475,9 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, if (ret) return ret; - bch2_trans_init(&trans, i->c, 0, 0); + trans = bch2_trans_get(i->c); - ret = for_each_btree_key2(&trans, iter, i->id, i->from, + ret = for_each_btree_key2(trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct btree_path_level *l = &iter.path->l[0]; @@ -490,11 +490,11 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, } bch2_bfloat_to_text(&i->buf, l->b, _k); - drop_locks_do(&trans, flush_buf(i)); + drop_locks_do(trans, flush_buf(i)); })); i->from = iter.pos; - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (!ret) ret = flush_buf(i); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index a7559ab03802..6c6c8d57d72b 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -479,21 +479,19 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct bch_hash_info *hash_info, const struct qstr *name, subvol_inum *inum) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; int ret; - - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info, + ret = __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (!ret) - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } @@ -522,7 +520,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_dirent dirent; @@ -533,15 +531,14 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) int ret; bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto err; - for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents, + 
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, SPOS(inum.inum, ctx->pos, snapshot), POS(inum.inum, U64_MAX), 0, k, ret) { if (k.k->type != KEY_TYPE_dirent) @@ -549,7 +546,7 @@ retry: dirent = bkey_s_c_to_dirent(k); - ret = bch2_dirent_read_target(&trans, inum, dirent, &target); + ret = bch2_dirent_read_target(trans, inum, dirent, &target); if (ret < 0) break; if (ret) @@ -558,7 +555,7 @@ retry: /* dir_emit() can fault and block: */ bch2_bkey_buf_reassemble(&sk, c, k); dirent = bkey_i_to_s_c_dirent(sk.k); - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); name = bch2_dirent_get_name(dirent); @@ -574,16 +571,16 @@ retry: * read_target looks up subvolumes, we can overflow paths if the * directory has many subvolumes in it */ - ret = btree_trans_too_many_iters(&trans); + ret = btree_trans_too_many_iters(trans); if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); return ret; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 40e72b96745a..8646856e4539 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -476,7 +476,7 @@ err: static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *stripe) { - return bch2_trans_run(c, get_stripe_key_trans(&trans, idx, stripe)); + return bch2_trans_run(c, get_stripe_key_trans(trans, idx, stripe)); } /* recovery read path: */ @@ -788,12 +788,10 @@ static void ec_stripe_delete_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, ec_stripe_delete_work); - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); int ret; u64 idx; - bch2_trans_init(&trans, c, 0, 0); - while (1) { mutex_lock(&c->ec_stripes_heap_lock); idx = stripe_idx_to_delete(c); @@ -802,15 +800,15 @@ static void ec_stripe_delete_work(struct work_struct *work) if (!idx) break; - ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, - ec_stripe_delete(&trans, idx)); + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, + ec_stripe_delete(trans, idx)); if (ret) { bch_err_fn(c, ret); break; } } - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete); } @@ -999,24 +997,22 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; unsigned i, nr_data = v->nr_blocks - v->nr_redundant; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - - ret = bch2_btree_write_buffer_flush(&trans); + ret = bch2_btree_write_buffer_flush(trans); if (ret) goto err; for (i = 0; i < nr_data; i++) { - ret = ec_stripe_update_bucket(&trans, s, i); + ret = ec_stripe_update_bucket(trans, s, i); if (ret) break; } err: - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -1124,7 +1120,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) ret = bch2_trans_do(c, &s->res, NULL, BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL, - ec_stripe_key_update(&trans, + ec_stripe_key_update(trans, bkey_i_to_stripe(&s->new_stripe.key), !s->have_existing_stripe)); if (ret) { @@ -1822,7 +1818,7 @@ void bch2_fs_ec_flush(struct bch_fs *c) int bch2_stripes_read(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct 
btree_iter iter; struct bkey_s_c k; const struct bch_stripe *s; @@ -1830,9 +1826,7 @@ int bch2_stripes_read(struct bch_fs *c) unsigned i; int ret; - bch2_trans_init(&trans, c, 0, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { if (k.k->type != KEY_TYPE_stripe) continue; @@ -1855,9 +1849,9 @@ int bch2_stripes_read(struct bch_fs *c) bch2_stripes_heap_insert(c, m, k.k->p.offset); } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 7650d8b3122a..58ccc7b91ac7 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -270,7 +270,7 @@ void bch2_readahead(struct readahead_control *ractl) struct bch_inode_info *inode = to_bch_ei(ractl->mapping->host); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_io_opts opts; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct folio *folio; struct readpages_iter readpages_iter; int ret; @@ -280,8 +280,6 @@ void bch2_readahead(struct readahead_control *ractl) ret = readpages_iter_init(&readpages_iter, ractl); BUG_ON(ret); - bch2_trans_init(&trans, c, 0, 0); - bch2_pagecache_add_get(inode); while ((folio = readpage_iter_peek(&readpages_iter))) { @@ -300,31 +298,27 @@ void bch2_readahead(struct readahead_control *ractl) rbio->bio.bi_end_io = bch2_readpages_end_io; BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - bchfs_read(&trans, rbio, inode_inum(inode), + bchfs_read(trans, rbio, inode_inum(inode), &readpages_iter); - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); } bch2_pagecache_add_put(inode); - bch2_trans_exit(&trans); + bch2_trans_put(trans); darray_exit(&readpages_iter.folios); } static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, subvol_inum inum, struct folio *folio) { - struct btree_trans trans; - bch2_folio_create(folio, __GFP_NOFAIL); rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; rbio->bio.bi_iter.bi_sector = folio_sector(folio); BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - bch2_trans_init(&trans, c, 0, 0); - bchfs_read(&trans, rbio, inum, NULL); - bch2_trans_exit(&trans); + bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0)); } static void bch2_read_single_folio_end_io(struct bio *bio) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 4c61cb18c366..6a9557e7ecab 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -234,23 +234,21 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, u64 offset, u64 size, unsigned nr_replicas, bool compressed) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; u64 end = offset + size; u32 snapshot; bool ret = true; int err; - - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - err = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + err = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (err) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, + for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, SPOS(inum.inum, offset, snapshot), BTREE_ITER_SLOTS, k, err) { if (bkey_ge(bkey_start_pos(k.k), POS(inum.inum, end))) @@ -265,11 +263,11 @@ retry: } offset = 
iter.pos.offset; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(err, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); return err ? false : ret; } diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index 4d1612ede484..8bd9bcdd27f7 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -182,7 +182,7 @@ static void __bch2_folio_set(struct folio *folio, int bch2_folio_set(struct bch_fs *c, subvol_inum inum, struct folio **fs, unsigned nr_folios) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bch_folio *s; @@ -204,15 +204,15 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum, return 0; folio_idx = 0; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, + for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, SPOS(inum.inum, offset, snapshot), BTREE_ITER_SLOTS, k, ret) { unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k); @@ -243,11 +243,11 @@ retry: } offset = iter.pos.offset; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index ffe9206e4c18..b0e8144ec550 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -207,31 +207,29 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol, struct bpos start, struct bpos end) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; int ret = 0; - - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot); + ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot); if (ret) goto err; - for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, start, end, 0, k, ret) + for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret) if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) { ret = 1; break; } start = iter.pos; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -582,16 +580,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, u64 start_sector, u64 end_sector) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bpos end_pos = POS(inode->v.i_ino, end_sector); struct bch_io_opts opts; int ret = 0; bch2_inode_opts_get(&opts, c, &inode->ei_inode); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inode->v.i_ino, start_sector), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -604,9 +601,9 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, u64 hole_start, hole_end; u32 
snapshot; - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, + ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot); if (ret) goto bkey_err; @@ -643,7 +640,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, &hole_start, &hole_end, opts.data_replicas, true)) - ret = drop_locks_do(&trans, + ret = drop_locks_do(trans, (bch2_clamp_data_hole(&inode->v, &hole_start, &hole_end, @@ -666,7 +663,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, goto bkey_err; } - ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter, + ret = bch2_extent_fallocate(trans, inode_inum(inode), &iter, sectors, opts, &i_sectors_delta, writepoint_hashed((unsigned long) current)); if (ret) @@ -674,7 +671,7 @@ bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta); - drop_locks_do(&trans, + drop_locks_do(trans, (bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0)); bkey_err: bch2_quota_reservation_put(c, inode, &quota_res); @@ -686,14 +683,14 @@ bkey_err: struct quota_res quota_res = { 0 }; s64 i_sectors_delta = 0; - bch2_fpunch_at(&trans, &iter, inode_inum(inode), + bch2_fpunch_at(trans, &iter, inode_inum(inode), end_sector, &i_sectors_delta); bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta); bch2_quota_reservation_put(c, inode, &quota_res); } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } @@ -799,26 +796,24 @@ static int quota_reserve_range(struct bch_inode_info *inode, u64 start, u64 end) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; u32 snapshot; u64 sectors = end - start; u64 pos = start; int ret; - - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot); if (ret) goto err; - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inode->v.i_ino, pos, snapshot), 0); - while (!(ret = btree_trans_too_many_iters(&trans)) && + while (!(ret = btree_trans_too_many_iters(trans)) && (k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k && !(ret = bkey_err(k))) { if (bkey_extent_is_allocation(k.k)) { @@ -830,17 +825,14 @@ retry: bch2_btree_iter_advance(&iter); } pos = iter.pos.offset; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); - - if (ret) - return ret; + bch2_trans_put(trans); - return bch2_quota_reservation_add(c, inode, res, sectors, true); + return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true); } loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, @@ -933,7 +925,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; subvol_inum inum = inode_inum(inode); @@ -945,15 +937,15 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; -
bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto err; - for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents, + for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, SPOS(inode->v.i_ino, offset >> 9, snapshot), POS(inode->v.i_ino, U64_MAX), 0, k, ret) { @@ -963,12 +955,12 @@ retry: } else if (k.k->p.offset >> 9 > isize) break; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) return ret; @@ -986,7 +978,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; subvol_inum inum = inode_inum(inode); @@ -998,15 +990,15 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, + for_each_btree_key_norestart(trans, iter, BTREE_ID_extents, SPOS(inode->v.i_ino, offset >> 9, snapshot), BTREE_ITER_SLOTS, k, ret) { if (k.k->p.inode != inode->v.i_ino) { @@ -1024,12 +1016,12 @@ retry: offset = max(offset, bkey_start_offset(k.k) << 9); } } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f814e9e0a741..bfbd4f004edc 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -82,29 +82,27 @@ int __must_check bch2_write_inode(struct bch_fs *c, inode_set_fn set, void *p, unsigned fields) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; - - bch2_trans_init(&trans, c, 0, 512); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_inode_peek(&trans, &iter, &inode_u, inode_inum(inode), + ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT) ?: - (set ? set(&trans, inode, &inode_u, p) : 0) ?: - bch2_inode_write(&trans, &iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); + (set ? 
set(trans, inode, &inode_u, p) : 0) ?: + bch2_inode_write(trans, &iter, &inode_u) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); /* * the btree node lock protects inode->ei_inode, not ei_update_lock; * this is important for inode updates via bchfs_write_index_update */ if (!ret) - bch2_inode_update_after_write(&trans, inode, &inode_u, fields); + bch2_inode_update_after_write(trans, inode, &inode_u, fields); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; @@ -114,7 +112,7 @@ retry: inode_inum(inode).subvol, inode_inum(inode).inum); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret < 0 ? ret : 0; } @@ -182,7 +180,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) { struct bch_inode_unpacked inode_u; struct bch_inode_info *inode; - struct btree_trans trans; + struct btree_trans *trans; struct bch_subvolume subvol; int ret; @@ -196,14 +194,14 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) if (!(inode->v.i_state & I_NEW)) return &inode->v; - bch2_trans_init(&trans, c, 8, 0); - ret = lockrestart_do(&trans, - bch2_subvolume_get(&trans, inum.subvol, true, 0, &subvol) ?: - bch2_inode_find_by_inum_trans(&trans, inum, &inode_u)); + trans = bch2_trans_get(c); + ret = lockrestart_do(trans, + bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: + bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); if (!ret) - bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol); - bch2_trans_exit(&trans); + bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); + bch2_trans_put(trans); if (ret) { iget_failed(&inode->v); @@ -226,7 +224,7 @@ __bch2_create(struct mnt_idmap *idmap, unsigned flags) { struct bch_fs *c = dir->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans; struct bch_inode_unpacked dir_u; struct bch_inode_info *inode, *old; struct bch_inode_unpacked inode_u; @@ -256,13 +254,11 @@ __bch2_create(struct mnt_idmap *idmap, if (!(flags & BCH_CREATE_TMPFILE)) mutex_lock(&dir->ei_update_lock); - bch2_trans_init(&trans, c, 8, - 2048 + (!(flags & BCH_CREATE_TMPFILE) - ? dentry->d_name.len : 0)); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_create_trans(&trans, + ret = bch2_create_trans(trans, inode_inum(dir), &dir_u, &inode_u, !(flags & BCH_CREATE_TMPFILE) ? 
&dentry->d_name : NULL, @@ -278,9 +274,9 @@ retry: inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; inum.inum = inode_u.bi_inum; - ret = bch2_subvolume_get(&trans, inum.subvol, true, + ret = bch2_subvolume_get(trans, inum.subvol, true, BTREE_ITER_WITH_UPDATES, &subvol) ?: - bch2_trans_commit(&trans, NULL, &journal_seq, 0); + bch2_trans_commit(trans, NULL, &journal_seq, 0); if (unlikely(ret)) { bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); @@ -291,13 +287,13 @@ err_before_quota: } if (!(flags & BCH_CREATE_TMPFILE)) { - bch2_inode_update_after_write(&trans, dir, &dir_u, + bch2_inode_update_after_write(trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); mutex_unlock(&dir->ei_update_lock); } bch2_iget5_set(&inode->v, &inum); - bch2_vfs_inode_init(&trans, inum, inode, &inode_u, &subvol); + bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -337,7 +333,7 @@ err_before_quota: unlock_new_inode(&inode->v); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); err: posix_acl_release(default_acl); posix_acl_release(acl); @@ -346,7 +342,7 @@ err_trans: if (!(flags & BCH_CREATE_TMPFILE)) mutex_unlock(&dir->ei_update_lock); - bch2_trans_exit(&trans); + bch2_trans_put(trans); make_bad_inode(&inode->v); iput(&inode->v); inode = ERR_PTR(ret); @@ -401,26 +397,25 @@ static int __bch2_link(struct bch_fs *c, struct bch_inode_info *dir, struct dentry *dentry) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct bch_inode_unpacked dir_u, inode_u; int ret; mutex_lock(&inode->ei_update_lock); - bch2_trans_init(&trans, c, 4, 1024); - ret = commit_do(&trans, NULL, NULL, 0, - bch2_link_trans(&trans, + ret = commit_do(trans, NULL, NULL, 0, + bch2_link_trans(trans, inode_inum(dir), &dir_u, inode_inum(inode), &inode_u, &dentry->d_name)); if (likely(!ret)) { - bch2_inode_update_after_write(&trans, dir, &dir_u, + bch2_inode_update_after_write(trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME); + bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); mutex_unlock(&inode->ei_update_lock); return ret; } @@ -451,24 +446,23 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, struct bch_inode_info *dir = to_bch_ei(vdir); struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_inode_unpacked dir_u, inode_u; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); int ret; bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); - bch2_trans_init(&trans, c, 4, 1024); - ret = commit_do(&trans, NULL, NULL, + ret = commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_unlink_trans(&trans, + bch2_unlink_trans(trans, inode_inum(dir), &dir_u, &inode_u, &dentry->d_name, deleting_snapshot)); if (unlikely(ret)) goto err; - bch2_inode_update_after_write(&trans, dir, &dir_u, + bch2_inode_update_after_write(trans, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(&trans, inode, &inode_u, + bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_MTIME); if (inode_u.bi_subvol) { @@ -479,8 +473,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, set_nlink(&inode->v, 0); } err: - bch2_trans_exit(&trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); + bch2_trans_put(trans); return ret; } @@ -543,7 +537,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, struct 
bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); struct bch_inode_unpacked dst_dir_u, src_dir_u; struct bch_inode_unpacked src_inode_u, dst_inode_u; - struct btree_trans trans; + struct btree_trans *trans; enum bch_rename_mode mode = flags & RENAME_EXCHANGE ? BCH_RENAME_EXCHANGE : dst_dentry->d_inode @@ -560,7 +554,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, return ret; } - bch2_trans_init(&trans, c, 8, 2048); + trans = bch2_trans_get(c); bch2_lock_inodes(INODE_UPDATE_LOCK, src_dir, @@ -587,8 +581,8 @@ static int bch2_rename2(struct mnt_idmap *idmap, goto err; } - ret = commit_do(&trans, NULL, NULL, 0, - bch2_rename_trans(&trans, + ret = commit_do(trans, NULL, NULL, 0, + bch2_rename_trans(trans, inode_inum(src_dir), &src_dir_u, inode_inum(dst_dir), &dst_dir_u, &src_inode_u, @@ -603,21 +597,21 @@ static int bch2_rename2(struct mnt_idmap *idmap, BUG_ON(dst_inode && dst_inode->v.i_ino != dst_inode_u.bi_inum); - bch2_inode_update_after_write(&trans, src_dir, &src_dir_u, + bch2_inode_update_after_write(trans, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); if (src_dir != dst_dir) - bch2_inode_update_after_write(&trans, dst_dir, &dst_dir_u, + bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - bch2_inode_update_after_write(&trans, src_inode, &src_inode_u, + bch2_inode_update_after_write(trans, src_inode, &src_inode_u, ATTR_CTIME); if (dst_inode) - bch2_inode_update_after_write(&trans, dst_inode, &dst_inode_u, + bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u, ATTR_CTIME); err: - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_fs_quota_transfer(c, src_inode, bch_qid(&src_inode->ei_inode), @@ -680,7 +674,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter inode_iter = { NULL }; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; @@ -701,13 +695,13 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, if (ret) goto err; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); kfree(acl); acl = NULL; - ret = bch2_inode_peek(&trans, &inode_iter, &inode_u, inode_inum(inode), + ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT); if (ret) goto btree_err; @@ -715,29 +709,29 @@ retry: bch2_setattr_copy(idmap, inode, &inode_u, attr); if (attr->ia_valid & ATTR_MODE) { - ret = bch2_acl_chmod(&trans, inode_inum(inode), &inode_u, + ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u, inode_u.bi_mode, &acl); if (ret) goto btree_err; } - ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); btree_err: - bch2_trans_iter_exit(&trans, &inode_iter); + bch2_trans_iter_exit(trans, &inode_iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) goto err_trans; - bch2_inode_update_after_write(&trans, inode, &inode_u, attr->ia_valid); + bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid); if (acl) set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); err_trans: - bch2_trans_exit(&trans); + bch2_trans_put(trans); err: mutex_unlock(&inode->ei_update_lock); @@ -879,7 +873,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, { struct bch_fs *c = 
vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_buf cur, prev; @@ -900,18 +894,18 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_init(&cur); bch2_bkey_buf_init(&prev); - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, ei->ei_subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot); if (ret) goto err; - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(ei->v.i_ino, start, snapshot), 0); - while (!(ret = btree_trans_too_many_iters(&trans)) && + while (!(ret = btree_trans_too_many_iters(trans)) && (k = bch2_btree_iter_peek_upto(&iter, end)).k && !(ret = bkey_err(k))) { enum btree_id data_btree = BTREE_ID_extents; @@ -928,7 +922,7 @@ retry: bch2_bkey_buf_reassemble(&cur, c, k); - ret = bch2_read_indirect_extent(&trans, &data_btree, + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, &cur); if (ret) break; @@ -947,7 +941,7 @@ retry: cur.k->k.p.offset += cur.k->k.size; if (have_extent) { - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), 0); if (ret) @@ -961,18 +955,18 @@ retry: POS(iter.pos.inode, iter.pos.offset + sectors)); } start = iter.pos.offset; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (!ret && have_extent) { - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); return ret < 0 ? 
ret : 0; @@ -1230,7 +1224,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child struct bch_inode_info *inode = to_bch_ei(child->d_inode); struct bch_inode_info *dir = to_bch_ei(parent->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter1; struct btree_iter iter2; struct bkey_s_c k; @@ -1245,23 +1239,23 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child if (!S_ISDIR(dir->v.i_mode)) return -EINVAL; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); - bch2_trans_iter_init(&trans, &iter1, BTREE_ID_dirents, + bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents, POS(dir->ei_inode.bi_inum, 0), 0); - bch2_trans_iter_init(&trans, &iter2, BTREE_ID_dirents, + bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents, POS(dir->ei_inode.bi_inum, 0), 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, dir->ei_subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot); if (ret) goto err; bch2_btree_iter_set_snapshot(&iter1, snapshot); bch2_btree_iter_set_snapshot(&iter2, snapshot); - ret = bch2_inode_find_by_inum_trans(&trans, inode_inum(inode), &inode_u); + ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); if (ret) goto err; @@ -1279,7 +1273,7 @@ retry: } d = bkey_s_c_to_dirent(k); - ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target); + ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); if (ret > 0) ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; if (ret) @@ -1301,7 +1295,7 @@ retry: continue; d = bkey_s_c_to_dirent(k); - ret = bch2_dirent_read_target(&trans, inode_inum(dir), d, &target); + ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target); if (ret < 0) break; if (ret) @@ -1325,9 +1319,9 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_iter_exit(&trans, &iter1); - bch2_trans_iter_exit(&trans, &iter2); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter1); + bch2_trans_iter_exit(trans, &iter2); + bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index b9c9ece63175..e3d68082fdd3 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -987,7 +987,7 @@ noinline_for_stack int bch2_check_inodes(struct bch_fs *c) { bool full = c->opts.fsck; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bch_inode_unpacked prev = { 0 }; struct snapshots_seen s; @@ -995,16 +995,15 @@ int bch2_check_inodes(struct bch_fs *c) int ret; snapshots_seen_init(&s); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_inode(&trans, &iter, k, &prev, &s, full)); + check_inode(trans, &iter, k, &prev, &s, full)); - bch2_trans_exit(&trans); snapshots_seen_exit(&s); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); return ret; @@ -1437,7 +1436,7 @@ int bch2_check_extents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); struct snapshots_seen s; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct extent_ends extent_ends; @@ -1446,23 +1445,22 @@ 
int bch2_check_extents(struct bch_fs *c) snapshots_seen_init(&s); extent_ends_init(&extent_ends); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, &res, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({ bch2_disk_reservation_put(c, &res); - check_extent(&trans, &iter, k, &w, &s, &extent_ends); + check_extent(trans, &iter, k, &w, &s, &extent_ends); })) ?: - check_i_sectors(&trans, &w); + check_i_sectors(trans, &w); bch2_disk_reservation_put(c, &res); extent_ends_exit(&extent_ends); inode_walker_exit(&w); - bch2_trans_exit(&trans); snapshots_seen_exit(&s); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); @@ -1803,23 +1801,22 @@ int bch2_check_dirents(struct bch_fs *c) struct inode_walker target = inode_walker_init(); struct snapshots_seen s; struct bch_hash_info hash_info; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; int ret = 0; snapshots_seen_init(&s); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_dirents, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_dirent(&trans, &iter, k, &hash_info, &dir, &target, &s)); + check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)); - bch2_trans_exit(&trans); + bch2_trans_put(trans); snapshots_seen_exit(&s); inode_walker_exit(&dir); inode_walker_exit(&target); @@ -1873,23 +1870,18 @@ int bch2_check_xattrs(struct bch_fs *c) { struct inode_walker inode = inode_walker_init(); struct bch_hash_info hash_info; - struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; int ret = 0; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs, + ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_xattr(&trans, &iter, k, &hash_info, &inode)); - - bch2_trans_exit(&trans); - + check_xattr(trans, &iter, k, &hash_info, &inode))); if (ret) bch_err_fn(c, ret); return ret; @@ -1958,7 +1950,7 @@ int bch2_check_root(struct bch_fs *c) ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL| BTREE_INSERT_LAZY_RW, - check_root_trans(&trans)); + check_root_trans(trans)); if (ret) bch_err_fn(c, ret); @@ -2110,16 +2102,14 @@ fsck_err: */ int bch2_check_directory_structure(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked u; pathbuf path = { 0, }; int ret; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_inodes, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { @@ -2136,12 +2126,12 @@ int bch2_check_directory_structure(struct bch_fs *c) if (u.bi_flags & BCH_INODE_UNLINKED) continue; - ret = check_path(&trans, &path, &u, iter.pos.snapshot); + ret = check_path(trans, &path, &u, iter.pos.snapshot); if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + 
bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); darray_exit(&path); if (ret) @@ -2230,15 +2220,13 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct nlink_table *t, u64 start, u64 *end) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bch_inode_unpacked u; int ret = 0; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_inodes, + for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start), BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| @@ -2267,8 +2255,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, } } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); if (ret) bch_err(c, "error in fsck: btree error %i while walking inodes", ret); @@ -2280,7 +2268,7 @@ noinline_for_stack static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct snapshots_seen s; struct btree_iter iter; struct bkey_s_c k; @@ -2289,9 +2277,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links snapshots_seen_init(&s); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - for_each_btree_key(&trans, iter, BTREE_ID_dirents, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { @@ -2311,12 +2297,12 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links break; } } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (ret) bch_err(c, "error in fsck: btree error %i while walking dirents", ret); - bch2_trans_exit(&trans); + bch2_trans_put(trans); snapshots_seen_exit(&s); return ret; } @@ -2367,22 +2353,17 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; size_t idx = 0; int ret = 0; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes, - POS(0, range_start), - BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end)); - - bch2_trans_exit(&trans); - + ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + POS(0, range_start), + BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, + NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, + check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); if (ret < 0) { bch_err(c, "error in fsck: btree error %i while walking inodes", ret); return ret; @@ -2464,13 +2445,12 @@ int bch2_fix_reflink_p(struct bch_fs *c) return 0; ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, + for_each_btree_key_commit(trans, iter, BTREE_ID_extents, POS_MIN, BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, - fix_reflink_p_key(&trans, &iter, k))); - + fix_reflink_p_key(trans, &iter, k))); if (ret) bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 81ff2720835b..8bfd99cb7ad1 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -826,7 
+826,7 @@ err: int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bkey_i_inode_generation delete; struct bch_inode_unpacked inode_u; @@ -834,8 +834,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) u32 snapshot; int ret; - bch2_trans_init(&trans, c, 0, 1024); - /* * If this was a directory, there shouldn't be any real dirents left - * but there could be whiteouts (from hash collisions) that we should @@ -844,19 +842,19 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) * XXX: the dirent code ideally would delete whiteouts when they're no * longer needed */ - ret = bch2_inode_delete_keys(&trans, inum, BTREE_ID_extents) ?: - bch2_inode_delete_keys(&trans, inum, BTREE_ID_xattrs) ?: - bch2_inode_delete_keys(&trans, inum, BTREE_ID_dirents); + ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: + bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: + bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents); if (ret) goto err; retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto err; - k = bch2_bkey_get_iter(&trans, &iter, BTREE_ID_inodes, + k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, SPOS(0, inum.inum, snapshot), BTREE_ITER_INTENT|BTREE_ITER_CACHED); ret = bkey_err(k); @@ -864,7 +862,7 @@ retry: goto err; if (!bkey_is_inode(k.k)) { - bch2_fs_inconsistent(trans.c, + bch2_fs_inconsistent(c, "inode %llu:%u not found when deleting", inum.inum, snapshot); ret = -EIO; @@ -877,15 +875,15 @@ retry: delete.k.p = iter.pos; delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1); - ret = bch2_trans_update(&trans, &iter, &delete.k_i, 0) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); err: - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -919,7 +917,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, struct bch_inode_unpacked *inode) { return bch2_trans_do(c, NULL, NULL, 0, - bch2_inode_find_by_inum_trans(&trans, inum, inode)); + bch2_inode_find_by_inum_trans(trans, inum, inode)); } int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) @@ -1091,14 +1089,12 @@ delete: int bch2_delete_dead_inodes(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c, 0, 0); - - ret = bch2_btree_write_buffer_flush_sync(&trans); + ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) goto err; @@ -1108,26 +1104,26 @@ int bch2_delete_dead_inodes(struct bch_fs *c) * but we can't retry because the btree write buffer won't have been * flushed and we'd spin: */ - for_each_btree_key(&trans, iter, BTREE_ID_deleted_inodes, POS_MIN, + for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = lockrestart_do(&trans, may_delete_deleted_inode(&trans, k.k->p)); + ret = lockrestart_do(trans, may_delete_deleted_inode(trans, k.k->p)); if (ret < 0) break; if (ret) { if (!test_bit(BCH_FS_RW, &c->flags)) { - bch2_trans_unlock(&trans); +
bch2_trans_unlock(trans); bch2_fs_lazy_rw(c); } - ret = bch2_inode_rm_snapshot(&trans, k.k->p.offset, k.k->p.snapshot); + ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index b1be70e15c60..668493bcfe36 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -198,19 +198,18 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; int ret; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_INTENT); - ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); + ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; @@ -289,8 +288,8 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec op.v.new_i_size = cpu_to_le64(new_i_size); return bch2_trans_run(c, - bch2_logged_op_start(&trans, &op.k_i) ?: - __bch2_resume_logged_op_truncate(&trans, &op.k_i, i_sectors_delta)); + bch2_logged_op_start(trans, &op.k_i) ?: + __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta)); } /* finsert/fcollapse: */ @@ -493,6 +492,6 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, op.v.pos = cpu_to_le64(insert ? 
U64_MAX : offset); return bch2_trans_run(c, - bch2_logged_op_start(&trans, &op.k_i) ?: - __bch2_resume_logged_op_finsert(&trans, &op.k_i, i_sectors_delta)); + bch2_logged_op_start(trans, &op.k_i) ?: + __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta)); } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 3c6c1396915a..9a57da00573d 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -359,7 +359,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio struct bch_io_failures *failed, unsigned flags) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; @@ -369,9 +369,8 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio flags |= BCH_READ_MUST_CLONE; bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, rbio->data_btree, + bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; @@ -382,7 +381,7 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, @@ -393,7 +392,7 @@ retry: goto out; } - ret = __bch2_read_extent(&trans, rbio, bvec_iter, + ret = __bch2_read_extent(trans, rbio, bvec_iter, rbio->read_pos, rbio->data_btree, k, 0, failed, flags); @@ -403,8 +402,8 @@ retry: goto err; out: bch2_rbio_done(rbio); - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); return; err: @@ -526,7 +525,7 @@ out: static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { bch2_trans_do(rbio->c, NULL, NULL, BTREE_INSERT_NOFAIL, - __bch2_rbio_narrow_crcs(&trans, rbio)); + __bch2_rbio_narrow_crcs(trans, rbio)); } /* Inner part that may run in process context */ @@ -1082,7 +1081,7 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, struct bvec_iter bvec_iter, subvol_inum inum, struct bch_io_failures *failed, unsigned flags) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; @@ -1092,16 +1091,15 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, BUG_ON(flags & BCH_READ_NODECODE); bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); iter = (struct btree_iter) { NULL }; - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto err; - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum.inum, bvec_iter.bi_sector, snapshot), BTREE_ITER_SLOTS); while (1) { @@ -1112,7 +1110,7 @@ retry: * read_extent -> io_time_reset may cause a transaction restart * without returning an error, we need to check for that here: */ - ret = bch2_trans_relock(&trans); + ret = bch2_trans_relock(trans); if (ret) break; @@ -1130,7 +1128,7 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); - ret = bch2_read_indirect_extent(&trans, &data_btree, + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, &sk); if (ret) break; @@ -1149,7 +1147,7 @@ retry: if (bvec_iter.bi_size == bytes) flags |= BCH_READ_LAST_FRAGMENT; - ret = __bch2_read_extent(&trans, rbio, bvec_iter, 
iter.pos, + ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos, data_btree, k, offset_into_extent, failed, flags); if (ret) @@ -1161,19 +1159,19 @@ retry: swap(bvec_iter.bi_size, bytes); bio_advance_iter(&rbio->bio, &bvec_iter, bytes); - ret = btree_trans_too_many_iters(&trans); + ret = btree_trans_too_many_iters(trans); if (ret) break; } err: - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || ret == READ_RETRY || ret == READ_RETRY_AVOID) goto retry; - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); if (ret) { diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 3439e9553325..659330cbe357 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -322,7 +322,7 @@ static int bch2_write_index_default(struct bch_write_op *op) struct bkey_buf sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; subvol_inum inum = { .subvol = op->subvol, @@ -333,30 +333,29 @@ static int bch2_write_index_default(struct bch_write_op *op) BUG_ON(!inum.subvol); bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); do { - bch2_trans_begin(&trans); + bch2_trans_begin(trans); k = bch2_keylist_front(keys); bch2_bkey_buf_copy(&sk, c, k); - ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &sk.k->k.p.snapshot); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) break; - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, bkey_start_pos(&sk.k->k), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - ret = bch2_extent_update(&trans, inum, &iter, sk.k, + ret = bch2_extent_update(trans, inum, &iter, sk.k, &op->res, op->new_i_size, &op->i_sectors_delta, op->flags & BCH_WRITE_CHECK_ENOSPC); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -369,7 +368,7 @@ static int bch2_write_index_default(struct bch_write_op *op) bch2_cut_front(iter.pos, k); } while (!bch2_keylist_empty(keys)); - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); return ret; @@ -1163,20 +1162,18 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) { struct bch_fs *c = op->c; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_i *orig; struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c, 0, 0); - for_each_keylist_key(&op->insert_keys, orig) { - ret = for_each_btree_key_upto_commit(&trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_upto_commit(trans, iter, BTREE_ID_extents, bkey_start_pos(&orig->k), orig->k.p, BTREE_ITER_INTENT, k, NULL, NULL, BTREE_INSERT_NOFAIL, ({ - bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size); + bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); })); if (ret && !bch2_err_matches(ret, EROFS)) { @@ -1194,7 +1191,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) } } - bch2_trans_exit(&trans); + bch2_trans_put(trans); } static void __bch2_nocow_write_done(struct bch_write_op *op) @@ -1218,7 +1215,7 @@ static void bch2_nocow_write_done(struct 
closure *cl) static void bch2_nocow_write(struct bch_write_op *op) { struct bch_fs *c = op->c; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_ptrs_c ptrs; @@ -1235,15 +1232,15 @@ static void bch2_nocow_write(struct bch_write_op *op) if (op->flags & BCH_WRITE_MOVE) return; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(&trans, op->subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, op->subvol, &snapshot); if (unlikely(ret)) goto err; - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(op->pos.inode, op->pos.offset, snapshot), BTREE_ITER_SLOTS); while (1) { @@ -1289,7 +1286,7 @@ retry: /* Unlock before taking nocow locks, doing IO: */ bkey_reassemble(op->insert_keys.top, k); - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); bch2_cut_front(op->pos, op->insert_keys.top); if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) @@ -1338,7 +1335,7 @@ retry: bch2_btree_iter_advance(&iter); } out: - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; @@ -1353,7 +1350,7 @@ err: op->flags |= BCH_WRITE_DONE; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); /* fallback to cow write path? */ if (!(op->flags & BCH_WRITE_DONE)) { @@ -1431,7 +1428,7 @@ again: * allocations for specific disks may hang arbitrarily long: */ ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_alloc_sectors_start_trans(&trans, + bch2_alloc_sectors_start_trans(trans, op->target, op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED), op->write_point, diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 40455e892112..ad80618d1740 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -834,7 +834,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, break; ret = bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(&trans, ca, + bch2_trans_mark_metadata_bucket(trans, ca, ob[nr_got]->bucket, BCH_DATA_journal, ca->mi.bucket_size)); if (ret) { @@ -915,7 +915,7 @@ err_unblock: if (ret && !new_fs) for (i = 0; i < nr_got; i++) bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(&trans, ca, + bch2_trans_mark_metadata_bucket(trans, ca, bu[i], BCH_DATA_free, 0)); err_free: if (!new_fs) diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index d6b9f2cdf8e7..1e1a79405693 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -250,20 +250,18 @@ void bch2_blacklist_entries_gc(struct work_struct *work) struct journal_seq_blacklist_table *t; struct bch_sb_field_journal_seq_blacklist *bl; struct journal_seq_blacklist_entry *src, *dst; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); unsigned i, nr, new_nr; int ret; - bch2_trans_init(&trans, c, 0, 0); - for (i = 0; i < BTREE_ID_NR; i++) { struct btree_iter iter; struct btree *b; - bch2_trans_node_iter_init(&trans, &iter, i, POS_MIN, + bch2_trans_node_iter_init(trans, &iter, i, POS_MIN, 0, 0, BTREE_ITER_PREFETCH); retry: - bch2_trans_begin(&trans); + bch2_trans_begin(trans); b = bch2_btree_iter_peek_node(&iter); @@ -275,10 +273,10 @@ retry: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); } - bch2_trans_exit(&trans); 
+ bch2_trans_put(trans); if (ret) return; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index e133c23ad51c..8640f7dee0de 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -59,9 +59,9 @@ int bch2_resume_logged_ops(struct bch_fs *c) int ret; ret = bch2_trans_run(c, - for_each_btree_key2(&trans, iter, + for_each_btree_key2(trans, iter, BTREE_ID_logged_ops, POS_MIN, BTREE_ITER_PREFETCH, k, - resume_logged_op(&trans, &iter, k))); + resume_logged_op(trans, &iter, k))); if (ret) bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 3e8b8f2f38a3..215a653322f3 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -151,10 +151,10 @@ int bch2_check_lrus(struct bch_fs *c) int ret = 0; ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, + for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW, - bch2_check_lru_key(&trans, &iter, k, &last_flushed_pos))); + bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); if (ret) bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 4746dfa7af97..e3a51f6d6c9b 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -78,34 +78,32 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; enum btree_id id; int ret = 0; - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - for (id = 0; id < BTREE_ID_NR; id++) { if (!btree_type_has_ptrs(id)) continue; - ret = for_each_btree_key_commit(&trans, iter, id, POS_MIN, + ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_dev_usrdata_drop_key(&trans, &iter, k, dev_idx, flags)); + bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags)); if (ret) break; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct closure cl; struct btree *b; @@ -117,16 +115,16 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) if (flags & BCH_FORCE_IF_METADATA_LOST) return -EINVAL; + trans = bch2_trans_get(c); bch2_bkey_buf_init(&k); - bch2_trans_init(&trans, c, 0, 0); closure_init_stack(&cl); for (id = 0; id < BTREE_ID_NR; id++) { - bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0, + bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, BTREE_ITER_PREFETCH); retry: ret = 0; - while (bch2_trans_begin(&trans), + while (bch2_trans_begin(trans), (b = bch2_btree_iter_peek_node(&iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx)) @@ -141,7 +139,7 @@ retry: break; } - ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, 0, false); + ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; continue; @@ -157,7 +155,7 @@ next: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (ret) goto err; @@ -166,8 +164,8 @@ next: bch2_btree_interior_updates_flush(c); ret = 0; err: - 
bch2_trans_exit(&trans); bch2_bkey_buf_exit(&k, c); + bch2_trans_put(trans); BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index d62b757536a3..c1aa76f9f845 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -525,7 +525,7 @@ static int __bch2_move_data(struct moving_context *ctxt, struct bch_fs *c = ctxt->c; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct bkey_buf sk; - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct data_update_opts data_opts; @@ -533,7 +533,6 @@ static int __bch2_move_data(struct moving_context *ctxt, int ret = 0, ret2; bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); if (ctxt->stats) { ctxt->stats->data_type = BCH_DATA_user; @@ -541,15 +540,15 @@ static int __bch2_move_data(struct moving_context *ctxt, ctxt->stats->pos = start; } - bch2_trans_iter_init(&trans, &iter, btree_id, start, + bch2_trans_iter_init(trans, &iter, btree_id, start, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS); if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); - while (!move_ratelimit(&trans, ctxt)) { - bch2_trans_begin(&trans); + while (!move_ratelimit(trans, ctxt)) { + bch2_trans_begin(trans); k = bch2_btree_iter_peek(&iter); if (!k.k) @@ -570,7 +569,7 @@ static int __bch2_move_data(struct moving_context *ctxt, if (!bkey_extent_is_direct_data(k.k)) goto next_nondata; - ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum); + ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); if (ret) continue; @@ -585,7 +584,7 @@ static int __bch2_move_data(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(&trans, &iter, ctxt, NULL, + ret2 = bch2_move_extent(trans, &iter, ctxt, NULL, io_opts, btree_id, k, data_opts); if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) @@ -593,7 +592,7 @@ static int __bch2_move_data(struct moving_context *ctxt, if (ret2 == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, &trans); + bch2_move_ctxt_wait_for_io(ctxt, trans); continue; } @@ -610,8 +609,8 @@ next_nondata: bch2_btree_iter_advance(&iter); } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); return ret; @@ -826,15 +825,14 @@ int bch2_evacuate_bucket(struct bch_fs *c, struct write_point_specifier wp, bool wait_on_copygc) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct moving_context ctxt; int ret; - bch2_trans_init(&trans, c, 0, 0); bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&trans, &ctxt, NULL, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(trans, &ctxt, NULL, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -851,14 +849,13 @@ static int bch2_move_btree(struct bch_fs *c, { bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct btree *b; enum btree_id id; struct data_update_opts data_opts; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); progress_list_add(c, stats); stats->data_type = BCH_DATA_btree; @@ -871,11 +868,11 @@ static int bch2_move_btree(struct 
bch_fs *c, if (!bch2_btree_id_root(c, id)->b) continue; - bch2_trans_node_iter_init(&trans, &iter, id, POS_MIN, 0, 0, + bch2_trans_node_iter_init(trans, &iter, id, POS_MIN, 0, 0, BTREE_ITER_PREFETCH); retry: ret = 0; - while (bch2_trans_begin(&trans), + while (bch2_trans_begin(trans), (b = bch2_btree_iter_peek_node(&iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { if (kthread && kthread_should_stop()) @@ -890,7 +887,7 @@ retry: if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; - ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret; + ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) @@ -901,13 +898,13 @@ next: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (kthread && kthread_should_stop()) break; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 874c9324ab66..4017120baeee 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -300,7 +300,7 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) static int bch2_copygc_thread(void *arg) { struct bch_fs *c = arg; - struct btree_trans trans; + struct btree_trans *trans; struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; @@ -317,7 +317,7 @@ static int bch2_copygc_thread(void *arg) } set_freezable(); - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, @@ -325,16 +325,16 @@ static int bch2_copygc_thread(void *arg) false); while (!ret && !kthread_should_stop()) { - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); cond_resched(); if (!c->copy_gc_enabled) { - move_buckets_wait(&trans, &ctxt, &buckets, true); + move_buckets_wait(trans, &ctxt, &buckets, true); kthread_wait_freezable(c->copy_gc_enabled); } if (unlikely(freezing(current))) { - move_buckets_wait(&trans, &ctxt, &buckets, true); + move_buckets_wait(trans, &ctxt, &buckets, true); __refrigerator(false); continue; } @@ -345,7 +345,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; - move_buckets_wait(&trans, &ctxt, &buckets, true); + move_buckets_wait(trans, &ctxt, &buckets, true); trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); @@ -355,15 +355,15 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(&trans, &ctxt, &buckets); + ret = bch2_copygc(trans, &ctxt, &buckets); c->copygc_running = false; wake_up(&c->copygc_running_wq); } - move_buckets_wait(&trans, &ctxt, &buckets, true); + move_buckets_wait(trans, &ctxt, &buckets, true); rhashtable_destroy(&buckets.table); - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_moving_ctxt_exit(&ctxt); return 0; diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index f16aa3bc9679..36de2f071d80 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -599,7 +599,7 @@ advance: int bch2_fs_quota_read(struct bch_fs *c) { struct bch_sb_field_quota *sb_quota; - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; int ret; @@ -614,16 +614,16 @@ int bch2_fs_quota_read(struct bch_fs *c) bch2_sb_quota_read(c); 
mutex_unlock(&c->sb_lock); - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); - ret = for_each_btree_key2(&trans, iter, BTREE_ID_quotas, + ret = for_each_btree_key2(trans, iter, BTREE_ID_quotas, POS_MIN, BTREE_ITER_PREFETCH, k, __bch2_quota_set(c, k, NULL)) ?: - for_each_btree_key2(&trans, iter, BTREE_ID_inodes, + for_each_btree_key2(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - bch2_fs_quota_read_inode(&trans, &iter, k)); + bch2_fs_quota_read_inode(trans, &iter, k)); - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); @@ -956,7 +956,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: + bch2_set_quota_trans(trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); return bch2_err_class(ret); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index f566c94260d6..1dceb7eeb205 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -165,7 +165,7 @@ static int bch2_journal_replay(struct bch_fs *c) (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY|BCH_WATERMARK_reclaim : 0), - bch2_journal_replay_key(&trans, k)); + bch2_journal_replay_key(trans, k)); if (ret) { bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret)); @@ -466,7 +466,7 @@ noinline_for_stack static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { int ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, - __bch2_fs_upgrade_for_subvolumes(&trans)); + __bch2_fs_upgrade_for_subvolumes(trans)); if (ret) bch_err_fn(c, ret); return ret; @@ -1013,7 +1013,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_inode_init_early(c, &lostfound_inode); ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_create_trans(&trans, + bch2_create_trans(trans, BCACHEFS_ROOT_SUBVOL_INUM, &root_inode, &lostfound_inode, &lostfound, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index fb605b25b067..d77d0ea9afff 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -253,7 +253,7 @@ s64 bch2_remap_range(struct bch_fs *c, u64 remap_sectors, u64 new_i_size, s64 *i_sectors_delta) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter dst_iter, src_iter; struct bkey_s_c src_k; struct bkey_buf new_dst, new_src; @@ -275,11 +275,11 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_dst); bch2_bkey_buf_init(&new_src); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096); + trans = bch2_trans_get(c); - bch2_trans_iter_init(&trans, &src_iter, BTREE_ID_extents, src_start, + bch2_trans_iter_init(trans, &src_iter, BTREE_ID_extents, src_start, BTREE_ITER_INTENT); - bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start, + bch2_trans_iter_init(trans, &dst_iter, BTREE_ID_extents, dst_start, BTREE_ITER_INTENT); while ((ret == 0 || @@ -287,21 +287,21 @@ s64 bch2_remap_range(struct bch_fs *c, bkey_lt(dst_iter.pos, dst_end)) { struct disk_reservation disk_res = { 0 }; - bch2_trans_begin(&trans); + bch2_trans_begin(trans); if (fatal_signal_pending(current)) { ret = -EINTR; break; } - ret = bch2_subvolume_get_snapshot(&trans, src_inum.subvol, + ret = bch2_subvolume_get_snapshot(trans, src_inum.subvol, &src_snapshot); if (ret) continue; bch2_btree_iter_set_snapshot(&src_iter, src_snapshot); - ret = bch2_subvolume_get_snapshot(&trans, 
dst_inum.subvol, + ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol, &dst_snapshot); if (ret) continue; @@ -318,7 +318,7 @@ s64 bch2_remap_range(struct bch_fs *c, continue; if (bkey_lt(src_want, src_iter.pos)) { - ret = bch2_fpunch_at(&trans, &dst_iter, dst_inum, + ret = bch2_fpunch_at(trans, &dst_iter, dst_inum, min(dst_end.offset, dst_iter.pos.offset + src_iter.pos.offset - src_want.offset), @@ -332,7 +332,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_reassemble(&new_src, c, src_k); src_k = bkey_i_to_s_c(new_src.k); - ret = bch2_make_extent_indirect(&trans, &src_iter, + ret = bch2_make_extent_indirect(trans, &src_iter, new_src.k); if (ret) continue; @@ -360,14 +360,14 @@ s64 bch2_remap_range(struct bch_fs *c, min(src_k.k->p.offset - src_want.offset, dst_end.offset - dst_iter.pos.offset)); - ret = bch2_extent_update(&trans, dst_inum, &dst_iter, + ret = bch2_extent_update(trans, dst_inum, &dst_iter, new_dst.k, &disk_res, new_i_size, i_sectors_delta, true); bch2_disk_reservation_put(c, &disk_res); } - bch2_trans_iter_exit(&trans, &dst_iter); - bch2_trans_iter_exit(&trans, &src_iter); + bch2_trans_iter_exit(trans, &dst_iter); + bch2_trans_iter_exit(trans, &src_iter); BUG_ON(!ret && !bkey_eq(dst_iter.pos, dst_end)); BUG_ON(bkey_gt(dst_iter.pos, dst_end)); @@ -379,23 +379,23 @@ s64 bch2_remap_range(struct bch_fs *c, struct bch_inode_unpacked inode_u; struct btree_iter inode_iter = { NULL }; - bch2_trans_begin(&trans); + bch2_trans_begin(trans); - ret2 = bch2_inode_peek(&trans, &inode_iter, &inode_u, + ret2 = bch2_inode_peek(trans, &inode_iter, &inode_u, dst_inum, BTREE_ITER_INTENT); if (!ret2 && inode_u.bi_size < new_i_size) { inode_u.bi_size = new_i_size; - ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, NULL, + ret2 = bch2_inode_write(trans, &inode_iter, &inode_u) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); } - bch2_trans_iter_exit(&trans, &inode_iter); + bch2_trans_iter_exit(trans, &inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); - bch2_trans_exit(&trans); + bch2_trans_put(trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 73fca04011ad..ff7f71576d5c 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -610,11 +610,11 @@ int bch2_check_snapshot_trees(struct bch_fs *c) int ret; ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, + for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_snapshot_tree(&trans, &iter, k))); + check_snapshot_tree(trans, &iter, k))); if (ret) bch_err(c, "error %i checking snapshot trees", ret); @@ -883,11 +883,11 @@ int bch2_check_snapshots(struct bch_fs *c) * the parent's depth already be correct: */ ret = bch2_trans_run(c, - for_each_btree_key_reverse_commit(&trans, iter, + for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_snapshot(&trans, &iter, k))); + check_snapshot(trans, &iter, k))); if (ret) bch_err_fn(c, ret); return ret; @@ -1373,7 +1373,7 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans, int bch2_delete_dead_snapshots(struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_s_c_snapshot snap; @@ -1390,30 +1390,30 @@ 
int bch2_delete_dead_snapshots(struct bch_fs *c) } } - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); /* * For every snapshot node: If we have no live children and it's not * pointed to by a subvolume, delete it: */ - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, NULL, NULL, 0, - bch2_delete_redundant_snapshot(&trans, &iter, k)); + bch2_delete_redundant_snapshot(trans, &iter, k)); if (ret) { bch_err_msg(c, ret, "deleting redundant snapshots"); goto err; } - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, + for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, - bch2_snapshot_set_equiv(&trans, k)); + bch2_snapshot_set_equiv(trans, k)); if (ret) { bch_err_msg(c, ret, "in bch2_snapshots_set_equiv"); goto err; } - for_each_btree_key(&trans, iter, BTREE_ID_snapshots, + for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) { if (k.k->type != KEY_TYPE_snapshot) continue; @@ -1425,7 +1425,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) break; } } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (ret) { bch_err_msg(c, ret, "walking snapshots"); @@ -1440,16 +1440,16 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (!btree_type_has_snapshots(id)) continue; - ret = for_each_btree_key_commit(&trans, iter, + ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, &res, NULL, BTREE_INSERT_NOFAIL, - snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?: - for_each_btree_key_commit(&trans, iter, + snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?: + for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, &res, NULL, BTREE_INSERT_NOFAIL, - move_key_to_correct_snapshot(&trans, &iter, k)); + move_key_to_correct_snapshot(trans, &iter, k)); bch2_disk_reservation_put(c, &res); darray_exit(&equiv_seen); @@ -1460,7 +1460,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) } } - for_each_btree_key(&trans, iter, BTREE_ID_snapshots, + for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) { u32 snapshot = k.k->p.offset; u32 equiv = bch2_snapshot_equiv(c, snapshot); @@ -1468,23 +1468,23 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) if (equiv != snapshot) snapshot_list_add(c, &deleted_interior, snapshot); } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); /* * Fixing children of deleted snapshots can't be done completely * atomically, if we crash between here and when we delete the interior * nodes some depth fields will be off: */ - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, POS_MIN, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_snapshots, POS_MIN, BTREE_ITER_INTENT, k, NULL, NULL, BTREE_INSERT_NOFAIL, - bch2_fix_child_of_deleted_snapshot(&trans, &iter, k, &deleted_interior)); + bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior)); if (ret) goto err; darray_for_each(deleted, i) { - ret = commit_do(&trans, NULL, NULL, 0, - bch2_snapshot_node_delete(&trans, *i)); + ret = commit_do(trans, NULL, NULL, 0, + bch2_snapshot_node_delete(trans, *i)); if (ret) { bch_err_msg(c, ret, "deleting snapshot %u", *i); goto err; @@ -1492,8 +1492,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) } darray_for_each(deleted_interior, i) { - ret = commit_do(&trans, NULL, NULL, 0, - 
bch2_snapshot_node_delete(&trans, *i)); + ret = commit_do(trans, NULL, NULL, 0, + bch2_snapshot_node_delete(trans, *i)); if (ret) { bch_err_msg(c, ret, "deleting snapshot %u", *i); goto err; @@ -1504,7 +1504,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) err: darray_exit(&deleted_interior); darray_exit(&deleted); - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) bch_err_fn(c, ret); return ret; @@ -1671,11 +1671,11 @@ int bch2_snapshots_read(struct bch_fs *c) int ret = 0; ret = bch2_trans_run(c, - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, + for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, - bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_snapshot_set_equiv(&trans, k)) ?: - for_each_btree_key2(&trans, iter, BTREE_ID_snapshots, + bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: + bch2_snapshot_set_equiv(trans, k)) ?: + for_each_btree_key2(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, (set_is_ancestor_bitmap(c, k.k->p.offset), 0))); if (ret) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index ca03d585a2fa..caf2dd7dafff 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -86,10 +86,10 @@ int bch2_check_subvols(struct bch_fs *c) int ret; ret = bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, + for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_subvol(&trans, &iter, k))); + check_subvol(trans, &iter, k))); if (ret) bch_err_fn(c, ret); return ret; @@ -293,7 +293,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor bch2_evict_subvolume_inodes(c, &s); for (id = s.data; id < s.data + s.nr; id++) { - ret = bch2_trans_run(c, bch2_subvolume_delete(&trans, *id)); + ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); if (ret) { bch_err_msg(c, ret, "deleting subvolume %u", *id); break; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 332951b794b4..a00dc4a4a2c9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -470,7 +470,6 @@ int bch2_fs_read_write_early(struct bch_fs *c) static void __bch2_fs_free(struct bch_fs *c) { unsigned i; - int cpu; for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); @@ -502,12 +501,7 @@ static void __bch2_fs_free(struct bch_fs *c) percpu_free_rwsem(&c->mark_lock); free_percpu(c->online_reserved); - if (c->btree_paths_bufs) - for_each_possible_cpu(cpu) - kfree(per_cpu_ptr(c->btree_paths_bufs, cpu)->path); - darray_exit(&c->btree_roots_extra); - free_percpu(c->btree_paths_bufs); free_percpu(c->pcpu); mempool_exit(&c->large_bkey_pool); mempool_exit(&c->btree_bounce_pool); @@ -829,7 +823,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) BIOSET_NEED_BVECS) || !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) || !(c->online_reserved = alloc_percpu(u64)) || - !(c->btree_paths_bufs = alloc_percpu(struct btree_path_buf)) || mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1, btree_bytes(c)) || mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1e26c2645ce4..03dbea4d95ce 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -252,7 +252,7 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c) { - struct btree_trans trans; + struct btree_trans *trans; struct 
btree_iter iter; struct bkey_s_c k; enum btree_id id; @@ -268,13 +268,13 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; - bch2_trans_init(&trans, c, 0, 0); + trans = bch2_trans_get(c); for (id = 0; id < BTREE_ID_NR; id++) { if (!btree_type_has_ptrs(id)) continue; - for_each_btree_key(&trans, iter, id, POS_MIN, + for_each_btree_key(trans, iter, id, POS_MIN, BTREE_ITER_ALL_SNAPSHOTS, k, ret) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -308,10 +308,10 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c else if (compressed) nr_compressed_extents++; } - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); } - bch2_trans_exit(&trans); + bch2_trans_put(trans); if (ret) return ret; diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 18ccb37b5a26..c907b3e00176 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -31,7 +31,7 @@ static void delete_test_keys(struct bch_fs *c) static int test_delete(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -39,44 +39,43 @@ static int test_delete(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); k.k.p.snapshot = U32_MAX; - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); - ret = commit_do(&trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(&trans, &iter, &k.k_i, 0)); + bch2_trans_update(trans, &iter, &k.k_i, 0)); if (ret) { bch_err_msg(c, ret, "update error"); goto err; } pr_info("deleting once"); - ret = commit_do(&trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(&trans, &iter, 0)); + bch2_btree_delete_at(trans, &iter, 0)); if (ret) { bch_err_msg(c, ret, "delete error (first)"); goto err; } pr_info("deleting twice"); - ret = commit_do(&trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(&trans, &iter, 0)); + bch2_btree_delete_at(trans, &iter, 0)); if (ret) { bch_err_msg(c, ret, "delete error (second)"); goto err; } err: - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } static int test_delete_written(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -84,45 +83,41 @@ static int test_delete_written(struct bch_fs *c, u64 nr) bkey_cookie_init(&k.k_i); k.k.p.snapshot = U32_MAX; - bch2_trans_init(&trans, c, 0, 0); - - bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, k.k.p, + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, k.k.p, BTREE_ITER_INTENT); - ret = commit_do(&trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(&trans, &iter, &k.k_i, 0)); + bch2_trans_update(trans, &iter, &k.k_i, 0)); if (ret) { bch_err_msg(c, ret, "update error"); goto err; } - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); bch2_journal_flush_all_pins(&c->journal); - ret = commit_do(&trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, 0, 
bch2_btree_iter_traverse(&iter) ?: - bch2_btree_delete_at(&trans, &iter, 0)); + bch2_btree_delete_at(trans, &iter, 0)); if (ret) { bch_err_msg(c, ret, "delete error"); goto err; } err: - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } static int test_iterate(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - delete_test_keys(c); pr_info("inserting test keys"); @@ -145,7 +140,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i++); @@ -160,7 +155,7 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != --i); @@ -173,21 +168,19 @@ static int test_iterate(struct bch_fs *c, u64 nr) BUG_ON(i); err: - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } static int test_iterate_extents(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - delete_test_keys(c); pr_info("inserting test extents"); @@ -211,7 +204,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i); @@ -227,7 +220,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); @@ -241,21 +234,19 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) BUG_ON(i); err: - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } static int test_iterate_slots(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - delete_test_keys(c); pr_info("inserting test keys"); @@ -278,7 +269,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); @@ -296,7 +287,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), BTREE_ITER_SLOTS, k, ({ if (i >= nr * 2) @@ -314,20 +305,18 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) } ret = 0; err: - 
bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct bkey_s_c k; u64 i; int ret = 0; - bch2_trans_init(&trans, c, 0, 0); - delete_test_keys(c); pr_info("inserting test keys"); @@ -351,7 +340,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i + 8); @@ -370,7 +359,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) i = 0; - ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_extents, + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), BTREE_ITER_SLOTS, k, ({ if (i == nr) @@ -388,7 +377,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) } ret = 0; err: - bch2_trans_exit(&trans); + bch2_trans_put(trans); return 0; } @@ -398,43 +387,41 @@ err: */ static int test_peek_end(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); BUG_ON(k.k); - lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); BUG_ON(k.k); - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return 0; } static int test_peek_end_extents(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents, + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(0, 0, U32_MAX), 0); - lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); BUG_ON(k.k); - lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); BUG_ON(k.k); - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return 0; } @@ -510,7 +497,7 @@ static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, k.k_i.k.size = len; ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_btree_insert_nonextent(&trans, BTREE_ID_extents, &k.k_i, + bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)); if (ret) bch_err_fn(c, ret); @@ -533,7 +520,7 @@ static int test_extent_create_overlapping(struct bch_fs *c, u64 inum) /* Test skipping over keys in unrelated snapshots: */ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) { - struct btree_trans trans; + struct btree_trans 
*trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_i_cookie cookie; @@ -545,15 +532,15 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) if (ret) return ret; - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, + trans = bch2_trans_get(c); + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); - lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX)))); BUG_ON(k.k->p.snapshot != U32_MAX); - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } @@ -571,7 +558,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr) return ret; ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_snapshot_node_create(&trans, U32_MAX, + bch2_snapshot_node_create(trans, U32_MAX, snapids, snapid_subvols, 2)); @@ -602,38 +589,34 @@ static u64 test_rand(void) static int rand_insert(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct bkey_i_cookie k; int ret = 0; u64 i; - bch2_trans_init(&trans, c, 0, 0); - for (i = 0; i < nr; i++) { bkey_cookie_init(&k.k_i); k.k.p.offset = test_rand(); k.k.p.snapshot = U32_MAX; - ret = commit_do(&trans, NULL, NULL, 0, - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k.k_i, 0)); + ret = commit_do(trans, NULL, NULL, 0, + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0)); if (ret) break; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } static int rand_insert_multi(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct bkey_i_cookie k[8]; int ret = 0; unsigned j; u64 i; - bch2_trans_init(&trans, c, 0, 0); - for (i = 0; i < nr; i += ARRAY_SIZE(k)) { for (j = 0; j < ARRAY_SIZE(k); j++) { bkey_cookie_init(&k[j].k_i); @@ -641,46 +624,45 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) k[j].k.p.snapshot = U32_MAX; } - ret = commit_do(&trans, NULL, NULL, 0, - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: - bch2_btree_insert_trans(&trans, BTREE_ID_xattrs, &k[7].k_i, 0)); + ret = commit_do(trans, NULL, NULL, 0, + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[3].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[4].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[5].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: + bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0)); if (ret) break; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } static int rand_lookup(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; int ret = 0; u64 i; - 
bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); for (i = 0; i < nr; i++) { bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX)); - lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter))); + lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter))); ret = bkey_err(k); if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } @@ -712,26 +694,25 @@ static int rand_mixed_trans(struct btree_trans *trans, static int rand_mixed(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_i_cookie cookie; int ret = 0; u64 i, rand; - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs, + bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); for (i = 0; i < nr; i++) { rand = test_rand(); - ret = commit_do(&trans, NULL, NULL, 0, - rand_mixed_trans(&trans, &iter, &cookie, i, rand)); + ret = commit_do(trans, NULL, NULL, 0, + rand_mixed_trans(trans, &iter, &cookie, i, rand)); if (ret) break; } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); + bch2_trans_iter_exit(trans, &iter); + bch2_trans_put(trans); return ret; } @@ -759,22 +740,20 @@ err: static int rand_delete(struct bch_fs *c, u64 nr) { - struct btree_trans trans; + struct btree_trans *trans = bch2_trans_get(c); int ret = 0; u64 i; - bch2_trans_init(&trans, c, 0, 0); - for (i = 0; i < nr; i++) { struct bpos pos = SPOS(0, test_rand(), U32_MAX); - ret = commit_do(&trans, NULL, NULL, 0, - __do_delete(&trans, pos)); + ret = commit_do(trans, NULL, NULL, 0, + __do_delete(trans, pos)); if (ret) break; } - bch2_trans_exit(&trans); + bch2_trans_put(trans); return ret; } @@ -787,14 +766,14 @@ static int seq_insert(struct bch_fs *c, u64 nr) bkey_cookie_init(&insert.k_i); return bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs, + for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, NULL, NULL, 0, ({ if (iter.pos.offset >= nr) break; insert.k.p = iter.pos; - bch2_trans_update(&trans, &iter, &insert.k_i, 0); + bch2_trans_update(trans, &iter, &insert.k_i, 0); }))); } @@ -804,7 +783,7 @@ static int seq_lookup(struct bch_fs *c, u64 nr) struct bkey_s_c k; return bch2_trans_run(c, - for_each_btree_key2_upto(&trans, iter, BTREE_ID_xattrs, + for_each_btree_key2_upto(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, 0)); @@ -816,14 +795,14 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) struct bkey_s_c k; return bch2_trans_run(c, - for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs, + for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), BTREE_ITER_INTENT, k, NULL, NULL, 0, ({ struct bkey_i_cookie u; bkey_reassemble(&u.k_i, k); - bch2_trans_update(&trans, &iter, &u.k_i, 0); + bch2_trans_update(trans, &iter, &u.k_i, 0); }))); } diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 637174b249a2..b069b1a62e25 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -307,24 +307,22 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { struct bch_fs *c = dentry->d_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); - struct btree_trans trans; + struct btree_trans 
*trans = bch2_trans_get(c);
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
 	u64 offset = 0, inum = inode->ei_inode.bi_inum;
 	u32 snapshot;
 	int ret;
-
-	bch2_trans_init(&trans, c, 0, 0);
 retry:
-	bch2_trans_begin(&trans);
+	bch2_trans_begin(trans);
 	iter = (struct btree_iter) { NULL };
 
-	ret = bch2_subvolume_get_snapshot(&trans, inode->ei_subvol, &snapshot);
+	ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
 	if (ret)
 		goto err;
 
-	for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
+	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs,
 			SPOS(inum, offset, snapshot),
 			POS(inum, U64_MAX), 0, k, ret) {
 		if (k.k->type != KEY_TYPE_xattr)
@@ -336,12 +334,12 @@ retry:
 	}
 
 	offset = iter.pos.offset;
-	bch2_trans_iter_exit(&trans, &iter);
+	bch2_trans_iter_exit(trans, &iter);
 err:
 	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 		goto retry;
 
-	bch2_trans_exit(&trans);
+	bch2_trans_put(trans);
 
 	if (ret)
 		goto out;
@@ -366,7 +364,7 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler,
 	struct bch_inode_info *inode = to_bch_ei(vinode);
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 	int ret = bch2_trans_do(c, NULL, NULL, 0,
-		bch2_xattr_get_trans(&trans, inode, name, buffer, size, handler->flags));
+		bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
 
 	return bch2_err_class(ret);
 }
@@ -381,18 +379,14 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 	struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
 	struct bch_inode_unpacked inode_u;
-	struct btree_trans trans;
 	int ret;
 
-	bch2_trans_init(&trans, c, 0, 0);
-
-	ret = commit_do(&trans, NULL, NULL, 0,
-			bch2_xattr_set(&trans, inode_inum(inode), &inode_u,
+	ret = bch2_trans_run(c,
+		commit_do(trans, NULL, NULL, 0,
+			bch2_xattr_set(trans, inode_inum(inode), &inode_u,
 				       &hash, name, value, size,
-				       handler->flags, flags));
-	if (!ret)
-		bch2_inode_update_after_write(&trans, inode, &inode_u, ATTR_CTIME);
-	bch2_trans_exit(&trans);
+				       handler->flags, flags)) ?:
+		(bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0));
 
 	return bch2_err_class(ret);
 }
-- cgit v1.2.3

From 7239f8e0ee7fb0504d18b9570172688684f3606d Mon Sep 17 00:00:00 2001
From: Brian Foster
Date: Fri, 15 Sep 2023 08:51:54 -0400
Subject: bcachefs: initial freeze/unfreeze support

Initial support for the vfs superblock freeze and unfreeze operations.
Superblock freeze occurs in stages, where the vfs attempts to quiesce
high level write operations, page faults, fs internal operations, and
then finally calls into the filesystem for any last stage steps (i.e.
log flushing, etc.) before marking the superblock frozen.

The majority of write paths are covered by freeze protection (i.e.
sb_start_write() and friends) in higher level common code, with the
exception of the fs-internal SB_FREEZE_FS stage (i.e.
sb_start_intwrite()). This typically maps to active filesystem
transactions in a manner that allows the vfs to implement a barrier of
internal fs operations during the freeze sequence. This is not a viable
model for bcachefs, however, because it utilizes transactions both to
populate the journal as well as to perform journal reclaim. This means
that mapping intwrite protection to transaction lifecycle or
transaction commit is likely to deadlock freeze, as quiescing the
journal requires transactional operations blocked by the final stage of
freeze.
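
To make the deadlock concrete, here is a minimal sketch of the
conventional mapping the paragraph above rules out. This is illustrative
only, not bcachefs code; example_trans_commit() is a hypothetical
stand-in for a transaction commit path, and only sb_start_intwrite() /
sb_end_intwrite() are the stock VFS API:

/*
 * Hypothetical sketch of the usual way SB_FREEZE_FS maps onto
 * fs-internal transactions. For bcachefs this would deadlock: once
 * freeze_super() reaches SB_FREEZE_FS, this call blocks -- including
 * for the transactions journal reclaim itself must commit in order to
 * quiesce the journal, so freeze could never finish.
 */
static int example_trans_commit(struct super_block *sb)
{
	sb_start_intwrite(sb);	/* blocks while the sb is freezing/frozen */
	/* ... commit btree updates, populating the journal ... */
	sb_end_intwrite(sb);
	return 0;
}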
The flipside of this is that bcachefs does already maintain its own
internal sets of write references for similar purposes, currently
utilized for transitions from read-write to read-only mode. Since this
largely mirrors the high level sequence involved with freeze, we can
simply invoke this mechanism in the freeze callback to fully quiesce the
filesystem in the final stage. This means that while the SB_FREEZE_FS
stage is essentially a no-op, the ->freeze_fs() callback that
immediately follows begins by performing effectively the same step by
quiescing all internal write references.

One caveat to this approach is that without integration of internal
freeze protection, write operations gated on internal write refs will
fail with an internal -EROFS error rather than block on acquiring freeze
protection. IOW, this is roughly equivalent to only having support for
sb_start_intwrite_trylock(), and not the blocking variant. Many of these
paths already use non-blocking internal write refs and so would map into
an sb_start_intwrite_trylock() anyways. The only instance of this I've
been able to uncover that doesn't explicitly rely on a higher level
non-blocking write ref is the bch2_rbio_narrow_crcs() path, which
updates crcs in certain read cases, and Kent has pointed out isn't
critical if it happens to fail due to read-only status.

Given that, implement basic freeze support as described above and leave
tighter integration with internal freeze protection as a possible future
enhancement. There are multiple potential ideas worth exploring here.
For example, we could implement a multi-stage freeze callback that might
allow bcachefs to quiesce its internal write references without
deadlocks, we could integrate intwrite protection with bcachefs'
internal write references somehow or another, or perhaps consider
implementing blocking support for internal write refs to be used
specifically for freeze, etc. In the meantime, this enables functional
freeze support and the associated test coverage that comes with it.

Signed-off-by: Brian Foster
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/fs.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

(limited to 'fs/bcachefs/fs.c')

diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index bfbd4f004edc..73a3cebd734f 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1716,6 +1716,35 @@ static void bch2_put_super(struct super_block *sb)
 	__bch2_fs_stop(c);
 }
 
+/*
+ * bcachefs doesn't currently integrate intwrite freeze protection but the
+ * internal write references serve the same purpose. Therefore reuse the
+ * read-only transition code to perform the quiesce. The caveat is that we don't
+ * currently have the ability to block tasks that want a write reference while
+ * the superblock is frozen. This is fine for now, but we should either add
+ * blocking support or find a way to integrate sb_start_intwrite() and friends.
+ */
+static int bch2_freeze(struct super_block *sb)
+{
+	struct bch_fs *c = sb->s_fs_info;
+
+	down_write(&c->state_lock);
+	bch2_fs_read_only(c);
+	up_write(&c->state_lock);
+	return 0;
+}
+
+static int bch2_unfreeze(struct super_block *sb)
+{
+	struct bch_fs *c = sb->s_fs_info;
+	int ret;
+
+	down_write(&c->state_lock);
+	ret = bch2_fs_read_write(c);
+	up_write(&c->state_lock);
+	return ret;
+}
+
 static const struct super_operations bch_super_operations = {
 	.alloc_inode	= bch2_alloc_inode,
 	.destroy_inode	= bch2_destroy_inode,
@@ -1727,10 +1756,8 @@ static const struct super_operations bch_super_operations = {
 	.show_options	= bch2_show_options,
 	.remount_fs	= bch2_remount,
 	.put_super	= bch2_put_super,
-#if 0
 	.freeze_fs	= bch2_freeze,
 	.unfreeze_fs	= bch2_unfreeze,
-#endif
 };
 
 static int bch2_set_super(struct super_block *s, void *data)
-- cgit v1.2.3

From dc08c661a291f5e479fdde8322a4c295c69a3aef Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Thu, 28 Sep 2023 00:50:27 -0400
Subject: bcachefs: Use strsep() in split_devs()

Minor refactoring to fix a smatch complaint.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/fs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs/bcachefs/fs.c')

diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 73a3cebd734f..bc009ef497d0 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1595,7 +1595,7 @@ static struct bch_fs *bch2_path_to_fs(const char *path)
 static char **split_devs(const char *_dev_name, unsigned *nr)
 {
 	char *dev_name = NULL, **devs = NULL, *s;
-	size_t i, nr_devs = 0;
+	size_t i = 0, nr_devs = 0;
 
 	dev_name = kstrdup(_dev_name, GFP_KERNEL);
 	if (!dev_name)
@@ -1610,9 +1610,7 @@ static char **split_devs(const char *_dev_name, unsigned *nr)
 		return NULL;
 	}
 
-	for (i = 0, s = dev_name;
-	     s;
-	     (s = strchr(s, ':')) && (*s++ = '\0'))
+	while ((s = strsep(&dev_name, ":")))
 		devs[i++] = s;
 
 	*nr = nr_devs;
-- cgit v1.2.3
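
For readers unfamiliar with strsep(): a minimal userspace demonstration
of the idiom the new loop relies on (illustrative only, not part of the
patch; the kernel carries its own strsep() in lib/string.c, and glibc/BSD
provide it as an extension). strsep() NUL-terminates one token per call
and advances the cursor in place, which is what lets the old three-clause
for loop over strchr() collapse into a single while condition; unlike
strtok(), it also yields empty tokens for consecutive separators, so the
token count stays in step with the ':' count computed earlier.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* stands in for the kstrdup()'d dev_name in split_devs() */
	char *buf = strdup("/dev/sda:/dev/sdb:/dev/sdc");
	char *cursor = buf, *s;

	if (!buf)
		return 1;

	while ((s = strsep(&cursor, ":")))	/* one token per call */
		printf("dev: %s\n", s);

	free(buf);	/* free the original pointer; strsep() moved the cursor */
	return 0;
}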