Diffstat (limited to 'fs/bcachefs')
59 files changed, 711 insertions, 550 deletions
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index c9798750202d..bf1c94e51dd0 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -26,6 +26,7 @@ config BCACHEFS_FS
 	select SRCU
 	select SYMBOLIC_ERRNAME
 	select MIN_HEAP
+	select XARRAY_MULTI
 	help
 	  The bcachefs filesystem - a modern, copy on write filesystem, with
 	  support for multiple devices, compression, checksumming, etc.
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index 99487727ae64..d03adc36100e 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -273,7 +273,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
 	struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
 	struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
-	struct btree_iter iter = { NULL };
+	struct btree_iter iter = {};
 	struct posix_acl *acl = NULL;
 
 	if (rcu)
@@ -344,7 +344,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
 {
 	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-	struct btree_iter inode_iter = { NULL };
+	struct btree_iter inode_iter = {};
 	struct bch_inode_unpacked inode_u;
 	struct posix_acl *acl;
 	umode_t mode;
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c12ca7538e4f..94ea9e49aec4 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -610,7 +610,7 @@ int bch2_alloc_read(struct bch_fs *c)
 		 * bch2_check_alloc_key() which runs later:
 		 */
 		if (!ca) {
-			bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
+			bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
 			continue;
 		}
 
@@ -631,17 +631,17 @@ int bch2_alloc_read(struct bch_fs *c)
 		 * bch2_check_alloc_key() which runs later:
 		 */
 		if (!ca) {
-			bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
+			bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
 			continue;
 		}
 
 		if (k.k->p.offset < ca->mi.first_bucket) {
-			bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode, ca->mi.first_bucket));
+			bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket));
 			continue;
 		}
 
 		if (k.k->p.offset >= ca->mi.nbuckets) {
-			bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
+			bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
 			continue;
 		}
 
@@ -1039,9 +1039,10 @@ invalid_bucket:
 * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
 * extents style btrees, but works on non-extents btrees:
 */
-static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
+static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter,
+					    struct bpos end, struct bkey *hole)
 {
-	struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
 
 	if (bkey_err(k))
 		return k;
@@ -1052,9 +1053,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
 		struct btree_iter iter2;
 		struct bpos next;
 
-		bch2_trans_copy_iter(&iter2, iter);
+		bch2_trans_copy_iter(trans, &iter2, iter);
 
-		struct btree_path *path = btree_iter_path(iter->trans, iter);
+		struct btree_path *path = btree_iter_path(trans, iter);
 		if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
 			end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));
 
@@ -1064,9 +1065,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
 		 * btree node min/max is a closed interval, upto takes a half
 		 * open interval:
 		 */
-		k = bch2_btree_iter_peek_max(&iter2, end);
+		k = bch2_btree_iter_peek_max(trans, &iter2, end);
 		next = iter2.pos;
-		bch2_trans_iter_exit(iter->trans, &iter2);
+		bch2_trans_iter_exit(trans, &iter2);
 
 		BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
 
@@ -1107,13 +1108,14 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck
 	return *ca != NULL;
 }
 
-static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter,
-							struct bch_dev **ca, struct bkey *hole)
+static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans,
+							struct btree_iter *iter,
+							struct bch_dev **ca, struct bkey *hole)
 {
-	struct bch_fs *c = iter->trans->c;
+	struct bch_fs *c = trans->c;
 	struct bkey_s_c k;
 again:
-	k = bch2_get_key_or_hole(iter, POS_MAX, hole);
+	k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole);
 	if (bkey_err(k))
 		return k;
 
@@ -1126,7 +1128,7 @@ again:
 		if (!next_bucket(c, ca, &hole_start))
 			return bkey_s_c_null;
 
-		bch2_btree_iter_set_pos(iter, hole_start);
+		bch2_btree_iter_set_pos(trans, iter, hole_start);
 		goto again;
 	}
 
@@ -1167,8 +1169,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 
 	a = bch2_alloc_to_v4(alloc_k, &a_convert);
 
-	bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
-	k = bch2_btree_iter_peek_slot(discard_iter);
+	bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p);
+	k = bch2_btree_iter_peek_slot(trans, discard_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -1181,8 +1183,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 		goto err;
 	}
 
-	bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
-	k = bch2_btree_iter_peek_slot(freespace_iter);
+	bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
+	k = bch2_btree_iter_peek_slot(trans, freespace_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -1195,8 +1197,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 		goto err;
 	}
 
-	bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
-	k = bch2_btree_iter_peek_slot(bucket_gens_iter);
+	bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
+	k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -1249,9 +1251,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
 	if (!ca->mi.freespace_initialized)
 		return 0;
 
-	bch2_btree_iter_set_pos(freespace_iter, start);
+	bch2_btree_iter_set_pos(trans, freespace_iter, start);
 
-	k = bch2_btree_iter_peek_slot(freespace_iter);
+	k = bch2_btree_iter_peek_slot(trans, freespace_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -1300,9 +1302,9 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
 	unsigned i, gens_offset, gens_end_offset;
 	int ret;
 
-	bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
+	bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
 
-	k = bch2_btree_iter_peek_slot(bucket_gens_iter);
+	k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -1435,7 +1437,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
 	*gen = a->gen;
 out:
 fsck_err:
-	bch2_set_btree_iter_dontneed(&alloc_iter);
+	bch2_set_btree_iter_dontneed(trans, &alloc_iter);
 	bch2_trans_iter_exit(trans, &alloc_iter);
 	printbuf_exit(&buf);
 	return ret;
@@ -1572,7 +1574,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
 
 		bch2_trans_begin(trans);
 
-		k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole);
+		k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole);
 		ret = bkey_err(k);
 		if (ret)
 			goto bkey_err;
@@ -1610,7 +1612,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
 		if (ret)
 			goto bkey_err;
 
-		bch2_btree_iter_set_pos(&iter, next);
+		bch2_btree_iter_set_pos(trans, &iter, next);
 bkey_err:
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 			continue;
@@ -1638,7 +1640,7 @@ bkey_err:
 			     BTREE_ITER_prefetch);
 	while (1) {
 		bch2_trans_begin(trans);
-		k = bch2_btree_iter_peek(&iter);
+		k = bch2_btree_iter_peek(trans, &iter);
 		if (!k.k)
 			break;
 
@@ -1657,7 +1659,7 @@ bkey_err:
 			break;
 		}
 
-		bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
+		bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos));
 	}
 	bch2_trans_iter_exit(trans, &iter);
 	if (ret)
@@ -1685,7 +1687,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	struct printbuf buf = PRINTBUF;
 	int ret;
 
-	alloc_k = bch2_btree_iter_peek(alloc_iter);
+	alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
 	if (!alloc_k.k)
 		return 0;
 
@@ -1826,7 +1828,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct bpos pos = need_discard_iter->pos;
-	struct btree_iter iter = { NULL };
+	struct btree_iter iter = {};
 	struct bkey_s_c k;
 	struct bkey_i_alloc_v4 *a;
 	struct printbuf buf = PRINTBUF;
@@ -1950,7 +1952,7 @@ static void bch2_do_discards_work(struct work_struct *work)
 	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit,
 			      s.discarded, bch2_err_str(ret));
 
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[WRITE]);
 	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 }
 
@@ -1967,7 +1969,7 @@ void bch2_dev_do_discards(struct bch_dev *ca)
 	if (queue_work(c->write_ref_wq, &ca->discard_work))
 		return;
 
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[WRITE]);
 put_write_ref:
 	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 }
@@ -2045,7 +2047,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
 	trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
 
 	bch2_trans_put(trans);
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[WRITE]);
 	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
 }
 
@@ -2065,7 +2067,7 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
 	if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
 		return;
 
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[WRITE]);
 put_ref:
 	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
 }
@@ -2082,6 +2084,9 @@ static int invalidate_one_bp(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
+	if (!extent_k.k)
+		return 0;
+
 	struct bkey_i *n =
 		bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
 				   BTREE_UPDATE_internal_snapshot_node);
@@ -2199,9 +2204,9 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter
 {
 	struct bkey_s_c k;
 again:
-	k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
+	k = bch2_btree_iter_peek_max(trans, iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
 	if (!k.k && !*wrapped) {
-		bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
+		bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0));
 		*wrapped = true;
 		goto again;
 	}
@@ -2251,12 +2256,12 @@ restart_err:
 		if (ret)
 			break;
 
-		bch2_btree_iter_advance(&iter);
+		bch2_btree_iter_advance(trans, &iter);
 	}
 	bch2_trans_iter_exit(trans, &iter);
 err:
 	bch2_trans_put(trans);
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[WRITE]);
 	bch2_bkey_buf_exit(&last_flushed, c);
 	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
 }
@@ -2274,7 +2279,7 @@ void bch2_dev_do_invalidates(struct bch_dev *ca)
 	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
 		return;
 
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[WRITE]);
 put_ref:
 	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
 }
@@ -2321,7 +2326,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
 			break;
 		}
 
-		k = bch2_get_key_or_hole(&iter, end, &hole);
+		k = bch2_get_key_or_hole(trans, &iter, end, &hole);
 		ret = bkey_err(k);
 		if (ret)
 			goto bkey_err;
@@ -2340,7 +2345,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
 			if (ret)
 				goto bkey_err;
 
-			bch2_btree_iter_advance(&iter);
+			bch2_btree_iter_advance(trans, &iter);
 		} else {
 			struct bkey_i *freespace;
 
@@ -2360,7 +2365,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
 			if (ret)
 				goto bkey_err;
 
-			bch2_btree_iter_set_pos(&iter, k.k->p);
+			bch2_btree_iter_set_pos(trans, &iter, k.k->p);
 		}
 bkey_err:
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -2506,7 +2511,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
 
 	bch2_set_ra_pages(c, ra_pages);
 
-	for_each_rw_member(c, ca) {
+	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
 		u64 dev_reserve = 0;
 
 		/*
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index c556ccaffe89..34b3d6ac4fbb 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -321,11 +321,11 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
 {
 	u64 want_free = ca->mi.nbuckets >> 7;
 	u64 free = max_t(s64, 0,
-			 u.d[BCH_DATA_free].buckets
-			 + u.d[BCH_DATA_need_discard].buckets
+			 u.buckets[BCH_DATA_free]
+			 + u.buckets[BCH_DATA_need_discard]
			 - bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe));
 
-	return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
+	return clamp_t(s64, want_free - free, 0, u.buckets[BCH_DATA_cached]);
 }
 
 void bch2_dev_do_invalidates(struct bch_dev *);
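A usage sketch of the keys-or-holes convention described in the comment above bch2_get_key_or_hole(): a real key is processed and stepped past, while a synthesized hole (k.k->type == 0) covers the whole span up to k.k->p. This mirrors the bch2_dev_freespace_init() loop in this patch; the function name walk_alloc_keys_and_holes is hypothetical, and transaction-restart/commit handling is elided.

	/*
	 * Sketch only - modeled on bch2_dev_freespace_init() above; assumes an
	 * iterator already initialized on the alloc btree.
	 */
	static int walk_alloc_keys_and_holes(struct btree_trans *trans,
					     struct btree_iter *iter, struct bpos end)
	{
		struct bkey hole;

		while (!bkey_ge(iter->pos, end)) {
			struct bkey_s_c k = bch2_get_key_or_hole(trans, iter, end, &hole);
			int ret = bkey_err(k);
			if (ret)
				return ret;

			if (k.k->type) {
				/* a real key: process it, then step past it */
				bch2_btree_iter_advance(trans, iter);
			} else {
				/* a synthesized hole covering [iter->pos, k.k->p) */
				bch2_btree_iter_set_pos(trans, iter, k.k->p);
			}
		}
		return 0;
	}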
prt_printf(&buf, "free\t%llu\n", usage->buckets[BCH_DATA_free]); prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(ca, *usage, watermark)); prt_printf(&buf, "copygc_wait\t%lu/%lli\n", bch2_copygc_wait_amount(c), @@ -524,10 +524,10 @@ again: bch2_dev_usage_read_fast(ca, usage); avail = dev_buckets_free(ca, *usage, watermark); - if (usage->d[BCH_DATA_need_discard].buckets > avail) + if (usage->buckets[BCH_DATA_need_discard] > avail) bch2_dev_do_discards(ca); - if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) + if (usage->buckets[BCH_DATA_need_gc_gens] > avail) bch2_gc_gens_async(c); if (should_invalidate_buckets(ca, *usage)) @@ -606,8 +606,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, static int __dev_stripe_cmp(struct dev_stripe_state *stripe, unsigned l, unsigned r) { - return ((stripe->next_alloc[l] > stripe->next_alloc[r]) - - (stripe->next_alloc[l] < stripe->next_alloc[r])); + return cmp_int(stripe->next_alloc[l], stripe->next_alloc[r]); } #define dev_stripe_cmp(l, r) __dev_stripe_cmp(stripe, l, r) @@ -626,25 +625,62 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, return ret; } +static const u64 stripe_clock_hand_rescale = 1ULL << 62; /* trigger rescale at */ +static const u64 stripe_clock_hand_max = 1ULL << 56; /* max after rescale */ +static const u64 stripe_clock_hand_inv = 1ULL << 52; /* max increment, if a device is empty */ + +static noinline void bch2_stripe_state_rescale(struct dev_stripe_state *stripe) +{ + /* + * Avoid underflowing clock hands if at all possible, if clock hands go + * to 0 then we lose information - clock hands can be in a wide range if + * we have devices we rarely try to allocate from, if we generally + * allocate from a specified target but only sometimes have to fall back + * to the whole filesystem. + */ + u64 scale_max = U64_MAX; /* maximum we can subtract without underflow */ + u64 scale_min = 0; /* minumum we must subtract to avoid overflow */ + + for (u64 *v = stripe->next_alloc; + v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) { + if (*v) + scale_max = min(scale_max, *v); + if (*v > stripe_clock_hand_max) + scale_min = max(scale_min, *v - stripe_clock_hand_max); + } + + u64 scale = max(scale_min, scale_max); + + for (u64 *v = stripe->next_alloc; + v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) + *v = *v < scale ? 0 : *v - scale; +} + static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca, struct dev_stripe_state *stripe, struct bch_dev_usage *usage) { + /* + * Stripe state has a per device clock hand: we allocate from the device + * with the smallest clock hand. + * + * When we allocate, we don't do a simple increment; we add the inverse + * of the device's free space. This results in round robin behavior that + * biases in favor of the device(s) with more free space. + */ + u64 *v = stripe->next_alloc + ca->dev_idx; u64 free_space = __dev_buckets_available(ca, *usage, BCH_WATERMARK_normal); u64 free_space_inv = free_space - ? div64_u64(1ULL << 48, free_space) - : 1ULL << 48; - u64 scale = *v / 4; + ? div64_u64(stripe_clock_hand_inv, free_space) + : stripe_clock_hand_inv; - if (*v + free_space_inv >= *v) - *v += free_space_inv; - else - *v = U64_MAX; + /* Saturating add, avoid overflow: */ + u64 sum = *v + free_space_inv; + *v = sum >= *v ? sum : U64_MAX; - for (v = stripe->next_alloc; - v < stripe->next_alloc + ARRAY_SIZE(stripe->next_alloc); v++) - *v = *v < scale ? 
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 21d1d86d5008..ff26bb515150 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -252,12 +252,24 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
 				  0,
 				  bp.v->level,
 				  iter_flags);
-	struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
 	if (bkey_err(k)) {
 		bch2_trans_iter_exit(trans, iter);
 		return k;
 	}
 
+	/*
+	 * peek_slot() doesn't normally return NULL - except when we ask for a
+	 * key at a btree level that doesn't exist.
+	 *
+	 * We may want to revisit this and change peek_slot():
+	 */
+	if (!k.k) {
+		bkey_init(&iter->k);
+		iter->k.p = bp.v->pos;
+		k.k = &iter->k;
+	}
+
 	if (k.k &&
 	    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
 		return k;
@@ -293,7 +305,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
 				  0,
 				  bp.v->level - 1,
 				  0);
-	struct btree *b = bch2_btree_iter_peek_node(iter);
+	struct btree *b = bch2_btree_iter_peek_node(trans, iter);
 	if (IS_ERR_OR_NULL(b))
 		goto err;
 
@@ -321,7 +333,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
 		return 0;
 
 	struct bch_fs *c = trans->c;
-	struct btree_iter alloc_iter = { NULL };
+	struct btree_iter alloc_iter = {};
 	struct bkey_s_c alloc_k;
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
@@ -462,7 +474,7 @@ err:
 	if (bio)
 		bio_put(bio);
 	kvfree(data_buf);
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[READ]);
 	printbuf_exit(&buf);
 	return ret;
 }
@@ -650,7 +662,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
 retry:
 	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
 				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
-	b = bch2_btree_iter_peek_node(&iter);
+	b = bch2_btree_iter_peek_node(trans, &iter);
 	ret = PTR_ERR_OR_ZERO(b);
 	if (ret)
 		goto err;
@@ -934,7 +946,7 @@ static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
 {
 	struct btree_iter iter;
 	bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
-	struct btree *b = bch2_btree_iter_peek_node(&iter);
+	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
 	int ret = PTR_ERR_OR_ZERO(b);
 	if (ret)
 		goto err;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f52311017aee..5d9f208a1bb7 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -524,8 +524,8 @@ struct bch_dev {
 	struct percpu_ref	ref;
 #endif
 	struct completion	ref_completion;
-	struct percpu_ref	io_ref;
-	struct completion	io_ref_completion;
+	struct percpu_ref	io_ref[2];
+	struct completion	io_ref_completion[2];
 
 	struct bch_fs		*fs;
 
@@ -562,7 +562,8 @@ struct bch_dev {
 	unsigned long		*bucket_backpointer_mismatches;
 	unsigned long		*bucket_backpointer_empty;
 
-	struct bch_dev_usage __percpu	*usage;
+	struct bch_dev_usage_full __percpu
+				*usage;
 
 	/* Allocator: */
 	u64			alloc_cursor[3];
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 2025d408979c..7b98ba2dec64 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -691,7 +691,7 @@ retry_root:
 		struct btree_iter iter;
 		bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
 					  0, bch2_btree_id_root(c, btree)->b->c.level, 0);
-		struct btree *b = bch2_btree_iter_peek_node(&iter);
+		struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
 		ret = PTR_ERR_OR_ZERO(b);
 		if (ret)
 			goto err_root;
@@ -1199,7 +1199,7 @@ int bch2_gc_gens(struct bch_fs *c)
 				BCH_TRANS_COMMIT_no_enospc, ({
 			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
 			if (!ca) {
-				bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
+				bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
 				continue;
 			}
 			bch2_alloc_write_oldest_gen(trans, ca, &iter, k);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 1d94a2bf706d..5fd4a58d2ad2 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1353,7 +1353,7 @@ start:
 			       "btree read error %s for %s",
 			       bch2_blk_status_to_str(bio->bi_status), buf.buf);
 		if (rb->have_ioref)
-			percpu_ref_put(&ca->io_ref);
+			percpu_ref_put(&ca->io_ref[READ]);
 		rb->have_ioref = false;
 
 		bch2_mark_io_failure(&failed, &rb->pick, false);
@@ -1609,6 +1609,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
 		struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
 
 		bch2_latency_acct(ca, rb->start_time, READ);
+		percpu_ref_put(&ca->io_ref[READ]);
 	}
 
 	ra->err[rb->idx] = bio->bi_status;
@@ -1908,7 +1909,8 @@ static void btree_node_scrub_work(struct work_struct *work)
 				  scrub->key.k->k.p, 0, scrub->level - 1, 0);
 
 	struct btree *b;
-	int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
+	int ret = lockrestart_do(trans,
+		PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter)));
 	if (ret)
 		goto err;
 
@@ -1927,7 +1929,7 @@ err:
 	printbuf_exit(&err);
 	bch2_bkey_buf_exit(&scrub->key, c);;
 	btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
-	percpu_ref_put(&scrub->ca->io_ref);
+	percpu_ref_put(&scrub->ca->io_ref[READ]);
 	kfree(scrub);
 	bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
 }
@@ -1996,7 +1998,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
 	return 0;
 err_free:
 	btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[READ]);
 err:
 	bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
 	return ret;
@@ -2144,6 +2146,7 @@ static void btree_node_write_endio(struct bio *bio)
 
 	if (ca && bio->bi_status) {
 		struct printbuf buf = PRINTBUF;
+		buf.atomic++;
 		prt_printf(&buf, "btree write error: %s\n  ",
 			   bch2_blk_status_to_str(bio->bi_status));
 		bch2_btree_pos_to_text(&buf, c, b);
@@ -2158,8 +2161,12 @@ static void btree_node_write_endio(struct bio *bio)
 		spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
 	}
 
+	/*
+	 * XXX: we should be using io_ref[WRITE], but we aren't retrying failed
+	 * btree writes yet (due to device removal/ro):
+	 */
 	if (wbio->have_ioref)
-		percpu_ref_put(&ca->io_ref);
+		percpu_ref_put(&ca->io_ref[READ]);
 
 	if (parent) {
 		bio_put(bio);
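The bcachefs.h hunk above turns the single per-device io_ref into a two-element array indexed by READ/WRITE, with matching completions, so read-side and write-side I/O references can be drained independently (the remaining READ put in btree_node_write_endio() is called out as an XXX in the diff). A sketch of the resulting guard pattern - dev_get_ioref() is a hypothetical helper for illustration; the diff itself only shows the percpu_ref_get()/percpu_ref_put() call sites:

	/* Hypothetical helper, for illustration only: */
	static inline bool dev_get_ioref(struct bch_dev *ca, int rw)
	{
		return percpu_ref_tryget(&ca->io_ref[rw]);
	}

	static void read_from_dev(struct bch_dev *ca)
	{
		if (!dev_get_ioref(ca, READ))
			return;		/* device going away, or read refs already dead */

		/* ... issue read I/O against ca ... */

		percpu_ref_put(&ca->io_ref[READ]);
	}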
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index a9c110b846b5..e34e9598ef25 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -244,10 +244,8 @@ void bch2_trans_verify_paths(struct btree_trans *trans)
 		bch2_btree_path_verify(trans, path);
 }
 
-static void bch2_btree_iter_verify(struct btree_iter *iter)
+static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter)
 {
-	struct btree_trans *trans = iter->trans;
-
 	BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);
 
 	BUG_ON((iter->flags & BTREE_ITER_is_extents) &&
@@ -276,9 +274,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
 	       bkey_gt(iter->pos, iter->k.p)));
 }
 
-static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
+static int bch2_btree_iter_verify_ret(struct btree_trans *trans,
+				      struct btree_iter *iter, struct bkey_s_c k)
 {
-	struct btree_trans *trans = iter->trans;
 	struct btree_iter copy;
 	struct bkey_s_c prev;
 	int ret = 0;
@@ -299,7 +297,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k
 	bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
 			     BTREE_ITER_nopreserve|
 			     BTREE_ITER_all_snapshots);
-	prev = bch2_btree_iter_prev(&copy);
+	prev = bch2_btree_iter_prev(trans, &copy);
 	if (!prev.k)
 		goto out;
 
@@ -365,9 +363,11 @@ static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
 						struct btree_path *path, unsigned l) {}
 static inline void bch2_btree_path_verify(struct btree_trans *trans,
 					  struct btree_path *path) {}
-static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
+static inline void bch2_btree_iter_verify(struct btree_trans *trans,
+					  struct btree_iter *iter) {}
 static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
-static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
+static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter,
+					     struct bkey_s_c k) { return 0; }
 
 #endif
 
@@ -1855,10 +1855,8 @@ hole:
 	return (struct bkey_s_c) { u, NULL };
 }
 
-void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
+void bch2_set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter)
 {
-	struct btree_trans *trans = iter->trans;
-
 	if (!iter->path || trans->restarted)
 		return;
 
@@ -1870,17 +1868,14 @@ void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
 /* Btree iterators: */
 
 int __must_check
-__bch2_btree_iter_traverse(struct btree_iter *iter)
+__bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter)
 {
-	return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
+	return bch2_btree_path_traverse(trans, iter->path, iter->flags);
 }
 
 int __must_check
-bch2_btree_iter_traverse(struct btree_iter *iter)
+bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter)
 {
-	struct btree_trans *trans = iter->trans;
-	int ret;
-
 	bch2_trans_verify_not_unlocked_or_in_restart(trans);
 
 	iter->path = bch2_btree_path_set_pos(trans, iter->path,
@@ -1888,7 +1883,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
 					iter->flags & BTREE_ITER_intent,
 					btree_iter_ip_allocated(iter));
 
-	ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
+	int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
 	if (ret)
 		return ret;
 
@@ -1900,14 +1895,14 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
 
 /* Iterate across nodes (leaf and interior nodes) */
 
-struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
+struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans,
+					struct btree_iter *iter)
 {
-	struct btree_trans *trans = iter->trans;
 	struct btree *b = NULL;
 	int ret;
 
 	EBUG_ON(trans->paths[iter->path].cached);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 
 	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
 	if (ret)
@@ -1929,7 +1924,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
 	btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
 out:
 	bch2_btree_iter_verify_entry_exit(iter);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 
 	return b;
 err:
@@ -1938,26 +1933,26 @@ err:
 }
 
 /* Only kept for -tools */
-struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *trans,
+						    struct btree_iter *iter)
 {
 	struct btree *b;
 
-	while (b = bch2_btree_iter_peek_node(iter),
+	while (b = bch2_btree_iter_peek_node(trans, iter),
 	       bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
-		bch2_trans_begin(iter->trans);
+		bch2_trans_begin(trans);
 
 	return b;
 }
 
-struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
+struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_iter *iter)
 {
-	struct btree_trans *trans = iter->trans;
 	struct btree *b = NULL;
 	int ret;
 
 	EBUG_ON(trans->paths[iter->path].cached);
 	bch2_trans_verify_not_unlocked_or_in_restart(trans);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 
 	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
 	if (ret)
@@ -2024,7 +2019,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 	EBUG_ON(btree_iter_path(trans, iter)->uptodate);
 out:
 	bch2_btree_iter_verify_entry_exit(iter);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 
 	return b;
 err:
@@ -2034,7 +2029,7 @@ err:
 
 /* Iterate across keys (in leaf nodes only) */
 
-inline bool bch2_btree_iter_advance(struct btree_iter *iter)
+inline bool bch2_btree_iter_advance(struct btree_trans *trans, struct btree_iter *iter)
 {
 	struct bpos pos = iter->k.p;
 	bool ret = !(iter->flags & BTREE_ITER_all_snapshots
@@ -2043,11 +2038,11 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
 
 	if (ret && !(iter->flags & BTREE_ITER_is_extents))
 		pos = bkey_successor(iter, pos);
-	bch2_btree_iter_set_pos(iter, pos);
+	bch2_btree_iter_set_pos(trans, iter, pos);
 	return ret;
 }
 
-inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
+inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter *iter)
 {
 	struct bpos pos = bkey_start_pos(&iter->k);
 	bool ret = !(iter->flags & BTREE_ITER_all_snapshots
@@ -2056,7 +2051,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
 
 	if (ret && !(iter->flags & BTREE_ITER_is_extents))
 		pos = bkey_predecessor(iter, pos);
-	bch2_btree_iter_set_pos(iter, pos);
+	bch2_btree_iter_set_pos(trans, iter, pos);
 	return ret;
 }
 
@@ -2183,9 +2178,9 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
 * bkey_s_c_null:
 */
 static noinline
-struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+struct bkey_s_c btree_trans_peek_key_cache(struct btree_trans *trans, struct btree_iter *iter,
+					   struct bpos pos)
 {
-	struct btree_trans *trans = iter->trans;
 	struct bch_fs *c = trans->c;
 	struct bkey u;
 	struct bkey_s_c k;
@@ -2231,14 +2226,14 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
 	return k;
 }
 
-static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
+static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct btree_iter *iter,
+					      struct bpos search_key)
 {
-	struct btree_trans *trans = iter->trans;
 	struct bkey_s_c k, k2;
 	int ret;
 
 	EBUG_ON(btree_iter_path(trans, iter)->cached);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 
 	while (1) {
 		iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
@@ -2248,7 +2243,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
 		if (unlikely(ret)) {
 			/* ensure that iter->k is consistent with iter->pos: */
-			bch2_btree_iter_set_pos(iter, iter->pos);
+			bch2_btree_iter_set_pos(trans, iter, iter->pos);
 			k = bkey_s_c_err(ret);
 			break;
 		}
@@ -2258,7 +2253,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 
 		if (unlikely(!l->b)) {
 			/* No btree nodes at requested level: */
-			bch2_btree_iter_set_pos(iter, SPOS_MAX);
+			bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
 			k = bkey_s_c_null;
 			break;
 		}
@@ -2269,10 +2264,10 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 
 		if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
 		    k.k &&
-		    (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
+		    (k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
 			k = k2;
 			if (bkey_err(k)) {
-				bch2_btree_iter_set_pos(iter, iter->pos);
+				bch2_btree_iter_set_pos(trans, iter, iter->pos);
 				break;
 			}
 		}
@@ -2305,27 +2300,28 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 			search_key = bpos_successor(l->b->key.k.p);
 		} else {
 			/* End of btree: */
-			bch2_btree_iter_set_pos(iter, SPOS_MAX);
+			bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
 			k = bkey_s_c_null;
 			break;
 		}
 	}
 
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 	return k;
 }
 
 /**
 * bch2_btree_iter_peek_max() - returns first key greater than or equal to
 * iterator's current position
+ * @trans:	btree transaction object
 * @iter:	iterator to peek from
 * @end:	search limit: returns keys less than or equal to @end
 *
 * Returns:	key if found, or an error extractable with bkey_err().
 */
-struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end)
+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree_iter *iter,
+					 struct bpos end)
 {
-	struct btree_trans *trans = iter->trans;
 	struct bpos search_key = btree_iter_search_key(iter);
 	struct bkey_s_c k;
 	struct bpos iter_pos = iter->pos;
@@ -2348,7 +2344,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
 	}
 
 	while (1) {
-		k = __bch2_btree_iter_peek(iter, search_key);
+		k = __bch2_btree_iter_peek(trans, iter, search_key);
 		if (unlikely(!k.k))
 			goto end;
 		if (unlikely(bkey_err(k)))
@@ -2462,9 +2458,9 @@ out_no_locked:
 	if (!(iter->flags & BTREE_ITER_all_snapshots))
 		iter->pos.snapshot = iter->snapshot;
 
-	ret = bch2_btree_iter_verify_ret(iter, k);
+	ret = bch2_btree_iter_verify_ret(trans, iter, k);
 	if (unlikely(ret)) {
-		bch2_btree_iter_set_pos(iter, iter->pos);
+		bch2_btree_iter_set_pos(trans, iter, iter->pos);
 		k = bkey_s_c_err(ret);
 	}
 
@@ -2472,7 +2468,7 @@ out_no_locked:
 	return k;
 end:
-	bch2_btree_iter_set_pos(iter, end);
+	bch2_btree_iter_set_pos(trans, iter, end);
 	k = bkey_s_c_null;
 	goto out_no_locked;
 }
@@ -2480,24 +2476,25 @@ end:
 /**
 * bch2_btree_iter_next() - returns first key greater than iterator's current
 * position
+ * @trans:	btree transaction object
 * @iter:	iterator to peek from
 *
 * Returns:	key if found, or an error extractable with bkey_err().
 */
-struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_next(struct btree_trans *trans, struct btree_iter *iter)
 {
-	if (!bch2_btree_iter_advance(iter))
+	if (!bch2_btree_iter_advance(trans, iter))
 		return bkey_s_c_null;
 
-	return bch2_btree_iter_peek(iter);
+	return bch2_btree_iter_peek(trans, iter);
 }
 
-static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key)
+static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter,
+						   struct bpos search_key)
 {
-	struct btree_trans *trans = iter->trans;
 	struct bkey_s_c k, k2;
 
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 
 	while (1) {
 		iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
@@ -2507,7 +2504,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
 		int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
 		if (unlikely(ret)) {
 			/* ensure that iter->k is consistent with iter->pos: */
-			bch2_btree_iter_set_pos(iter, iter->pos);
+			bch2_btree_iter_set_pos(trans, iter, iter->pos);
 			k = bkey_s_c_err(ret);
 			break;
 		}
@@ -2517,7 +2514,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
 
 		if (unlikely(!l->b)) {
 			/* No btree nodes at requested level: */
-			bch2_btree_iter_set_pos(iter, SPOS_MAX);
+			bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
 			k = bkey_s_c_null;
 			break;
 		}
@@ -2533,10 +2530,10 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
 
 		if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
 		    k.k &&
-		    (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
+		    (k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
 			k = k2;
 			if (bkey_err(k2)) {
-				bch2_btree_iter_set_pos(iter, iter->pos);
+				bch2_btree_iter_set_pos(trans, iter, iter->pos);
 				break;
 			}
 		}
@@ -2557,25 +2554,27 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
 			search_key = bpos_predecessor(path->l[0].b->data->min_key);
 		} else {
 			/* Start of btree: */
-			bch2_btree_iter_set_pos(iter, POS_MIN);
+			bch2_btree_iter_set_pos(trans, iter, POS_MIN);
 			k = bkey_s_c_null;
 			break;
 		}
 	}
 
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 	return k;
 }
 
 /**
 * bch2_btree_iter_peek_prev_min() - returns first key less than or equal to
 * iterator's current position
+ * @trans:	btree transaction object
 * @iter:	iterator to peek from
 * @end:	search limit: returns keys greater than or equal to @end
 *
 * Returns:	key if found, or an error extractable with bkey_err().
 */
-struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end)
+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct btree_iter *iter,
+					      struct bpos end)
 {
 	if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
 	   !bkey_eq(iter->pos, POS_MAX)) {
@@ -2587,7 +2586,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
 		 * real visible extents - easiest to just use peek_slot() (which
 		 * internally uses peek() for extents)
 		 */
-		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+		struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
 		if (bkey_err(k))
 			return k;
@@ -2597,7 +2596,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
 		return k;
 	}
 
-	struct btree_trans *trans = iter->trans;
 	struct bpos search_key = iter->pos;
 	struct bkey_s_c k;
 	btree_path_idx_t saved_path = 0;
@@ -2613,7 +2611,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
 	}
 
 	while (1) {
-		k = __bch2_btree_iter_peek_prev(iter, search_key);
+		k = __bch2_btree_iter_peek_prev(trans, iter, search_key);
 		if (unlikely(!k.k))
 			goto end;
 		if (unlikely(bkey_err(k)))
@@ -2704,10 +2702,10 @@ out_no_locked:
 		bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_intent);
 
 	bch2_btree_iter_verify_entry_exit(iter);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 	return k;
 end:
-	bch2_btree_iter_set_pos(iter, end);
+	bch2_btree_iter_set_pos(trans, iter, end);
 	k = bkey_s_c_null;
 	goto out_no_locked;
 }
@@ -2715,27 +2713,27 @@ end:
 /**
 * bch2_btree_iter_prev() - returns first key less than iterator's current
 * position
+ * @trans:	btree transaction object
 * @iter:	iterator to peek from
 *
 * Returns:	key if found, or an error extractable with bkey_err().
 */
-struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *trans, struct btree_iter *iter)
 {
-	if (!bch2_btree_iter_rewind(iter))
+	if (!bch2_btree_iter_rewind(trans, iter))
 		return bkey_s_c_null;
 
-	return bch2_btree_iter_peek_prev(iter);
+	return bch2_btree_iter_peek_prev(trans, iter);
 }
 
-struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btree_iter *iter)
 {
-	struct btree_trans *trans = iter->trans;
 	struct bpos search_key;
 	struct bkey_s_c k;
 	int ret;
 
 	bch2_trans_verify_not_unlocked_or_in_restart(trans);
-	bch2_btree_iter_verify(iter);
+	bch2_btree_iter_verify(trans, iter);
 	bch2_btree_iter_verify_entry_exit(iter);
 	EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
@@ -2751,7 +2749,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 		if (iter->pos.inode == KEY_INODE_MAX)
 			return bkey_s_c_null;
 
-		bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
+		bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
 	}
 
 	search_key = btree_iter_search_key(iter);
@@ -2785,7 +2783,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 			goto out;
 
 		if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
-		    (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
+		    (k = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) {
 			if (!bkey_err(k))
 				iter->k = *k.k;
 			/* We're not returning a key from iter->path: */
@@ -2812,8 +2810,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 		if (iter->flags & BTREE_ITER_intent) {
 			struct btree_iter iter2;
 
-			bch2_trans_copy_iter(&iter2, iter);
-			k = bch2_btree_iter_peek_max(&iter2, end);
+			bch2_trans_copy_iter(trans, &iter2, iter);
+			k = bch2_btree_iter_peek_max(trans, &iter2, end);
 
 			if (k.k && !bkey_err(k)) {
 				swap(iter->key_cache_path, iter2.key_cache_path);
@@ -2824,9 +2822,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 		} else {
 			struct bpos pos = iter->pos;
 
-			k = bch2_btree_iter_peek_max(iter, end);
+			k = bch2_btree_iter_peek_max(trans, iter, end);
 			if (unlikely(bkey_err(k)))
-				bch2_btree_iter_set_pos(iter, pos);
+				bch2_btree_iter_set_pos(trans, iter, pos);
 			else
 				iter->pos = pos;
 		}
@@ -2857,39 +2855,39 @@ out:
 		btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
 out_no_locked:
 	bch2_btree_iter_verify_entry_exit(iter);
-	bch2_btree_iter_verify(iter);
-	ret = bch2_btree_iter_verify_ret(iter, k);
+	bch2_btree_iter_verify(trans, iter);
+	ret = bch2_btree_iter_verify_ret(trans, iter, k);
 	if (unlikely(ret))
 		return bkey_s_c_err(ret);
 
 	return k;
 }
 
-struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *trans, struct btree_iter *iter)
 {
-	if (!bch2_btree_iter_advance(iter))
+	if (!bch2_btree_iter_advance(trans, iter))
 		return bkey_s_c_null;
 
-	return bch2_btree_iter_peek_slot(iter);
+	return bch2_btree_iter_peek_slot(trans, iter);
 }
 
-struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *trans, struct btree_iter *iter)
 {
-	if (!bch2_btree_iter_rewind(iter))
+	if (!bch2_btree_iter_rewind(trans, iter))
 		return bkey_s_c_null;
 
-	return bch2_btree_iter_peek_slot(iter);
+	return bch2_btree_iter_peek_slot(trans, iter);
 }
 
 /* Obsolete, but still used by rust wrapper in -tools */
-struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *trans, struct btree_iter *iter)
 {
 	struct bkey_s_c k;
 
-	while (btree_trans_too_many_iters(iter->trans) ||
-	       (k = bch2_btree_iter_peek_type(iter, iter->flags),
+	while (btree_trans_too_many_iters(trans) ||
+	       (k = bch2_btree_iter_peek_type(trans, iter, iter->flags),
 		bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
-		bch2_trans_begin(iter->trans);
+		bch2_trans_begin(trans);
 
 	return k;
 }
@@ -3035,7 +3033,6 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
 	iter->path		= 0;
 	iter->update_path	= 0;
 	iter->key_cache_path	= 0;
-	iter->trans		= NULL;
 }
 
 void bch2_trans_iter_init_outlined(struct btree_trans *trans,
@@ -3075,10 +3072,9 @@ void bch2_trans_node_iter_init(struct btree_trans *trans,
 	BUG_ON(iter->min_depth	!= depth);
 }
 
-void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
+void bch2_trans_copy_iter(struct btree_trans *trans,
+			  struct btree_iter *dst, struct btree_iter *src)
 {
-	struct btree_trans *trans = src->trans;
-
 	*dst = *src;
 #ifdef TRACK_PATH_ALLOCATED
 	dst->ip_allocated = _RET_IP_;
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index e6f51a3b8187..9d2cccf5d21a 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -393,36 +393,37 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct
 void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
 void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
 
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
-int __must_check bch2_btree_iter_traverse(struct btree_iter *);
+int __must_check __bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
+int __must_check bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
 
-struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
-struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
-struct btree *bch2_btree_iter_next_node(struct btree_iter *);
+struct btree *bch2_btree_iter_peek_node(struct btree_trans *, struct btree_iter *);
+struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *, struct btree_iter *);
+struct btree *bch2_btree_iter_next_node(struct btree_trans *, struct btree_iter *);
 
-struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos);
-struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *, struct btree_iter *, struct bpos);
+struct bkey_s_c bch2_btree_iter_next(struct btree_trans *, struct btree_iter *);
 
-static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_trans *trans,
+						   struct btree_iter *iter)
 {
-	return bch2_btree_iter_peek_max(iter, SPOS_MAX);
+	return bch2_btree_iter_peek_max(trans, iter, SPOS_MAX);
 }
 
-struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos);
+struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *, struct btree_iter *, struct bpos);
 
-static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
+static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter)
 {
-	return bch2_btree_iter_peek_prev_min(iter, POS_MIN);
+	return bch2_btree_iter_peek_prev_min(trans, iter, POS_MIN);
 }
 
-struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *, struct btree_iter *);
 
-struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
-struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
-struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *, struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *, struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *, struct btree_iter *);
 
-bool bch2_btree_iter_advance(struct btree_iter *);
-bool bch2_btree_iter_rewind(struct btree_iter *);
+bool bch2_btree_iter_advance(struct btree_trans *, struct btree_iter *);
+bool bch2_btree_iter_rewind(struct btree_trans *, struct btree_iter *);
 
 static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
 {
@@ -433,10 +434,9 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo
 	iter->k.size = 0;
 }
 
-static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+static inline void bch2_btree_iter_set_pos(struct btree_trans *trans,
+					   struct btree_iter *iter, struct bpos new_pos)
 {
-	struct btree_trans *trans = iter->trans;
-
 	if (unlikely(iter->update_path))
 		bch2_path_put(trans, iter->update_path,
 			      iter->flags & BTREE_ITER_intent);
@@ -454,13 +454,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
 	iter->pos = bkey_start_pos(&iter->k);
 }
 
-static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
+static inline void bch2_btree_iter_set_snapshot(struct btree_trans *trans,
+						struct btree_iter *iter, u32 snapshot)
 {
 	struct bpos pos = iter->pos;
 
 	iter->snapshot = snapshot;
 	pos.snapshot = snapshot;
-	bch2_btree_iter_set_pos(iter, pos);
+	bch2_btree_iter_set_pos(trans, iter, pos);
 }
 
 void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
@@ -502,7 +503,6 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
 					  unsigned flags,
 					  unsigned long ip)
 {
-	iter->trans		= trans;
 	iter->update_path	= 0;
 	iter->key_cache_path	= 0;
 	iter->btree_id		= btree_id;
@@ -539,9 +539,9 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
 void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
 			       enum btree_id, struct bpos,
 			       unsigned, unsigned, unsigned);
-void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
+void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btree_iter *);
 
-void bch2_set_btree_iter_dontneed(struct btree_iter *);
+void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);
 
 void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
 
@@ -588,7 +588,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
 	struct bkey_s_c k;
 
 	bch2_trans_iter_init(trans, iter, btree_id, pos, flags);
-	k = bch2_btree_iter_peek_slot(iter);
+	k = bch2_btree_iter_peek_slot(trans, iter);
 
 	if (!bkey_err(k) && type && k.k->type != type)
 		k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch);
@@ -658,14 +658,14 @@ u32 bch2_trans_begin(struct btree_trans *);
 	int _ret3 = 0;							\
 	do {								\
 		_ret3 = lockrestart_do((_trans), ({			\
-			struct btree *_b = bch2_btree_iter_peek_node(&_iter);	\
+			struct btree *_b = bch2_btree_iter_peek_node(_trans, &_iter);\
 			if (!_b)					\
 				break;					\
 									\
 			PTR_ERR_OR_ZERO(_b) ?: (_do);			\
 		})) ?:							\
 		lockrestart_do((_trans),				\
-			PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter)));	\
+			PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(_trans, &_iter)));\
 	} while (!_ret3);						\
 									\
 	bch2_trans_iter_exit((_trans), &(_iter));			\
@@ -677,31 +677,34 @@ u32 bch2_trans_begin(struct btree_trans *);
 	__for_each_btree_node(_trans, _iter, _btree_id, _start,		\
 			      0, 0, _flags, _b, _do)
 
-static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_trans *trans,
+							     struct btree_iter *iter,
 							     unsigned flags)
 {
-	return  flags & BTREE_ITER_slots      ? bch2_btree_iter_peek_slot(iter) :
-						bch2_btree_iter_peek_prev(iter);
+	return  flags & BTREE_ITER_slots      ? bch2_btree_iter_peek_slot(trans, iter) :
+						bch2_btree_iter_peek_prev(trans, iter);
 }
 
-static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
+static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_trans *trans,
+							struct btree_iter *iter,
 							unsigned flags)
 {
-	return  flags & BTREE_ITER_slots      ? bch2_btree_iter_peek_slot(iter) :
-						bch2_btree_iter_peek(iter);
+	return  flags & BTREE_ITER_slots      ? bch2_btree_iter_peek_slot(trans, iter) :
+						bch2_btree_iter_peek(trans, iter);
 }
 
-static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter,
-							    struct bpos end,
-							    unsigned flags)
+static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_trans *trans,
+							    struct btree_iter *iter,
+							    struct bpos end,
+							    unsigned flags)
 {
 	if (!(flags & BTREE_ITER_slots))
-		return bch2_btree_iter_peek_max(iter, end);
+		return bch2_btree_iter_peek_max(trans, iter, end);
 
 	if (bkey_gt(iter->pos, end))
 		return bkey_s_c_null;
 
-	return bch2_btree_iter_peek_slot(iter);
+	return bch2_btree_iter_peek_slot(trans, iter);
 }
 
 int __bch2_btree_trans_too_many_iters(struct btree_trans *);
@@ -768,14 +771,14 @@ transaction_restart:						\
 									\
 	do {								\
 		_ret3 = lockrestart_do(_trans, ({			\
-			(_k) = bch2_btree_iter_peek_max_type(&(_iter),	\
+			(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter),\
 						_end, (_flags));	\
 			if (!(_k).k)					\
 				break;					\
 									\
 			bkey_err(_k) ?: (_do);				\
 		}));							\
-	} while (!_ret3 && bch2_btree_iter_advance(&(_iter)));		\
+	} while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter)));	\
 									\
 	bch2_trans_iter_exit((_trans), &(_iter));			\
 	_ret3;								\
@@ -813,14 +816,14 @@ transaction_restart:						\
 									\
 	do {								\
 		_ret3 = lockrestart_do(_trans, ({			\
-			(_k) = bch2_btree_iter_peek_prev_type(&(_iter),	\
+			(_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter),\
 						(_flags));		\
 			if (!(_k).k)					\
 				break;					\
 									\
 			bkey_err(_k) ?: (_do);				\
 		}));							\
-	} while (!_ret3 && bch2_btree_iter_rewind(&(_iter)));		\
+	} while (!_ret3 && bch2_btree_iter_rewind(_trans, &(_iter)));	\
 									\
 	bch2_trans_iter_exit((_trans), &(_iter));			\
 	_ret3;								\
@@ -850,37 +853,38 @@ transaction_restart:						\
 			(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
 					(_journal_seq), (_commit_flags)))
 
-struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *,
+							   struct btree_iter *);
 
 #define for_each_btree_key_max_norestart(_trans, _iter, _btree_id,	\
 			   _start, _end, _flags, _k, _ret)		\
 	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
 				  (_start), (_flags));			\
-	     (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\
+	     (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),\
 	     !((_ret) = bkey_err(_k)) && (_k).k;			\
-	     bch2_btree_iter_advance(&(_iter)))
+	     bch2_btree_iter_advance(_trans, &(_iter)))
 
-#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\
+#define for_each_btree_key_max_continue_norestart(_trans, _iter, _end, _flags, _k, _ret)\
 	for (;								\
-	     (_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),	\
+	     (_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),	\
 	     !((_ret) = bkey_err(_k)) && (_k).k;			\
-	     bch2_btree_iter_advance(&(_iter)))
+	     bch2_btree_iter_advance(_trans, &(_iter)))
 
 #define for_each_btree_key_norestart(_trans, _iter, _btree_id,		\
 			   _start, _flags, _k, _ret)			\
 	for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\
 					  SPOS_MAX, _flags, _k, _ret)
 
-#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id,	\
-					     _start, _flags, _k, _ret)	\
-	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
-				  (_start), (_flags));			\
-	     (_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags),	\
-	     !((_ret) = bkey_err(_k)) && (_k).k;			\
-	     bch2_btree_iter_rewind(&(_iter)))
+#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id,	\
+					     _start, _flags, _k, _ret)	\
+	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
+				  (_start), (_flags));			\
+	     (_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), _flags),	\
+	     !((_ret) = bkey_err(_k)) && (_k).k;			\
+	     bch2_btree_iter_rewind(_trans, &(_iter)))
 
-#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret)	\
-	for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
+#define for_each_btree_key_continue_norestart(_trans, _iter, _flags, _k, _ret)	\
+	for_each_btree_key_max_continue_norestart(_trans, _iter, SPOS_MAX, _flags, _k, _ret)
 
 /*
 * This should not be used in a fastpath, without first trying _do in
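The btree_iter.c/btree_iter.h changes above (and the btree_types.h hunk below) remove the iter->trans back-pointer; every iterator helper now takes the transaction as an explicit first argument. A minimal sketch of the new calling convention, assuming an existing transaction; error and restart handling elided, and BTREE_ID_extents chosen only as an example:

	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS_MIN, 0);

	/* previously: bch2_btree_iter_peek(&iter) / bch2_btree_iter_advance(&iter) */
	while ((k = bch2_btree_iter_peek(trans, &iter)).k &&
	       !(ret = bkey_err(k))) {
		/* ... use k ... */
		bch2_btree_iter_advance(trans, &iter);
	}

	bch2_trans_iter_exit(trans, &iter);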
&c->journal.flags)) commit_flags |= BCH_TRANS_COMMIT_no_journal_res; - struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter); + struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter); ret = bkey_err(btree_k); if (ret) goto err; diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 25d54b77cdc2..8c9fdb7263fe 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p) err: bio_put(bio); free_page((unsigned long) buf); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); closure_put(w->cl); kfree(w); return 0; @@ -291,7 +291,7 @@ static int read_btree_nodes(struct find_btree_nodes *f) struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL); if (!w) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); ret = -ENOMEM; goto err; } @@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f) struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name); ret = PTR_ERR_OR_ZERO(t); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); kfree(w); bch_err_msg(c, ret, "starting kthread"); break; } closure_get(&cl); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); wake_up_process(t); } err: diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 77578da2d23f..023c472dc9ee 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -367,7 +367,6 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path) * @nodes_intent_locked - bitmask indicating which locks are intent locks */ struct btree_iter { - struct btree_trans *trans; btree_path_idx_t path; btree_path_idx_t update_path; btree_path_idx_t key_cache_path;
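
The hunk above is the pivot of this series: struct btree_iter loses its trans back-pointer, so every iterator entry point grows an explicit struct btree_trans * argument. That is what the mechanical bch2_btree_iter_*(&iter) -> bch2_btree_iter_*(trans, &iter) conversions throughout the rest of this diff are doing. A minimal sketch of the resulting calling convention, assuming the usual bch2_trans_get()/bch2_trans_put() pairing; the lookup helper itself is illustrative, not part of the patch, and transaction-restart handling is elided:

        static int example_lookup(struct bch_fs *c, struct bpos pos)
        {
                struct btree_trans *trans = bch2_trans_get(c);
                struct btree_iter iter;

                bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, pos, 0);

                /* Previously bch2_btree_iter_peek_slot(&iter): the iterator no
                 * longer knows its transaction, so the caller passes it in: */
                struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter);
                int ret = bkey_err(k);

                bch2_trans_iter_exit(trans, &iter);
                bch2_trans_put(trans);
                return ret;
        }
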
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index c05394f56424..1e6b7836cc01 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -126,7 +126,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, struct bpos new_pos) { struct bch_fs *c = trans->c; - struct btree_iter old_iter, new_iter = { NULL }; + struct btree_iter old_iter, new_iter = {}; struct bkey_s_c old_k, new_k; snapshot_id_list s; struct bkey_i *update; @@ -140,7 +140,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans, bch2_trans_iter_init(trans, &old_iter, id, old_pos, BTREE_ITER_not_extents| BTREE_ITER_all_snapshots); - while ((old_k = bch2_btree_iter_prev(&old_iter)).k && + while ((old_k = bch2_btree_iter_prev(trans, &old_iter)).k && !(ret = bkey_err(old_k)) && bkey_eq(old_pos, old_k.k->p)) { struct bpos whiteout_pos = @@ -296,7 +296,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans, BTREE_ITER_intent| BTREE_ITER_with_updates| BTREE_ITER_not_extents); - k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); + k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -322,8 +322,8 @@ static int bch2_trans_update_extent(struct btree_trans *trans, if (done) goto out; next: - bch2_btree_iter_advance(&iter); - k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX)); + bch2_btree_iter_advance(trans, &iter); + k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -592,13 +592,13 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter, enum btree_id btree, struct bpos end) { bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent); - struct bkey_s_c k = bch2_btree_iter_peek_prev(iter); + struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, iter); int ret = bkey_err(k); if (ret) goto err; - bch2_btree_iter_advance(iter); - k = bch2_btree_iter_peek_slot(iter); + bch2_btree_iter_advance(trans, iter); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) goto err; @@ -634,7 +634,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans, BTREE_ITER_cached| BTREE_ITER_not_extents| BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, k, flags); bch2_trans_iter_exit(trans, &iter); return ret; @@ -646,7 +646,7 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, struct btree_iter iter; bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k), BTREE_ITER_intent|flags); - int ret = bch2_btree_iter_traverse(&iter) ?: + int ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, k, flags); bch2_trans_iter_exit(trans, &iter); return ret; @@ -695,7 +695,7 @@ int bch2_btree_delete(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_cached| BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_delete_at(trans, &iter, update_flags); bch2_trans_iter_exit(trans, &iter); @@ -713,7 +713,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, int ret = 0; bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); - while ((k = bch2_btree_iter_peek_max(&iter, end)).k) { + while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) { struct disk_reservation disk_res = bch2_disk_reservation_init(trans->c, 0); struct bkey_i delete; @@ -808,7 +808,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree, struct btree_iter iter; bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent); - int ret = bch2_btree_iter_traverse(&iter) ?: + int ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_bit_mod_iter(trans, &iter, set); bch2_trans_iter_exit(trans, &iter); return ret; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index bf7e1dac7f46..55fbeeb8eaaa 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2147,7 +2147,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p, BTREE_MAX_DEPTH, b->c.level, BTREE_ITER_intent); - int ret = bch2_btree_iter_traverse(iter); + int ret = bch2_btree_iter_traverse(trans, iter); if (ret) goto err; @@ -2239,7 +2239,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans, bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, BTREE_MAX_DEPTH, level, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); int ret = PTR_ERR_OR_ZERO(b); if (ret) goto out; @@ -2262,7 +2262,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans, /* Traverse one depth lower to get a pointer to the node itself: */ struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0); - struct btree *b = bch2_btree_iter_peek_node(&iter); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); int ret = PTR_ERR_OR_ZERO(b); if 
(ret) goto err; @@ -2406,7 +2406,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bool skip_triggers) { struct bch_fs *c = trans->c; - struct btree_iter iter2 = { NULL }; + struct btree_iter iter2 = {}; struct btree *parent; int ret; @@ -2430,7 +2430,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, parent = btree_node_parent(btree_iter_path(trans, iter), b); if (parent) { - bch2_trans_copy_iter(&iter2, iter); + bch2_trans_copy_iter(trans, &iter2, iter); iter2.path = bch2_btree_path_make_mut(trans, iter2.path, iter2.flags & BTREE_ITER_intent, @@ -2444,7 +2444,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, trans->paths_sorted = false; - ret = bch2_btree_iter_traverse(&iter2) ?: + ret = bch2_btree_iter_traverse(trans, &iter2) ?: bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun); if (ret) goto err; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 2c09d19dd621..adbe576ec77e 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -144,7 +144,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq); EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq); - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_iter_traverse(trans, iter); if (ret) return ret; @@ -208,7 +208,7 @@ btree_write_buffered_insert(struct btree_trans *trans, trans->journal_res.seq = wb->journal_seq; - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, &wb->k, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &iter); @@ -285,7 +285,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree_write_buffer *wb = &c->btree_write_buffer; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0; bool write_locked = false; bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags); @@ -368,7 +368,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) write_locked = false; ret = lockrestart_do(trans, - bch2_btree_iter_traverse(&iter) ?: + bch2_btree_iter_traverse(trans, &iter) ?: bch2_foreground_maybe_merge(trans, iter.path, 0, BCH_WATERMARK_reclaim| BCH_TRANS_COMMIT_journal_reclaim| @@ -385,7 +385,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) BTREE_ITER_intent|BTREE_ITER_all_snapshots); } - bch2_btree_iter_set_pos(&iter, k->k.k.p); + bch2_btree_iter_set_pos(trans, &iter, k->k.k.p); btree_iter_path(trans, &iter)->preserve = false; bool accounting_accumulated = false; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 0903311cc71e..fea61e60a9ee 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -30,6 +30,12 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) { + for (unsigned i = 0; i < BCH_DATA_NR; i++) + usage->buckets[i] = percpu_u64_get(&ca->usage->d[i].buckets); +} + +void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) +{ memset(usage, 0, sizeof(*usage)); acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); } @@ -75,7 +81,7 @@ bch2_fs_usage_read_short(struct bch_fs *c) void bch2_dev_usage_to_text(struct printbuf *out, 
struct bch_dev *ca, - struct bch_dev_usage *usage) + struct bch_dev_usage_full *usage) { if (out->nr_tabstops < 5) { printbuf_tabstops_reset(out); @@ -365,7 +371,7 @@ found: struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, BTREE_ITER_intent|BTREE_ITER_all_snapshots); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node| BTREE_TRIGGER_norun); @@ -707,7 +713,7 @@ err: struct disk_accounting_pos acc; memset(&acc, 0, sizeof(acc)); acc.type = BCH_DISK_ACCOUNTING_replicas; - memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); + unsafe_memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e), "VLA"); gc_stripe_unlock(m); acc.replicas.data_type = data_type; @@ -1132,7 +1138,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, for_each_online_member(c, ca) { int ret = bch2_trans_mark_dev_sb(c, ca, flags); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ret; } } @@ -1331,7 +1337,7 @@ void bch2_dev_buckets_free(struct bch_dev *ca) int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) { - ca->usage = alloc_percpu(struct bch_dev_usage); + ca->usage = alloc_percpu(struct bch_dev_usage_full); if (!ca->usage) return -BCH_ERR_ENOMEM_usage_init; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index c5363256e363..1c38b165f48b 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -172,7 +172,16 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); +void bch2_dev_usage_full_read_fast(struct bch_dev *, struct bch_dev_usage_full *); +static inline struct bch_dev_usage_full bch2_dev_usage_full_read(struct bch_dev *ca) +{ + struct bch_dev_usage_full ret; + + bch2_dev_usage_full_read_fast(ca, &ret); + return ret; +} + +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage_full *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { @@ -207,7 +216,7 @@ static inline u64 dev_buckets_free(struct bch_dev *ca, enum bch_watermark watermark) { return max_t(s64, 0, - usage.d[BCH_DATA_free].buckets - + usage.buckets[BCH_DATA_free] - ca->nr_open_buckets - bch2_dev_buckets_reserved(ca, watermark)); } @@ -217,10 +226,10 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca, enum bch_watermark watermark) { return max_t(s64, 0, - usage.d[BCH_DATA_free].buckets - + usage.d[BCH_DATA_cached].buckets - + usage.d[BCH_DATA_need_gc_gens].buckets - + usage.d[BCH_DATA_need_discard].buckets + usage.buckets[BCH_DATA_free] + + usage.buckets[BCH_DATA_cached] + + usage.buckets[BCH_DATA_need_gc_gens] + + usage.buckets[BCH_DATA_need_discard] - ca->nr_open_buckets - bch2_dev_buckets_reserved(ca, watermark)); } diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 900b8680c8b5..0aed2500ade3 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -54,7 +54,12 @@ struct bucket_gens { u8 b[] __counted_by(nbuckets); }; +/* Only info on bucket counts: */ struct bch_dev_usage { + u64 buckets[BCH_DATA_NR]; +}; + +struct bch_dev_usage_full { struct bch_dev_usage_type { u64 buckets; u64 sectors; /* _compressed_ sectors: */
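
With the split above, bch_dev_usage shrinks to just per-data-type bucket counts (all that the allocator helpers dev_buckets_free() and __dev_buckets_available() actually consume), while bch_dev_usage_full keeps the per-type sectors/fragmented breakdown for ioctls and reporting. A sketch of choosing between the two readers via the accessors added in buckets.h; the helper function itself is illustrative, not part of the patch:

        static void example_dev_usage(struct bch_dev *ca, struct printbuf *out)
        {
                /* Hot path: bucket counts only, one percpu sum per data type: */
                struct bch_dev_usage u = bch2_dev_usage_read(ca);
                prt_printf(out, "free buckets:\t%llu\n", u.buckets[BCH_DATA_free]);

                /* Reporting path: full per-type breakdown: */
                struct bch_dev_usage_full full = bch2_dev_usage_full_read(ca);
                prt_printf(out, "free sectors:\t%llu\n", full.d[BCH_DATA_free].sectors);
        }
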
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 584f4a3eb670..5891b3a1e61c 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -350,8 +350,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, if (ctx->arg.op == BCH_DATA_OP_scrub) { struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); if (ca) { - struct bch_dev_usage u; - bch2_dev_usage_read_fast(ca, &u); + struct bch_dev_usage_full u; + bch2_dev_usage_full_read_fast(ca, &u); for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) if (ctx->arg.scrub.data_types & BIT(i)) e.p.sectors_total += u.d[i].sectors; @@ -473,7 +473,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { struct bch_ioctl_dev_usage arg; - struct bch_dev_usage src; + struct bch_dev_usage_full src; struct bch_dev *ca; unsigned i; @@ -493,7 +493,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_read(ca); + src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; @@ -514,7 +514,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, struct bch_ioctl_dev_usage_v2 __user *user_arg) { struct bch_ioctl_dev_usage_v2 arg; - struct bch_dev_usage src; + struct bch_dev_usage_full src; struct bch_dev *ca; int ret = 0; @@ -534,7 +534,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_read(ca); + src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; @@ -615,7 +615,7 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, for_each_online_member(c, ca) if (ca->dev == dev) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ca->dev_idx; } diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 1f8e035d7119..d6dd12d74d4f 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -121,7 +121,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock, } while (0); __set_current_state(TASK_RUNNING); - del_timer_sync(&wait.cpu_timer); + timer_delete_sync(&wait.cpu_timer); destroy_timer_on_stack(&wait.cpu_timer); bch2_io_timer_del(clock, &wait.io_timer); } diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 85fc90342492..28ed32449913 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -371,13 +371,14 @@ static int attempt_compress(struct bch_fs *c, }; zlib_set_workspace(&strm, workspace); - zlib_deflateInit2(&strm, + if (zlib_deflateInit2(&strm, compression.level ? 
clamp_t(unsigned, compression.level, Z_BEST_SPEED, Z_BEST_COMPRESSION) : Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, - Z_DEFAULT_STRATEGY); + Z_DEFAULT_STRATEGY) != Z_OK) + return 0; if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END) return 0; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index fe400dfc5d76..de02ebf847ec 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -216,7 +216,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, bch2_trans_begin(trans); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; @@ -398,7 +398,7 @@ restart_drop_extra_replicas: BCH_TRANS_COMMIT_no_enospc| m->data_opts.btree_insert_flags); if (!ret) { - bch2_btree_iter_set_pos(&iter, next_pos); + bch2_btree_iter_set_pos(trans, &iter, next_pos); this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size); if (trace_io_move_finish_enabled()) @@ -426,7 +426,7 @@ nowork: count_event(c, io_move_fail); - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); goto next; } out: @@ -497,7 +497,7 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos, BTREE_ITER_slots); ret = lockrestart_do(trans, ({ - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); bkey_err(k); })); bch2_trans_iter_exit(trans, &iter); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 788af88f6979..5a8bc7013512 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -57,7 +57,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, submit_bio_wait(bio); bio_put(bio); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); @@ -297,7 +297,7 @@ out: if (bio) bio_put(bio); kvfree(n_ondisk); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } #ifdef CONFIG_DEBUG_FS diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d7f9f79318a2..bf53a029f356 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -417,8 +417,8 @@ int bch2_dirent_rename(struct btree_trans *trans, enum bch_rename_mode mode) { struct qstr src_name_lookup, dst_name_lookup; - struct btree_iter src_iter = { NULL }; - struct btree_iter dst_iter = { NULL }; + struct btree_iter src_iter = {}; + struct btree_iter dst_iter = {}; struct bkey_s_c old_src, old_dst = bkey_s_c_null; struct bkey_i_dirent *new_src = NULL, *new_dst = NULL; struct bpos dst_pos = @@ -586,16 +586,16 @@ out_set_src: } if (delete_src) { - bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot); - ret = bch2_btree_iter_traverse(&src_iter) ?: + bch2_btree_iter_set_snapshot(trans, &src_iter, old_src.k->p.snapshot); + ret = bch2_btree_iter_traverse(trans, &src_iter) ?: bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_internal_snapshot_node); if (ret) goto out; } if (delete_dst) { - bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot); - ret = bch2_btree_iter_traverse(&dst_iter) ?: + bch2_btree_iter_set_snapshot(trans, &dst_iter, old_dst.k->p.snapshot); + ret = bch2_btree_iter_traverse(trans, &dst_iter) ?: bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_internal_snapshot_node); if (ret) goto out; @@ -642,7 +642,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct qstr *name, subvol_inum *inum) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter 
iter = { NULL }; + struct btree_iter iter = {}; int ret = lockrestart_do(trans, bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0)); @@ -771,7 +771,7 @@ int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos) bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter) ?: + ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_hash_delete_at(trans, bch2_dirent_hash_desc, &dir_hash_info, &iter, BTREE_UPDATE_internal_snapshot_node); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index a59f6c12529b..b007319b72e9 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -739,7 +739,7 @@ int bch2_accounting_read(struct bch_fs *c) struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); continue; } @@ -930,7 +930,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); continue; } diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 5df8de0b8c02..1186280b29e9 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -555,9 +555,9 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) ? rcu_dereference(c->devs[t.dev]) : NULL; - if (ca && percpu_ref_tryget(&ca->io_ref)) { + if (ca && percpu_ref_tryget(&ca->io_ref[READ])) { prt_printf(out, "/dev/%s", ca->name); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } else if (ca) { prt_printf(out, "offline device %u", t.dev); } else { diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 6faeda7ad03d..a396865e8b17 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -105,6 +105,7 @@ struct ec_bio { struct bch_dev *ca; struct ec_stripe_buf *buf; size_t idx; + int rw; u64 submit_time; struct bio bio; }; @@ -462,7 +463,8 @@ int bch2_trigger_stripe(struct btree_trans *trans, return ret; if (gc) - memcpy(&gc->r.e, &acc.replicas, replicas_entry_bytes(&acc.replicas)); + unsafe_memcpy(&gc->r.e, &acc.replicas, + replicas_entry_bytes(&acc.replicas), "VLA"); } if (old_s) { @@ -703,6 +705,7 @@ static void ec_block_endio(struct bio *bio) struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx]; struct bch_dev *ca = ec_bio->ca; struct closure *cl = bio->bi_private; + int rw = ec_bio->rw; bch2_account_io_completion(ca, bio_data_dir(bio), ec_bio->submit_time, !bio->bi_status); @@ -724,7 +727,7 @@ static void ec_block_endio(struct bio *bio) } bio_put(&ec_bio->bio); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[rw]); closure_put(cl); } @@ -775,6 +778,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, ec_bio->ca = ca; ec_bio->buf = buf; ec_bio->idx = idx; + ec_bio->rw = rw; ec_bio->submit_time = local_clock(); ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9); @@ -784,14 +788,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b); closure_get(cl); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[rw]); submit_bio(&ec_bio->bio); offset += b; } - percpu_ref_put(&ca->io_ref); + 
percpu_ref_put(&ca->io_ref[rw]); } static int get_stripe_key_trans(struct btree_trans *trans, u64 idx, @@ -1264,7 +1268,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c, ob->sectors_free, GFP_KERNEL, 0); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); if (ret) s->err = ret; @@ -1836,7 +1840,7 @@ static int __get_existing_stripe(struct btree_trans *trans, ret = 1; } out: - bch2_set_btree_iter_dontneed(&iter); + bch2_set_btree_iter_dontneed(trans, &iter); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -1949,7 +1953,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st if (bkey_gt(k.k->p, POS(0, U32_MAX))) { if (start_pos.offset) { start_pos = min_pos; - bch2_btree_iter_set_pos(&iter, start_pos); + bch2_btree_iter_set_pos(trans, &iter, start_pos); continue; } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index d4dfd13a8076..baf5dfb32298 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -34,7 +34,7 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: - bch2_print_string_as_lines(KERN_ERR, out->buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, out->buf); panic(bch2_fmt(c, "panic after error")); return true; default: @@ -45,6 +45,8 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) bool bch2_inconsistent_error(struct bch_fs *c) { struct printbuf buf = PRINTBUF; + buf.atomic++; + printbuf_indent_add_nextline(&buf, 2); bool ret = __bch2_inconsistent_error(c, &buf); @@ -59,6 +61,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra const char *fmt, va_list args) { struct printbuf buf = PRINTBUF; + buf.atomic++; bch2_log_msg_start(c, &buf); @@ -68,7 +71,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 6aac579a692a..6bb42985306e 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -112,7 +112,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans, unsigned nr_iters = 0; int ret; - ret = bch2_btree_iter_traverse(iter); + ret = bch2_btree_iter_traverse(trans, iter); if (ret) return ret; @@ -126,9 +126,9 @@ int bch2_extent_atomic_end(struct btree_trans *trans, if (ret < 0) return ret; - bch2_trans_copy_iter(©, iter); + bch2_trans_copy_iter(trans, ©, iter); - for_each_btree_key_max_continue_norestart(copy, insert->k.p, 0, k, ret) { + for_each_btree_key_max_continue_norestart(trans, copy, insert->k.p, 0, k, ret) { unsigned offset = 0; if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index a03e2c780cba..19d4599918dc 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -183,12 +183,12 @@ static void bchfs_read(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - bch2_btree_iter_set_pos(&iter, + bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, rbio->bio.bi_iter.bi_sector)); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = 
bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index c80ed3a54e70..65c2c33d253d 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -48,7 +48,7 @@ static void nocow_flush_endio(struct bio *_bio) struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio); closure_put(bio->cl); - percpu_ref_put(&bio->ca->io_ref); + percpu_ref_put(&bio->ca->io_ref[WRITE]); bio_put(&bio->bio); } @@ -71,7 +71,7 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c, for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) { rcu_read_lock(); ca = rcu_dereference(c->devs[dev]); - if (ca && !percpu_ref_tryget(&ca->io_ref)) + if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE])) ca = NULL; rcu_read_unlock(); @@ -636,9 +636,9 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, if (ret) goto bkey_err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); if ((ret = bkey_err(k))) goto bkey_err; @@ -649,13 +649,13 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, /* already reserved */ if (bkey_extent_is_reservation(k) && bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) { - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); continue; } if (bkey_extent_is_data(k.k) && !(mode & FALLOC_FL_ZERO_RANGE)) { - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); continue; } @@ -676,7 +676,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, if (ret) goto bkey_err; } - bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start)); + bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, hole_start)); if (ret) goto bkey_err; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index fc834bdf1f52..5a41b1a8e54f 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -88,7 +88,7 @@ int __must_check bch2_write_inode(struct bch_fs *c, void *p, unsigned fields) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bch_inode_unpacked inode_u; int ret; retry: @@ -1075,7 +1075,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; struct btree_trans *trans; - struct btree_iter inode_iter = { NULL }; + struct btree_iter inode_iter = {}; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; kuid_t kuid; @@ -1330,9 +1330,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (ret) continue; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_max(&iter, end); + k = bch2_btree_iter_peek_max(trans, &iter, end); ret = bkey_err(k); if (ret) continue; @@ -1342,7 +1342,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); continue; } @@ -1380,7 +1380,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bkey_copy(prev.k, cur.k); have_extent = true; - bch2_btree_iter_set_pos(&iter, + bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, iter.pos.offset + sectors)); } bch2_trans_iter_exit(trans, &iter); @@ -1697,17 +1697,17 @@ retry: if (ret) goto err; - 
bch2_btree_iter_set_snapshot(&iter1, snapshot); - bch2_btree_iter_set_snapshot(&iter2, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter1, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter2, snapshot); ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u); if (ret) goto err; if (inode_u.bi_dir == dir->ei_inode.bi_inum) { - bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); + bch2_btree_iter_set_pos(trans, &iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset)); - k = bch2_btree_iter_peek_slot(&iter1); + k = bch2_btree_iter_peek_slot(trans, &iter1); ret = bkey_err(k); if (ret) goto err; @@ -1731,7 +1731,7 @@ retry: * File with multiple hardlinks and our backref is to the wrong * directory - linear search: */ - for_each_btree_key_continue_norestart(iter2, 0, k, ret) { + for_each_btree_key_continue_norestart(trans, iter2, 0, k, ret) { if (k.k->p.inode > dir->ei_inode.bi_inum) break; @@ -2237,7 +2237,7 @@ got_sb: /* XXX: create an anonymous device for multi device filesystems */ sb->s_bdev = bdev; sb->s_dev = bdev->bd_dev; - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); break; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 52320295dcf6..18308f3d64a1 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -186,7 +186,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, { struct bch_fs *c = trans->c; struct qstr lostfound_str = QSTR("lost+found"); - struct btree_iter lostfound_iter = { NULL }; + struct btree_iter lostfound_iter = {}; u64 inum = 0; unsigned d_type = 0; int ret; @@ -295,8 +295,8 @@ create_lostfound: if (ret) goto err; - bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); - ret = bch2_btree_iter_traverse(&lostfound_iter); + bch2_btree_iter_set_snapshot(trans, &lostfound_iter, snapshot); + ret = bch2_btree_iter_traverse(trans, &lostfound_iter); if (ret) goto err; @@ -544,7 +544,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub new_inode.bi_subvol = subvolid; int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: - bch2_btree_iter_traverse(&inode_iter) ?: + bch2_btree_iter_traverse(trans, &inode_iter) ?: bch2_inode_write(trans, &inode_iter, &new_inode); bch2_trans_iter_exit(trans, &inode_iter); if (ret) @@ -609,7 +609,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 struct btree_iter iter = {}; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); - struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); + struct bkey_s_c k = bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum, 0)); bch2_trans_iter_exit(trans, &iter); int ret = bkey_err(k); if (ret) @@ -1557,7 +1557,7 @@ static int overlapping_extents_found(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; - struct btree_iter iter1, iter2 = { NULL }; + struct btree_iter iter1, iter2 = {}; struct bkey_s_c k1, k2; int ret; @@ -1566,7 +1566,7 @@ static int overlapping_extents_found(struct btree_trans *trans, bch2_trans_iter_init(trans, &iter1, btree, pos1, BTREE_ITER_all_snapshots| BTREE_ITER_not_extents); - k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); + k1 = bch2_btree_iter_peek_max(trans, &iter1, POS(pos1.inode, U64_MAX)); ret = bkey_err(k1); if (ret) goto err; @@ -1586,12 +1586,12 @@ static int overlapping_extents_found(struct btree_trans *trans, goto err; } - bch2_trans_copy_iter(&iter2, &iter1); + 
bch2_trans_copy_iter(trans, &iter2, &iter1); while (1) { - bch2_btree_iter_advance(&iter2); + bch2_btree_iter_advance(trans, &iter2); - k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); + k2 = bch2_btree_iter_peek_max(trans, &iter2, POS(pos1.inode, U64_MAX)); ret = bkey_err(k2); if (ret) goto err; @@ -1791,9 +1791,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { struct btree_iter iter2; - bch2_trans_copy_iter(&iter2, iter); - bch2_btree_iter_set_snapshot(&iter2, i->snapshot); - ret = bch2_btree_iter_traverse(&iter2) ?: + bch2_trans_copy_iter(trans, &iter2, iter); + bch2_btree_iter_set_snapshot(trans, &iter2, i->snapshot); + ret = bch2_btree_iter_traverse(trans, &iter2) ?: bch2_btree_delete_at(trans, &iter2, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &iter2); @@ -2185,7 +2185,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, BTREE_ID_dirents, SPOS(k.k->p.inode, k.k->p.offset, *i), BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&delete_iter) ?: + ret = bch2_btree_iter_traverse(trans, &delete_iter) ?: bch2_hash_delete_at(trans, bch2_dirent_hash_desc, hash_info, &delete_iter, @@ -2412,7 +2412,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, bch2_trans_iter_exit(trans, &parent_iter); bch2_trans_iter_init(trans, &parent_iter, BTREE_ID_subvolumes, POS(0, parent), 0); - k = bch2_btree_iter_peek_slot(&parent_iter); + k = bch2_btree_iter_peek_slot(trans, &parent_iter); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 80051073f613..b51d98cf8a80 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -940,7 +940,7 @@ int bch2_inode_create(struct btree_trans *trans, BTREE_ITER_intent); struct bkey_s_c k; again: - while ((k = bch2_btree_iter_peek(iter)).k && + while ((k = bch2_btree_iter_peek(trans, iter)).k && !(ret = bkey_err(k)) && bkey_lt(k.k->p, POS(0, max))) { if (pos < iter->pos.offset) @@ -951,7 +951,7 @@ again: * we've found just one: */ pos = iter->pos.offset + 1; - bch2_btree_iter_set_pos(iter, POS(0, pos)); + bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); } if (!ret && pos < max) @@ -967,12 +967,12 @@ again: /* Retry from start */ pos = start = min; - bch2_btree_iter_set_pos(iter, POS(0, pos)); + bch2_btree_iter_set_pos(trans, iter, POS(0, pos)); le32_add_cpu(&cursor->v.gen, 1); goto again; found_slot: - bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot)); - k = bch2_btree_iter_peek_slot(iter); + bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, snapshot)); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) { bch2_trans_iter_exit(trans, iter); @@ -1009,9 +1009,9 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - k = bch2_btree_iter_peek_max(&iter, end); + k = bch2_btree_iter_peek_max(trans, &iter, end); ret = bkey_err(k); if (ret) goto err; @@ -1042,7 +1042,7 @@ err: int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bkey_s_c k; u32 snapshot; int ret; @@ -1207,7 +1207,7 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct 
bch_fs *c = trans->c; - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bkey_i_inode_generation delete; struct bch_inode_unpacked inode_u; struct bkey_s_c k; diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 6b842c8d21be..cc07729a4b62 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -43,7 +43,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, bch2_bkey_buf_init(&new); closure_init_stack(&cl); - k = bch2_btree_iter_peek_slot(iter); + k = bch2_btree_iter_peek_slot(trans, iter); ret = bkey_err(k); if (ret) return ret; @@ -164,12 +164,12 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, if (ret) continue; - bch2_btree_iter_set_snapshot(iter, snapshot); + bch2_btree_iter_set_snapshot(trans, iter, snapshot); /* * peek_max() doesn't have ideal semantics for extents: */ - k = bch2_btree_iter_peek_max(iter, end_pos); + k = bch2_btree_iter_peek_max(trans, iter, end_pos); if (!k.k) break; @@ -230,7 +230,7 @@ static int truncate_set_isize(struct btree_trans *trans, u64 new_i_size, bool warn) { - struct btree_iter iter = { NULL }; + struct btree_iter iter = {}; struct bch_inode_unpacked inode_u; int ret; @@ -399,7 +399,7 @@ case LOGGED_OP_FINSERT_start: if (ret) goto err; } else { - bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset)); + bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, src_offset)); ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -425,12 +425,12 @@ case LOGGED_OP_FINSERT_shift_extents: if (ret) goto btree_err; - bch2_btree_iter_set_snapshot(&iter, snapshot); - bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + bch2_btree_iter_set_pos(trans, &iter, SPOS(inum.inum, pos, snapshot)); k = insert - ? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0)) - : bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX)); + ? 
bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum.inum, 0)) + : bch2_btree_iter_peek_max(trans, &iter, POS(inum.inum, U64_MAX)); if ((ret = bkey_err(k))) goto btree_err; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fd01e67b3e84..417bb0c7bbfa 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -394,7 +394,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) if (rbio->have_ioref) { struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } if (rbio->split) { @@ -909,7 +909,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, prt_printf(&buf, "memory gen: %u", gen); - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(trans, &iter))); if (!ret) { prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k); @@ -1003,7 +1003,7 @@ retry_pick: unlikely(dev_ptr_stale(ca, &pick.ptr))) { read_from_stale_dirty_pointer(trans, ca, k, pick.ptr); bch2_mark_io_failure(failed, &pick, false); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); goto retry_pick; } @@ -1036,7 +1036,7 @@ retry_pick: */ if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) { if (ca) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); rbio->ret = -BCH_ERR_data_read_buffer_too_small; goto out_read_done; } @@ -1285,12 +1285,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, if (ret) goto err; - bch2_btree_iter_set_snapshot(&iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &iter, snapshot); - bch2_btree_iter_set_pos(&iter, + bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, bvec_iter.bi_sector)); - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) goto err; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 07b55839768e..a418fa62f09d 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -168,9 +168,9 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, *i_sectors_delta = 0; *disk_sectors_delta = 0; - bch2_trans_copy_iter(&iter, extent_iter); + bch2_trans_copy_iter(trans, &iter, extent_iter); - for_each_btree_key_max_continue_norestart(iter, + for_each_btree_key_max_continue_norestart(trans, iter, new->k.p, BTREE_ITER_slots, old, ret) { s64 sectors = min(new->k.p.offset, old.k->p.offset) - max(bkey_start_offset(&new->k), @@ -292,7 +292,7 @@ int bch2_extent_update(struct btree_trans *trans, * path already traversed at iter->pos because * bch2_trans_extent_update() will use it to attempt extent merging */ - ret = __bch2_btree_iter_traverse(iter); + ret = __bch2_btree_iter_traverse(trans, iter); if (ret) return ret; @@ -337,7 +337,7 @@ int bch2_extent_update(struct btree_trans *trans, if (i_sectors_delta_total) *i_sectors_delta_total += i_sectors_delta; - bch2_btree_iter_set_pos(iter, next_pos); + bch2_btree_iter_set_pos(trans, iter, next_pos); return 0; } @@ -445,6 +445,11 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, BUG_ON(c->opts.nochanges); bkey_for_each_ptr(ptrs, ptr) { + /* + * XXX: btree writes should be using io_ref[WRITE], but we + * aren't retrying failed btree writes yet (due to device + * removal/ro): + */ struct bch_dev *ca = nocow ? bch2_dev_have_ref(c, ptr->dev) : bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? 
READ : WRITE); @@ -697,12 +702,19 @@ static void bch2_write_endio(struct bio *bio) bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write, wbio->submit_time, !bio->bi_status); - if (bio->bi_status) { - bch_err_inum_offset_ratelimited(ca, - op->pos.inode, - wbio->inode_offset << 9, - "data write error: %s", - bch2_blk_status_to_str(bio->bi_status)); + if (unlikely(bio->bi_status)) { + if (ca) + bch_err_inum_offset_ratelimited(ca, + op->pos.inode, + wbio->inode_offset << 9, + "data write error: %s", + bch2_blk_status_to_str(bio->bi_status)); + else + bch_err_inum_offset_ratelimited(c, + op->pos.inode, + wbio->inode_offset << 9, + "data write error: %s", + bch2_blk_status_to_str(bio->bi_status)); set_bit(wbio->dev, op->failed.d); op->flags |= BCH_WRITE_io_error; } @@ -715,7 +727,7 @@ static void bch2_write_endio(struct bio *bio) } if (wbio->have_ioref) - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); if (wbio->bounce) bch2_bio_free_pages_pool(c, bio); @@ -1293,7 +1305,7 @@ retry: if (ret) break; - k = bch2_btree_iter_peek_slot(&iter); + k = bch2_btree_iter_peek_slot(trans, &iter); ret = bkey_err(k); if (ret) break; @@ -1377,7 +1389,7 @@ retry: bch2_keylist_push(&op->insert_keys); if (op->flags & BCH_WRITE_submitted) break; - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } out: bch2_trans_iter_exit(trans, &iter); @@ -1414,7 +1426,7 @@ err: return; err_get_ioref: darray_for_each(buckets, i) - percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref); + percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]); /* Fall back to COW path: */ goto out; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 8a36d5536668..d8f74b6d0a75 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1315,7 +1315,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c) int ret = bch2_dev_journal_alloc(ca, true); if (ret) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return ret; } } @@ -1404,6 +1404,14 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) nr = cur_seq - last_seq; + /* + * Extra fudge factor, in case we crashed when the journal pin fifo was + * nearly or completely full. 
We'll need to be able to open additional + * journal entries (at least a few) in order for journal replay to get + * going: + */ + nr += nr / 4; + if (nr + 1 > j->pin.size) { free_fifo(&j->pin); init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); @@ -1461,11 +1469,9 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->reservations.idx = journal_cur_seq(j); c->last_bucket_seq_cleanup = journal_cur_seq(j); - - bch2_journal_space_available(j); spin_unlock(&j->lock); - return bch2_journal_reclaim_start(j); + return 0; } /* init/exit: */
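
The fifo sizing above, restated as a standalone helper with worked numbers; this is an illustration only, not part of the patch:

        static size_t example_pin_fifo_size(u64 cur_seq, u64 last_seq)
        {
                u64 nr = cur_seq - last_seq;    /* entries replay must cover */

                /* 25% headroom, in case we crashed with the pin fifo nearly
                 * full and replay needs to open additional entries: */
                nr += nr / 4;

                /* e.g. cur_seq = 1000, last_seq = 200: nr = 800 + 200 = 1000,
                 * and the fifo is sized to roundup_pow_of_two(1001) = 1024. */
                return roundup_pow_of_two(nr + 1);
        }
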
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2debc213e47c..1b7961f4f609 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1218,7 +1218,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device) out: bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); kvfree(buf.data); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); closure_return(cl); return; err: @@ -1253,7 +1253,7 @@ int bch2_journal_read(struct bch_fs *c, if ((ca->mi.state == BCH_MEMBER_STATE_rw || ca->mi.state == BCH_MEMBER_STATE_ro) && - percpu_ref_tryget(&ca->io_ref)) + percpu_ref_tryget(&ca->io_ref[READ])) closure_call(&ca->journal.read, bch2_journal_read_device, system_unbound_wq, @@ -1768,7 +1768,7 @@ static void journal_write_endio(struct bio *bio) } closure_put(&w->io); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[WRITE]); } static CLOSURE_CALLBACK(journal_write_submit) @@ -1843,7 +1843,7 @@ static CLOSURE_CALLBACK(journal_write_preflush) if (w->separate_flush) { for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[WRITE]); struct journal_device *ja = &ca->journal; struct bio *bio = &ja->bio[w->idx]->bio; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 57ad662871ba..90dcf80bd64a 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -130,7 +130,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, retry: ret = 0; while (bch2_trans_begin(trans), - (b = bch2_btree_iter_peek_node(&iter)) && + (b = bch2_btree_iter_peek_node(trans, &iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { bch2_progress_update_iter(trans, progress, &iter, "dropping metadata"); @@ -154,7 +154,7 @@ retry: if (ret) break; next: - bch2_btree_iter_next_node(&iter); + bch2_btree_iter_next_node(trans, &iter); } if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 5d41260e10da..fc396b9fa754 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -545,7 +545,7 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans * BTREE_ID_reflink, reflink_pos, BTREE_ITER_not_extents); - struct bkey_s_c k = bch2_btree_iter_peek(iter); + struct bkey_s_c k = bch2_btree_iter_peek(trans, iter); if (!k.k || bkey_err(k)) { bch2_trans_iter_exit(trans, iter); return k; @@ -603,7 +603,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek(trans, &iter); if (!k.k) break; @@ -681,7 +681,7 @@ next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(&iter); + bch2_btree_iter_advance(trans, &iter); } bch2_trans_iter_exit(trans, &reflink_iter); @@ -794,7 +794,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, bch2_trans_begin(trans); - k = bch2_btree_iter_peek(&bp_iter); + k = bch2_btree_iter_peek(trans, &bp_iter); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -876,7 +876,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (ctxt->stats) atomic64_add(sectors, &ctxt->stats->sectors_seen); next: - bch2_btree_iter_advance(&bp_iter); + bch2_btree_iter_advance(trans, &bp_iter); } err: bch2_trans_iter_exit(trans, &bp_iter); @@ -991,7 +991,7 @@ static int bch2_move_btree(struct bch_fs *c, retry: ret = 0; while (bch2_trans_begin(trans), - (b = bch2_btree_iter_peek_node(&iter)) && + (b = bch2_btree_iter_peek_node(trans, &iter)) && !(ret = PTR_ERR_OR_ZERO(b))) { if (kthread && kthread_should_stop()) break; @@ -1011,7 +1011,7 @@ retry: if (ret) break; next: - bch2_btree_iter_next_node(&iter); + bch2_btree_iter_next_node(trans, &iter); } if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 5126c870ce5b..159410c50861 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -280,7 +280,11 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) s64 wait = S64_MAX, fragmented_allowed, fragmented; for_each_rw_member(c, ca) { - struct bch_dev_usage usage = bch2_dev_usage_read(ca); + struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca); + struct bch_dev_usage usage; + + for (unsigned i = 0; i < BCH_DATA_NR; i++) + usage.buckets[i] = usage_full.d[i].buckets; fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) * ca->mi.bucket_size) >> 1); @@ -288,7 +292,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c) for (unsigned i = 0; i < BCH_DATA_NR; i++) if (data_type_movable(i)) - fragmented += usage.d[i].fragmented; + fragmented += usage_full.d[i].fragmented; wait = min(wait, max(0LL, fragmented_allowed - fragmented)); } diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index ee7251709fb9..0d65ea96f7a2 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -28,8 +28,8 @@ int bch2_create_trans(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_iter dir_iter = { NULL }; - struct btree_iter inode_iter = { NULL }; + struct btree_iter dir_iter = {}; + struct btree_iter inode_iter = {}; subvol_inum new_inum = dir; u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); @@ -127,8 +127,8 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot); - ret = bch2_btree_iter_traverse(&dir_iter); + bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot); + ret = bch2_btree_iter_traverse(trans, &dir_iter); if (ret) goto err; } @@ -177,9 +177,9 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_depth = dir_u->bi_depth + 1; inode_iter.flags &= ~BTREE_ITER_all_snapshots; - bch2_btree_iter_set_snapshot(&inode_iter, snapshot); + bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot); - ret = bch2_btree_iter_traverse(&inode_iter) ?: + ret = bch2_btree_iter_traverse(trans, &inode_iter) ?: bch2_inode_write(trans, &inode_iter, new_inode); err: bch2_trans_iter_exit(trans, &inode_iter); @@ -193,8 +193,8 @@ int bch2_link_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dir_iter = { NULL }; - struct btree_iter inode_iter = { NULL }; + struct btree_iter dir_iter = {}; + struct btree_iter inode_iter = {}; struct bch_hash_info dir_hash; u64 now = bch2_current_time(c); u64 dir_offset = 0; @@ -253,9 +253,9 @@ int bch2_unlink_trans(struct btree_trans 
*trans, bool deleting_subvol)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter dir_iter = { NULL };
-	struct btree_iter dirent_iter = { NULL };
-	struct btree_iter inode_iter = { NULL };
+	struct btree_iter dir_iter = {};
+	struct btree_iter dirent_iter = {};
+	struct btree_iter inode_iter = {};
 	struct bch_hash_info dir_hash;
 	subvol_inum inum;
 	u64 now = bch2_current_time(c);
@@ -301,7 +301,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	k = bch2_btree_iter_peek_slot(&dirent_iter);
+	k = bch2_btree_iter_peek_slot(trans, &dirent_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -310,8 +310,8 @@ int bch2_unlink_trans(struct btree_trans *trans,
 		 * If we're deleting a subvolume, we need to really delete the
 		 * dirent, not just emit a whiteout in the current snapshot:
 		 */
-		bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
-		ret = bch2_btree_iter_traverse(&dirent_iter);
+		bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot);
+		ret = bch2_btree_iter_traverse(trans, &dirent_iter);
 		if (ret)
 			goto err;
 	} else {
@@ -390,10 +390,10 @@ int bch2_rename_trans(struct btree_trans *trans,
 		      enum bch_rename_mode mode)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter src_dir_iter = { NULL };
-	struct btree_iter dst_dir_iter = { NULL };
-	struct btree_iter src_inode_iter = { NULL };
-	struct btree_iter dst_inode_iter = { NULL };
+	struct btree_iter src_dir_iter = {};
+	struct btree_iter dst_dir_iter = {};
+	struct btree_iter src_inode_iter = {};
+	struct btree_iter dst_inode_iter = {};
 	struct bch_hash_info src_hash, dst_hash;
 	subvol_inum src_inum, dst_inum;
 	u64 src_offset, dst_offset;
@@ -666,7 +666,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;
-	struct btree_iter bp_iter = { NULL };
+	struct btree_iter bp_iter = {};
 	int ret = 0;
 
 	if (inode_points_to_dirent(target, d))
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index 8b857fc33244..3d4755d73af7 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -516,7 +516,7 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans,
 		bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
 				KEY_TYPE_QUOTA_NOCHECK);
 advance:
-	bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
+	bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
 	return 0;
 }
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index b9bde04b66c0..c63fa53f30d2 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -233,7 +233,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
 			     SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
 			     BTREE_ITER_intent);
-	k = bch2_btree_iter_peek_slot(&iter);
+	k = bch2_btree_iter_peek_slot(trans, &iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -281,7 +281,7 @@ static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum,
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
 			     SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
 			     BTREE_ITER_intent);
-	k = bch2_btree_iter_peek_slot(&iter);
+	k = bch2_btree_iter_peek_slot(trans, &iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -301,7 +301,7 @@ static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
 					    struct btree_iter *work_iter)
 {
 	return !kthread_should_stop()
-		? bch2_btree_iter_peek(work_iter)
+		? bch2_btree_iter_peek(trans, work_iter)
 		: bkey_s_c_null;
 }
@@ -335,7 +335,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 			     work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
 			     work_pos,
 			     BTREE_ITER_all_snapshots);
-	struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter);
+	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, extent_iter);
 	if (bkey_err(k))
 		return k;
@@ -511,7 +511,7 @@ static int do_rebalance(struct moving_context *ctxt)
 	struct btree_trans *trans = ctxt->trans;
 	struct bch_fs *c = trans->c;
 	struct bch_fs_rebalance *r = &c->rebalance;
-	struct btree_iter rebalance_work_iter, extent_iter = { NULL };
+	struct btree_iter rebalance_work_iter, extent_iter = {};
 	struct bkey_s_c k;
 	int ret = 0;
@@ -552,7 +552,7 @@ static int do_rebalance(struct moving_context *ctxt)
 		if (ret)
 			break;
 
-		bch2_btree_iter_advance(&rebalance_work_iter);
+		bch2_btree_iter_advance(trans, &rebalance_work_iter);
 	}
 
 	bch2_trans_iter_exit(trans, &extent_iter);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 266c5770c824..79fd18a5a07c 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -198,7 +198,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
 	bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
 				  BTREE_MAX_DEPTH, k->level,
 				  BTREE_ITER_intent);
-	int ret = bch2_btree_iter_traverse(&iter);
+	int ret = bch2_btree_iter_traverse(trans, &iter);
 	if (ret)
 		goto out;
@@ -261,7 +261,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
 	bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
 				  BTREE_MAX_DEPTH, k->level,
 				  iter_flags);
-	ret = bch2_btree_iter_traverse(&iter);
+	ret = bch2_btree_iter_traverse(trans, &iter);
 	if (ret)
 		goto out;
@@ -270,7 +270,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
 		bch2_trans_iter_exit(trans, &iter);
 		bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
 					  BTREE_MAX_DEPTH, 0, iter_flags);
-		ret = bch2_btree_iter_traverse(&iter) ?:
+		ret = bch2_btree_iter_traverse(trans, &iter) ?:
 			bch2_btree_increase_depth(trans, iter.path, 0) ?:
 			-BCH_ERR_transaction_restart_nested;
 		goto out;
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index ee23f1f93acc..710178e3da4c 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -495,7 +495,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 				     bool reflink_p_may_update_opts_field)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter reflink_iter = { NULL };
+	struct btree_iter reflink_iter = {};
 	struct bkey_s_c k;
 	struct bkey_i *r_v;
 	struct bkey_i_reflink_p *r_p;
@@ -507,7 +507,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 	bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX,
 			     BTREE_ITER_intent);
-	k = bch2_btree_iter_peek_prev(&reflink_iter);
+	k = bch2_btree_iter_peek_prev(trans, &reflink_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -569,12 +569,13 @@ err:
 	return ret;
 }
 
-static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
+static struct bkey_s_c get_next_src(struct btree_trans *trans,
+				    struct btree_iter *iter, struct bpos end)
 {
 	struct bkey_s_c k;
 	int ret;
 
-	for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) {
+	for_each_btree_key_max_continue_norestart(trans, *iter, end, 0, k, ret) {
 		if (bkey_extent_is_unwritten(k))
 			continue;
@@ -583,7 +584,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 	}
 
 	if (bkey_ge(iter->pos, end))
-		bch2_btree_iter_set_pos(iter, end);
+		bch2_btree_iter_set_pos(trans, iter, end);
 
 	return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
 }
@@ -647,27 +648,27 @@ s64 bch2_remap_range(struct bch_fs *c,
 		if (ret)
 			continue;
 
-		bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
+		bch2_btree_iter_set_snapshot(trans, &src_iter, src_snapshot);
 
 		ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol,
 						  &dst_snapshot);
 		if (ret)
 			continue;
 
-		bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);
+		bch2_btree_iter_set_snapshot(trans, &dst_iter, dst_snapshot);
 
 		if (dst_inum.inum < src_inum.inum) {
 			/* Avoid some lock cycle transaction restarts */
-			ret = bch2_btree_iter_traverse(&dst_iter);
+			ret = bch2_btree_iter_traverse(trans, &dst_iter);
 			if (ret)
 				continue;
 		}
 
 		dst_done = dst_iter.pos.offset - dst_start.offset;
 		src_want = POS(src_start.inode, src_start.offset + dst_done);
-		bch2_btree_iter_set_pos(&src_iter, src_want);
+		bch2_btree_iter_set_pos(trans, &src_iter, src_want);
 
-		src_k = get_next_src(&src_iter, src_end);
+		src_k = get_next_src(trans, &src_iter, src_end);
 		ret = bkey_err(src_k);
 		if (ret)
 			continue;
@@ -738,7 +739,7 @@ s64 bch2_remap_range(struct bch_fs *c,
 	do {
 		struct bch_inode_unpacked inode_u;
-		struct btree_iter inode_iter = { NULL };
+		struct btree_iter inode_iter = {};
 
 		bch2_trans_begin(trans);
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index 38261638a611..06bb41a3f360 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -20,7 +20,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
 
 static inline bool bch2_dev_is_online(struct bch_dev *ca)
 {
-	return !percpu_ref_is_zero(&ca->io_ref);
+	return !percpu_ref_is_zero(&ca->io_ref[READ]);
 }
 
 static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
@@ -156,33 +156,34 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev
 
 static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
 						       struct bch_dev *ca,
-						       unsigned state_mask)
+						       unsigned state_mask,
+						       int rw)
 {
 	rcu_read_lock();
 	if (ca)
-		percpu_ref_put(&ca->io_ref);
+		percpu_ref_put(&ca->io_ref[rw]);
 
 	while ((ca = __bch2_next_dev(c, ca, NULL)) &&
 	       (!((1 << ca->mi.state) & state_mask) ||
-		!percpu_ref_tryget(&ca->io_ref)))
+		!percpu_ref_tryget(&ca->io_ref[rw])))
 		;
 	rcu_read_unlock();
 
 	return ca;
 }
 
-#define __for_each_online_member(_c, _ca, state_mask)			\
+#define __for_each_online_member(_c, _ca, state_mask, rw)		\
 	for (struct bch_dev *_ca = NULL;				\
-	     (_ca = bch2_get_next_online_dev(_c, _ca, state_mask));)
+	     (_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw));)
 
 #define for_each_online_member(c, ca)					\
-	__for_each_online_member(c, ca, ~0)
+	__for_each_online_member(c, ca, ~0, READ)
 
 #define for_each_rw_member(c, ca)					\
-	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw))
+	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE)
 
 #define for_each_readable_member(c, ca)					\
-	__for_each_online_member(c, ca,	BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro))
+	__for_each_online_member(c, ca,	BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ)
 
 static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev)
 {
@@ -287,7 +288,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
 	rcu_read_lock();
 	struct bch_dev *ca = bch2_dev_rcu(c, dev);
-	if (ca && !percpu_ref_tryget(&ca->io_ref))
+	if (ca && !percpu_ref_tryget(&ca->io_ref[rw]))
 		ca = NULL;
 	rcu_read_unlock();
 
@@ -297,7 +298,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
 		return ca;
 
 	if (ca)
-		percpu_ref_put(&ca->io_ref);
+		percpu_ref_put(&ca->io_ref[rw]);
 	return NULL;
 }
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 0c65065b08ec..b7de29aed839 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -843,9 +843,6 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
 {
 	struct bch_fs *c = trans->c;
 
-	if (bch2_snapshot_exists(c, id))
-		return 0;
-
 	/* Do we need to reconstruct the snapshot_tree entry as well? */
 	struct btree_iter iter;
 	struct bkey_s_c k;
@@ -1074,9 +1071,9 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
 static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
-	struct btree_iter c_iter = (struct btree_iter) { NULL };
-	struct btree_iter tree_iter = (struct btree_iter) { NULL };
+	struct btree_iter iter, p_iter = {};
+	struct btree_iter c_iter = {};
+	struct btree_iter tree_iter = {};
 	struct bkey_s_c_snapshot s;
 	u32 parent_id, child_id;
 	unsigned i;
@@ -1193,13 +1190,13 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
 
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
 			     POS_MIN, BTREE_ITER_intent);
-	k = bch2_btree_iter_peek(&iter);
+	k = bch2_btree_iter_peek(trans, &iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
 
 	for (i = 0; i < nr_snapids; i++) {
-		k = bch2_btree_iter_prev_slot(&iter);
+		k = bch2_btree_iter_prev_slot(trans, &iter);
 		ret = bkey_err(k);
 		if (ret)
 			goto err;
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index 602afca2f5ef..a90bf7b8a2b4 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -195,7 +195,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
 			      struct btree_iter *k_iter, struct bkey_s_c hash_k)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter iter = { NULL };
+	struct btree_iter iter = {};
 	struct printbuf buf = PRINTBUF;
 	struct bkey_s_c k;
 	int ret = 0;
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 575ad1e03904..09a354a26c3b 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -231,11 +231,11 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 	struct bkey_s_c k;
 	int ret;
 
-	bch2_trans_copy_iter(&iter, start);
+	bch2_trans_copy_iter(trans, &iter, start);
 
-	bch2_btree_iter_advance(&iter);
+	bch2_btree_iter_advance(trans, &iter);
 
-	for_each_btree_key_continue_norestart(iter, BTREE_ITER_slots, k, ret) {
+	for_each_btree_key_continue_norestart(trans, iter, BTREE_ITER_slots, k, ret) {
 		if (k.k->type != desc.key_type &&
 		    k.k->type != KEY_TYPE_hash_whiteout)
 			break;
@@ -280,7 +280,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans,
 		}
 
 		if (!slot.path && !(flags & STR_HASH_must_replace))
-			bch2_trans_copy_iter(&slot, iter);
+			bch2_trans_copy_iter(trans, &slot, iter);
 
 		if (k.k->type != KEY_TYPE_hash_whiteout)
 			goto not_found;
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index cd0d8e5e44e7..5537283d0bea 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -275,7 +275,7 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
 	struct btree_iter iter;
 
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
-	struct bkey_s_c k = bch2_btree_iter_peek(&iter);
+	struct bkey_s_c k = bch2_btree_iter_peek(trans, &iter);
 	bch2_trans_iter_exit(trans, &iter);
 
 	return bkey_err(k) ?: k.k && k.k->p.inode == subvol
@@ -574,7 +574,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
 			  bool ro)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
+	struct btree_iter dst_iter, src_iter = {};
 	struct bkey_i_subvolume *new_subvol = NULL;
 	struct bkey_i_subvolume *src_subvol = NULL;
 	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index 910f6196700e..f640c1e3d639 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -33,16 +33,16 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
 int bch2_subvol_is_ro(struct bch_fs *, u32);
 
 static inline struct bkey_s_c
-bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos end,
-					   u32 subvolid, unsigned flags)
+bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btree_iter *iter,
+					   struct bpos end, u32 subvolid, unsigned flags)
 {
 	u32 snapshot;
-	int ret = bch2_subvolume_get_snapshot(iter->trans, subvolid, &snapshot);
+	int ret = bch2_subvolume_get_snapshot(trans, subvolid, &snapshot);
 	if (ret)
 		return bkey_s_c_err(ret);
 
-	bch2_btree_iter_set_snapshot(iter, snapshot);
-	return bch2_btree_iter_peek_max_type(iter, end, flags);
+	bch2_btree_iter_set_snapshot(trans, iter, snapshot);
+	return bch2_btree_iter_peek_max_type(trans, iter, end, flags);
 }
 
 #define for_each_btree_key_in_subvolume_max_continue(_trans, _iter,	\
@@ -53,14 +53,14 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos
 									\
 	do {								\
 		_ret3 = lockrestart_do(_trans, ({			\
-			(_k) = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter),\
+			(_k) = bch2_btree_iter_peek_in_subvolume_max_type(trans, &(_iter),\
 					_end, _subvolid, (_flags));	\
 			if (!(_k).k)					\
 				break;					\
 									\
 			bkey_err(_k) ?: (_do);				\
 		}));							\
-	} while (!_ret3 && bch2_btree_iter_advance(&(_iter)));		\
+	} while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter)));	\
 									\
 	bch2_trans_iter_exit((_trans), &(_iter));			\
 	_ret3;								\
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 572b06bfa0b8..e27422b6d9c6 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -248,7 +248,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
 		struct bch_sb_handle *dev_sb = &ca->disk_sb;
 
 		if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
-			percpu_ref_put(&ca->io_ref);
+			percpu_ref_put(&ca->io_ref[READ]);
 			return NULL;
 		}
 	}
@@ -945,7 +945,7 @@ static void write_super_endio(struct bio *bio)
 	}
 
 	closure_put(&ca->fs->sb_write);
-	percpu_ref_put(&ca->io_ref);
+	percpu_ref_put(&ca->io_ref[READ]);
 }
 
 static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
@@ -963,7 +963,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
 	this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb],
 		     bio_sectors(bio));
 
-	percpu_ref_get(&ca->io_ref);
+	percpu_ref_get(&ca->io_ref[READ]);
 	closure_bio_submit(bio, &c->sb_write);
 }
@@ -989,7 +989,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
 	this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
 		     bio_sectors(bio));
 
-	percpu_ref_get(&ca->io_ref);
+	percpu_ref_get(&ca->io_ref[READ]);
 	closure_bio_submit(bio, &c->sb_write);
 }
@@ -1014,13 +1014,20 @@ int bch2_write_super(struct bch_fs *c)
 	closure_init_stack(cl);
 	memset(&sb_written, 0, sizeof(sb_written));
 
+	/*
+	 * Note: we do writes to RO devices here, and we might want to change
+	 * that in the future.
+	 *
+	 * For now, we expect to be able to call write_super() when we're not
+	 * yet RW:
+	 */
 	for_each_online_member(c, ca) {
 		ret = darray_push(&online_devices, ca);
 		if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
-			percpu_ref_put(&ca->io_ref);
+			percpu_ref_put(&ca->io_ref[READ]);
 			goto out;
 		}
-		percpu_ref_get(&ca->io_ref);
+		percpu_ref_get(&ca->io_ref[READ]);
 	}
 
 	/* Make sure we're using the new magic numbers: */
@@ -1186,7 +1193,7 @@ out:
 	/* Make new options visible after they're persistent: */
 	bch2_sb_update(c);
 	darray_for_each(online_devices, ca)
-		percpu_ref_put(&(*ca)->io_ref);
+		percpu_ref_put(&(*ca)->io_ref[READ]);
 	darray_exit(&online_devices);
 	printbuf_exit(&err);
 	return ret;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 20208f3c5d8b..a58edde43bee 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -185,6 +185,7 @@ static void bch2_dev_unlink(struct bch_dev *);
 static void bch2_dev_free(struct bch_dev *);
 static int bch2_dev_alloc(struct bch_fs *, unsigned);
 static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
+static void bch2_dev_io_ref_stop(struct bch_dev *, int);
 static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
 
 struct bch_fs *bch2_dev_to_fs(dev_t dev)
@@ -294,8 +295,10 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	/*
 	 * After stopping journal:
 	 */
-	for_each_member_device(c, ca)
+	for_each_member_device(c, ca) {
+		bch2_dev_io_ref_stop(ca, WRITE);
 		bch2_dev_allocator_remove(c, ca);
+	}
 }
 
 #ifndef BCH_WRITE_REF_DEBUG
@@ -465,10 +468,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 	if (ret)
 		goto err;
 
-	ret = bch2_fs_mark_dirty(c);
-	if (ret)
-		goto err;
-
 	clear_bit(BCH_FS_clean_shutdown, &c->flags);
 
 	/*
@@ -480,10 +479,24 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 	set_bit(JOURNAL_need_flush_write, &c->journal.flags);
 	set_bit(JOURNAL_running, &c->journal.flags);
 
-	for_each_rw_member(c, ca)
+	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
 		bch2_dev_allocator_add(c, ca);
+		percpu_ref_reinit(&ca->io_ref[WRITE]);
+	}
 	bch2_recalc_capacity(c);
 
+	ret = bch2_fs_mark_dirty(c);
+	if (ret)
+		goto err;
+
+	spin_lock(&c->journal.lock);
+	bch2_journal_space_available(&c->journal);
+	spin_unlock(&c->journal.lock);
+
+	ret = bch2_journal_reclaim_start(&c->journal);
+	if (ret)
+		goto err;
+
 	set_bit(BCH_FS_rw, &c->flags);
 	set_bit(BCH_FS_was_rw, &c->flags);
 
@@ -495,11 +508,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 		atomic_long_inc(&c->writes[i]);
 	}
 #endif
-
-	ret = bch2_journal_reclaim_start(&c->journal);
-	if (ret)
-		goto err;
-
 	if (!early) {
 		ret = bch2_fs_read_write_late(c);
 		if (ret)
@@ -675,6 +683,7 @@ void bch2_fs_free(struct bch_fs *c)
 
 		if (ca) {
 			EBUG_ON(atomic_long_read(&ca->ref) != 1);
+			bch2_dev_io_ref_stop(ca, READ);
 			bch2_free_super(&ca->disk_sb);
 			bch2_dev_free(ca);
 		}
@@ -1199,6 +1208,15 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
 
 /* Device startup/shutdown: */
 
+static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
+{
+	if (!percpu_ref_is_zero(&ca->io_ref[rw])) {
+		reinit_completion(&ca->io_ref_completion[rw]);
+		percpu_ref_kill(&ca->io_ref[rw]);
+		wait_for_completion(&ca->io_ref_completion[rw]);
+	}
+}
+
 static void bch2_dev_release(struct kobject *kobj)
 {
 	struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
@@ -1208,6 +1226,9 @@ static void bch2_dev_release(struct kobject *kobj)
 
 static void bch2_dev_free(struct bch_dev *ca)
 {
+	WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
+	WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
+
 	cancel_work_sync(&ca->io_error_work);
 
 	bch2_dev_unlink(ca);
@@ -1226,7 +1247,8 @@ static void bch2_dev_free(struct bch_dev *ca)
 	bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]);
 	bch2_time_stats_quantiles_exit(&ca->io_latency[READ]);
 
-	percpu_ref_exit(&ca->io_ref);
+	percpu_ref_exit(&ca->io_ref[WRITE]);
+	percpu_ref_exit(&ca->io_ref[READ]);
#ifndef CONFIG_BCACHEFS_DEBUG
 	percpu_ref_exit(&ca->ref);
 #endif
@@ -1238,14 +1260,12 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
 
 	lockdep_assert_held(&c->state_lock);
 
-	if (percpu_ref_is_zero(&ca->io_ref))
+	if (percpu_ref_is_zero(&ca->io_ref[READ]))
 		return;
 
 	__bch2_dev_read_only(c, ca);
 
-	reinit_completion(&ca->io_ref_completion);
-	percpu_ref_kill(&ca->io_ref);
-	wait_for_completion(&ca->io_ref_completion);
+	bch2_dev_io_ref_stop(ca, READ);
 
 	bch2_dev_unlink(ca);
 
@@ -1262,11 +1282,18 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref)
 }
 #endif
 
-static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
+static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref)
+{
+	struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[READ]);
+
+	complete(&ca->io_ref_completion[READ]);
+}
+
+static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref)
 {
-	struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
+	struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[WRITE]);
 
-	complete(&ca->io_ref_completion);
+	complete(&ca->io_ref_completion[WRITE]);
 }
 
 static void bch2_dev_unlink(struct bch_dev *ca)
@@ -1330,7 +1357,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
 
 	kobject_init(&ca->kobj, &bch2_dev_ktype);
 	init_completion(&ca->ref_completion);
-	init_completion(&ca->io_ref_completion);
+	init_completion(&ca->io_ref_completion[READ]);
+	init_completion(&ca->io_ref_completion[WRITE]);
 
 	INIT_WORK(&ca->io_error_work, bch2_io_error_work);
 
@@ -1356,7 +1384,9 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
 
 	bch2_dev_allocator_background_init(ca);
 
-	if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
+	if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete,
+			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+	    percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete,
 			    PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
 	    !(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) ||
 	    bch2_dev_buckets_alloc(c, ca) ||
@@ -1419,7 +1449,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 		return -BCH_ERR_device_size_too_small;
 	}
 
-	BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
+	BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
+	BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
 
 	ret = bch2_dev_journal_init(ca, sb->sb);
 	if (ret)
@@ -1438,7 +1469,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 
 	ca->dev = ca->disk_sb.bdev->bd_dev;
 
-	percpu_ref_reinit(&ca->io_ref);
+	percpu_ref_reinit(&ca->io_ref[READ]);
 
 	return 0;
 }
@@ -1568,6 +1599,8 @@ static bool bch2_fs_may_start(struct bch_fs *c)
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
 {
+	bch2_dev_io_ref_stop(ca, WRITE);
+
 	/*
 	 * The allocator thread itself allocates btree nodes, so stop it first:
 	 */
@@ -1584,6 +1617,10 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
 
 	bch2_dev_allocator_add(c, ca);
 	bch2_recalc_capacity(c);
+
+	if (percpu_ref_is_zero(&ca->io_ref[WRITE]))
+		percpu_ref_reinit(&ca->io_ref[WRITE]);
+
 	bch2_dev_do_discards(ca);
 }
 
@@ -1731,7 +1768,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 	return 0;
err:
 	if (ca->mi.state == BCH_MEMBER_STATE_rw &&
-	    !percpu_ref_is_zero(&ca->io_ref))
+	    !percpu_ref_is_zero(&ca->io_ref[READ]))
 		__bch2_dev_read_write(c, ca);
 	up_write(&c->state_lock);
 	return ret;
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index 6c6469814637..c265b102267a 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -43,7 +43,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
 			     BTREE_ITER_intent);
 
 	ret = commit_do(trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(&iter) ?:
+			bch2_btree_iter_traverse(trans, &iter) ?:
 			bch2_trans_update(trans, &iter, &k.k_i, 0));
 	bch_err_msg(c, ret, "update error");
 	if (ret)
@@ -51,7 +51,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
 
 	pr_info("deleting once");
 	ret = commit_do(trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(&iter) ?:
+			bch2_btree_iter_traverse(trans, &iter) ?:
 			bch2_btree_delete_at(trans, &iter, 0));
 	bch_err_msg(c, ret, "delete error (first)");
 	if (ret)
@@ -59,7 +59,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
 
 	pr_info("deleting twice");
 	ret = commit_do(trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(&iter) ?:
+			bch2_btree_iter_traverse(trans, &iter) ?:
 			bch2_btree_delete_at(trans, &iter, 0));
 	bch_err_msg(c, ret, "delete error (second)");
 	if (ret)
@@ -84,7 +84,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
 			     BTREE_ITER_intent);
 
 	ret = commit_do(trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(&iter) ?:
+			bch2_btree_iter_traverse(trans, &iter) ?:
 			bch2_trans_update(trans, &iter, &k.k_i, 0));
 	bch_err_msg(c, ret, "update error");
 	if (ret)
@@ -94,7 +94,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
 	bch2_journal_flush_all_pins(&c->journal);
 
 	ret = commit_do(trans, NULL, NULL, 0,
-			bch2_btree_iter_traverse(&iter) ?:
+			bch2_btree_iter_traverse(trans, &iter) ?:
 			bch2_btree_delete_at(trans, &iter, 0));
 	bch_err_msg(c, ret, "delete error");
 	if (ret)
@@ -349,10 +349,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
 			     SPOS(0, 0, U32_MAX), 0);
 
-	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
 	BUG_ON(k.k);
 
-	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
 	BUG_ON(k.k);
 
 	bch2_trans_iter_exit(trans, &iter);
@@ -369,10 +369,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
 			     SPOS(0, 0, U32_MAX), 0);
 
-	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
 	BUG_ON(k.k);
 
-	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
 	BUG_ON(k.k);
 
 	bch2_trans_iter_exit(trans, &iter);
@@ -488,7 +488,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
 	trans = bch2_trans_get(c);
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
 			     SPOS(0, 0, snapid_lo), 0);
-	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
+	lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
 
 	BUG_ON(k.k->p.snapshot != U32_MAX);
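The mechanical change running through the hunks above and below is the btree iterator calling convention: helpers such as bch2_btree_iter_peek_slot(), bch2_btree_iter_traverse() and bch2_btree_iter_set_pos() no longer reach through iter->trans and instead take the btree_trans as an explicit first argument (the { NULL } -> {} change is a separate cleanup of the empty-iterator initializer). A minimal sketch of a lookup under the new convention, using only helpers whose signatures appear in this diff; example_lookup() itself is hypothetical, not part of the patch:

static int example_lookup(struct btree_trans *trans, struct bpos pos)
{
	struct btree_iter iter;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos, 0);

	/* Every iterator operation now takes trans explicitly: */
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter);
	int ret = bkey_err(k);

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
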
@@ -602,9 +602,9 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
 			     SPOS(0, 0, U32_MAX), 0);
 
 	for (i = 0; i < nr; i++) {
-		bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
+		bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX));
 
-		lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
+		lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
 		ret = bkey_err(k);
 		if (ret)
 			break;
@@ -623,9 +623,9 @@ static int rand_mixed_trans(struct btree_trans *trans,
 	struct bkey_s_c k;
 	int ret;
 
-	bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
+	bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, U32_MAX));
 
-	k = bch2_btree_iter_peek(iter);
+	k = bch2_btree_iter_peek(trans, iter);
 	ret = bkey_err(k);
 	bch_err_msg(trans->c, ret, "lookup error");
 	if (ret)
@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
 			     BTREE_ITER_intent);
-	k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX));
+	k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX));
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index f9667b944c0d..651da52b2cbc 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -168,7 +168,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
 		   int type, int flags)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter inode_iter = { NULL };
+	struct btree_iter inode_iter = {};
 	int ret;
 
 	ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?:
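
The other recurring change is the io_ref split: ca->io_ref becomes a two-element percpu_ref array indexed by READ/WRITE, with matching io_ref_completion[] entries. The WRITE ref is killed and drained by bch2_dev_io_ref_stop() when a device or the whole filesystem goes read-only, and reinitialized on the way back to read-write, so in-flight writers are flushed out before writes stop while the READ ref keeps the device online. This is also why for_each_rw_member() now takes WRITE refs, and why __bch2_fs_read_write() iterates with __for_each_online_member(..., READ) at the point where the WRITE refs are still dead and being reinitialized. A sketch of the caller-side pattern, built on bch2_dev_get_ioref() as modified in sb-members.h; example_submit_write() is hypothetical:

static void example_submit_write(struct bch_fs *c, unsigned dev)
{
	/*
	 * Take the per-direction ref: this fails if the device is offline
	 * or its WRITE ref has already been killed by bch2_dev_io_ref_stop():
	 */
	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, WRITE);
	if (!ca)
		return;

	/* ... build and submit IO against ca here ... */

	percpu_ref_put(&ca->io_ref[WRITE]);
}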