summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fs/bcachefs/alloc_background.c 236
-rw-r--r-- fs/bcachefs/alloc_background.h 2
-rw-r--r-- fs/bcachefs/buckets.c 8
-rw-r--r-- fs/bcachefs/ec.c 10
-rw-r--r-- fs/bcachefs/journal_io.c 2
-rw-r--r-- fs/bcachefs/super.c 13
6 files changed, 109 insertions, 162 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 82a68fabdc5f..25c18b8cd3a6 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -129,15 +129,21 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
*p += bytes;
}
-struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
{
- struct bkey_alloc_unpacked ret = { .gen = a->gen };
- const void *d = a->data;
- unsigned idx = 0;
+ struct bkey_alloc_unpacked ret = { .gen = 0 };
+
+ if (k.k->type == KEY_TYPE_alloc) {
+ const struct bch_alloc *a = bkey_s_c_to_alloc(k).v;
+ const void *d = a->data;
+ unsigned idx = 0;
+
+ ret.gen = a->gen;
#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++);
- BCH_ALLOC_FIELDS()
+ BCH_ALLOC_FIELDS()
#undef x
+ }
return ret;
}
@@ -199,66 +205,18 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
get_alloc_field(a.v, &d, i));
}
-static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
-{
- const void *d = a->data;
- unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
- struct bucket_mark m;
-
- g->io_time[READ] = get_alloc_field(a, &d, idx++);
- g->io_time[WRITE] = get_alloc_field(a, &d, idx++);
- data_type = get_alloc_field(a, &d, idx++);
- dirty_sectors = get_alloc_field(a, &d, idx++);
- cached_sectors = get_alloc_field(a, &d, idx++);
- g->oldest_gen = get_alloc_field(a, &d, idx++);
-
- bucket_cmpxchg(g, m, ({
- m.gen = a->gen;
- m.data_type = data_type;
- m.dirty_sectors = dirty_sectors;
- m.cached_sectors = cached_sectors;
- }));
-
- g->gen_valid = 1;
-}
-
-static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
- struct bucket_mark m)
+static inline struct bkey_alloc_unpacked
+alloc_mem_to_key(struct bucket *g, struct bucket_mark m)
{
- unsigned idx = 0;
- void *d = a->v.data;
-
- a->v.fields = 0;
- a->v.gen = m.gen;
-
- d = a->v.data;
- put_alloc_field(a, &d, idx++, g->io_time[READ]);
- put_alloc_field(a, &d, idx++, g->io_time[WRITE]);
- put_alloc_field(a, &d, idx++, m.data_type);
- put_alloc_field(a, &d, idx++, m.dirty_sectors);
- put_alloc_field(a, &d, idx++, m.cached_sectors);
- put_alloc_field(a, &d, idx++, g->oldest_gen);
-
- set_bkey_val_bytes(&a->k, (void *) d - (void *) &a->v);
-}
-
-static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
-{
- struct bch_dev *ca;
- struct bkey_s_c_alloc a;
-
- if (k.k->type != KEY_TYPE_alloc)
- return;
-
- a = bkey_s_c_to_alloc(k);
- ca = bch_dev_bkey_exists(c, a.k->p.inode);
-
- if (a.k->p.offset >= ca->mi.nbuckets)
- return;
-
- percpu_down_read(&c->mark_lock);
- __alloc_read_key(bucket(ca, a.k->p.offset), a.v);
- percpu_up_read(&c->mark_lock);
+ return (struct bkey_alloc_unpacked) {
+ .gen = m.gen,
+ .oldest_gen = g->oldest_gen,
+ .data_type = m.data_type,
+ .dirty_sectors = m.dirty_sectors,
+ .cached_sectors = m.cached_sectors,
+ .read_time = g->io_time[READ],
+ .write_time = g->io_time[WRITE],
+ };
}
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
@@ -274,7 +232,7 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_ALLOC, POS_MIN, 0, k, ret)
- bch2_alloc_read_key(c, k);
+ bch2_mark_key(c, k, true, 0, NULL, 0, 0);
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) {
@@ -284,7 +242,8 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
for_each_journal_key(*journal_keys, j)
if (j->btree_id == BTREE_ID_ALLOC)
- bch2_alloc_read_key(c, bkey_i_to_s_c(j->k));
+ bch2_mark_key(c, bkey_i_to_s_c(j->k),
+ true, 0, NULL, 0, 0);
percpu_down_write(&c->mark_lock);
bch2_dev_usage_from_buckets(c);
@@ -352,81 +311,32 @@ err:
return ret;
}
-static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca,
- size_t b, struct btree_iter *iter,
- unsigned flags)
-{
- struct bch_fs *c = trans->c;
-#if 0
- __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
-#else
- /* hack: */
- __BKEY_PADDED(k, 8) alloc_key;
-#endif
- struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
- struct bucket *g;
- struct bucket_mark m, new;
- int ret;
-
- BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
-
- a->k.p = POS(ca->dev_idx, b);
-
- bch2_btree_iter_set_pos(iter, a->k.p);
-
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- return ret;
-
- percpu_down_read(&c->mark_lock);
- g = bucket(ca, b);
- m = READ_ONCE(g->mark);
-
- if (!m.dirty) {
- percpu_up_read(&c->mark_lock);
- return 0;
- }
-
- __alloc_write_key(a, g, m);
- percpu_up_read(&c->mark_lock);
-
- bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
-
- ret = bch2_trans_commit(trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_NOMARK|
- flags);
- if (ret)
- return ret;
-
- new = m;
- new.dirty = false;
- atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
-
- if (ca->buckets_written)
- set_bit(b, ca->buckets_written);
-
- return 0;
-}
-
int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
{
struct btree_trans trans;
struct btree_iter *iter;
struct bucket_array *buckets;
struct bch_dev *ca;
+ struct bucket *g;
+ struct bucket_mark m, new;
+ struct bkey_alloc_unpacked old_u, new_u;
+ __BKEY_PADDED(k, 8) alloc_key; /* hack: */
+ struct bkey_i_alloc *a;
+ struct bkey_s_c k;
unsigned i;
size_t b;
int ret = 0;
+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
bch2_trans_init(&trans, c);
iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
for_each_rw_member(ca, c, i) {
-relock:
down_read(&ca->bucket_lock);
+restart:
buckets = bucket_array(ca);
for (b = buckets->first_bucket;
@@ -435,27 +345,70 @@ relock:
if (!buckets->b[b].mark.dirty)
continue;
+ bch2_btree_iter_set_pos(iter, POS(i, b));
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ old_u = bch2_alloc_unpack(k);
+
+ percpu_down_read(&c->mark_lock);
+ g = bucket(ca, b);
+ m = READ_ONCE(g->mark);
+ new_u = alloc_mem_to_key(g, m);
+ percpu_up_read(&c->mark_lock);
+
+ if (!m.dirty)
+ continue;
+
if ((flags & BTREE_INSERT_LAZY_RW) &&
percpu_ref_is_zero(&c->writes)) {
up_read(&ca->bucket_lock);
bch2_trans_unlock(&trans);
ret = bch2_fs_read_write_early(c);
+ down_read(&ca->bucket_lock);
+
if (ret)
- goto out;
- goto relock;
+ goto err;
+ goto restart;
}
- ret = __bch2_alloc_write_key(&trans, ca, b,
- iter, flags);
+ a = bkey_alloc_init(&alloc_key.k);
+ a->k.p = iter->pos;
+ bch2_alloc_pack(a, new_u);
+
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_NOMARK|
+ flags);
+err:
+ if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
+ bch_err(c, "error %i writing alloc info", ret);
+ printk(KERN_CONT "dev %llu bucket %llu\n",
+ iter->pos.inode, iter->pos.offset);
+ printk(KERN_CONT "gen %u -> %u\n", old_u.gen, new_u.gen);
+#define x(_name, _bits) printk(KERN_CONT #_name " %u -> %u\n", old_u._name, new_u._name);
+ BCH_ALLOC_FIELDS()
+#undef x
+ }
if (ret)
break;
+ new = m;
+ new.dirty = false;
+ atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
+
+ if (ca->buckets_written)
+ set_bit(b, ca->buckets_written);
+
bch2_trans_cond_resched(&trans);
*wrote = true;
}
up_read(&ca->bucket_lock);
-out:
+
if (ret) {
percpu_ref_put(&ca->io_ref);
break;
@@ -922,6 +875,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bkey_i_alloc *a;
struct bkey_alloc_unpacked u;
+ struct bucket *g;
struct bucket_mark m;
struct bkey_s_c k;
bool invalidating_cached_data;
@@ -941,7 +895,6 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
BUG_ON(!fifo_push(&ca->free_inc, b));
bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
- m = bucket(ca, b)->mark;
spin_unlock(&c->freelist_lock);
percpu_up_read(&c->mark_lock);
@@ -955,27 +908,26 @@ retry:
if (ret)
return ret;
- if (k.k && k.k->type == KEY_TYPE_alloc)
- u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
- else
- memset(&u, 0, sizeof(u));
+ /*
+ * The allocator has to start before journal replay is finished - thus,
+ * we have to trust the in memory bucket @m, not the version in the
+ * btree:
+ */
+ percpu_down_read(&c->mark_lock);
+ g = bucket(ca, b);
+ m = READ_ONCE(g->mark);
+ u = alloc_mem_to_key(g, m);
+ percpu_up_read(&c->mark_lock);
invalidating_cached_data = m.cached_sectors != 0;
+ u.gen++;
u.data_type = 0;
u.dirty_sectors = 0;
u.cached_sectors = 0;
u.read_time = c->bucket_clock[READ].hand;
u.write_time = c->bucket_clock[WRITE].hand;
- /*
- * The allocator has to start before journal replay is finished - thus,
- * we have to trust the in memory bucket @m, not the version in the
- * btree:
- */
- //BUG_ON(u.dirty_sectors);
- u.gen = m.gen + 1;
-
a = bkey_alloc_init(&alloc_key.k);
a->k.p = iter->pos;
bch2_alloc_pack(a, u);
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 02354c80a102..0c1a0f0dd2ab 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -13,7 +13,7 @@ struct bkey_alloc_unpacked {
#undef x
};
-struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
+struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
void bch2_alloc_pack(struct bkey_i_alloc *,
const struct bkey_alloc_unpacked);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 6d04474f0e3a..2479ad37775b 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -649,9 +649,13 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
if (flags & BCH_BUCKET_MARK_GC)
return 0;
- u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
ca = bch_dev_bkey_exists(c, k.k->p.inode);
+
+ if (k.k->p.offset >= ca->mi.nbuckets)
+ return 0;
+
g = __bucket(ca, k.k->p.offset, gc);
+ u = bch2_alloc_unpack(k);
old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
m.gen = u.gen;
@@ -1381,7 +1385,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
goto out;
}
- u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+ u = bch2_alloc_unpack(k);
if (gen_after(u.gen, p.ptr.gen)) {
ret = 1;
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 6761b5c24a12..07245717ca4e 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1234,11 +1234,6 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
return ret;
}
-static void bch2_stripe_read_key(struct bch_fs *c, struct bkey_s_c k)
-{
- bch2_mark_key(c, k, true, 0, NULL, 0, 0);
-}
-
int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
{
struct journal_key *i;
@@ -1254,7 +1249,7 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
bch2_trans_init(&trans, c);
for_each_btree_key(&trans, iter, BTREE_ID_EC, POS_MIN, 0, k, ret)
- bch2_stripe_read_key(c, k);
+ bch2_mark_key(c, k, true, 0, NULL, 0, 0);
ret = bch2_trans_exit(&trans) ?: ret;
if (ret) {
@@ -1264,7 +1259,8 @@ int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
for_each_journal_key(*journal_keys, i)
if (i->btree_id == BTREE_ID_EC)
- bch2_stripe_read_key(c, bkey_i_to_s_c(i->k));
+ bch2_mark_key(c, bkey_i_to_s_c(i->k),
+ true, 0, NULL, 0, 0);
return 0;
}
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 4fd7b048050b..4e0c63f0076f 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -947,7 +947,6 @@ out:
return;
err:
bch2_fatal_error(c);
- bch2_journal_halt(j);
spin_lock(&j->lock);
goto out;
}
@@ -1059,7 +1058,6 @@ void bch2_journal_write(struct closure *cl)
spin_unlock(&j->lock);
if (ret) {
- bch2_journal_halt(j);
bch_err(c, "Unable to allocate journal write");
bch2_fatal_error(c);
continue_at(cl, journal_write_done, system_highpri_wq);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 959638c986a0..8f25c1d9b8cb 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -198,17 +198,14 @@ static void __bch2_fs_read_only(struct bch_fs *c)
do {
wrote = false;
- ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
- if (ret) {
- bch2_fs_inconsistent(c, "error writing out stripes");
- break;
- }
+ ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
+ bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
- ret = bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
- if (ret) {
+ if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+
+ if (ret)
break;
- }
for_each_member_device(ca, c, i)
bch2_dev_allocator_quiesce(c, ca);