summary | refs | log | tree | commit | diff
path: root/fs/bcachefs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-27 15:23:46 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-11-01 21:11:08 -0400
commit: be9e782df3cb557715630a61dc79d9f966737859 (patch)
tree: 68c77fe233bb9de8c3b3d9140e64e0ea152dfead /fs/bcachefs
parent: 2e7acdfbcad8b60eeef29d3beb3eb9a7085e3768 (diff)
download: lwn-be9e782df3cb557715630a61dc79d9f966737859.tar.gz
download: lwn-be9e782df3cb557715630a61dc79d9f966737859.zip
bcachefs: Don't downgrade locks on transaction restart
We should only be downgrading locks on success - otherwise, our transaction restarts won't be getting the correct locks and we'll livelock.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- fs/bcachefs/btree_iter.c 3
-rw-r--r-- fs/bcachefs/btree_key_cache.c 2
-rw-r--r-- fs/bcachefs/btree_locking.c 38
-rw-r--r-- fs/bcachefs/btree_locking.h 18
-rw-r--r-- fs/bcachefs/btree_trans_commit.c 9
-rw-r--r-- fs/bcachefs/btree_types.h 2
-rw-r--r-- fs/bcachefs/btree_update_interior.c 2
-rw-r--r-- fs/bcachefs/data_update.c 12
-rw-r--r-- fs/bcachefs/trace.h 47
9 files changed, 96 insertions, 37 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 3b629420655a..0622f729411f 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1523,6 +1523,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
path->ref = 0;
path->intent_ref = 0;
path->nodes_locked = 0;
+ path->alloc_seq++;
btree_path_list_add(trans, pos, path);
trans->paths_sorted = false;
@@ -1598,7 +1599,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
locks_want = min(locks_want, BTREE_MAX_DEPTH);
if (locks_want > path->locks_want)
- bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);
+ bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
return path;
}
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 634ffdcb55f9..3304bff7d464 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -509,7 +509,7 @@ fill:
* path->uptodate yet:
*/
if (!path->locks_want &&
- !__bch2_btree_path_upgrade(trans, path, 1)) {
+ !__bch2_btree_path_upgrade(trans, path, 1, NULL)) {
trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
goto err;
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 40c8ed8f7bf1..bc45cd2a34a4 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -431,7 +431,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
static inline bool btree_path_get_locks(struct btree_trans *trans,
struct btree_path *path,
- bool upgrade)
+ bool upgrade,
+ struct get_locks_fail *f)
{
unsigned l = path->level;
int fail_idx = -1;
@@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
if (!(upgrade
? bch2_btree_node_upgrade(trans, path, l)
- : bch2_btree_node_relock(trans, path, l)))
- fail_idx = l;
+ : bch2_btree_node_relock(trans, path, l))) {
+ fail_idx = l;
+
+ if (f) {
+ f->l = l;
+ f->b = path->l[l].b;
+ }
+ }
l++;
} while (l < path->locks_want);
@@ -584,7 +591,9 @@ __flatten
bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
- return btree_path_get_locks(trans, path, false);
+ struct get_locks_fail f;
+
+ return btree_path_get_locks(trans, path, false, &f);
}
int __bch2_btree_path_relock(struct btree_trans *trans,
@@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
struct btree_path *path,
- unsigned new_locks_want)
+ unsigned new_locks_want,
+ struct get_locks_fail *f)
{
EBUG_ON(path->locks_want >= new_locks_want);
path->locks_want = new_locks_want;
- return btree_path_get_locks(trans, path, true);
+ return btree_path_get_locks(trans, path, true, f);
}
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
struct btree_path *path,
- unsigned new_locks_want)
+ unsigned new_locks_want,
+ struct get_locks_fail *f)
{
struct btree_path *linked;
- if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
+ if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
return true;
/*
@@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
linked->btree_id == path->btree_id &&
linked->locks_want < new_locks_want) {
linked->locks_want = new_locks_want;
- btree_path_get_locks(trans, linked, true);
+ btree_path_get_locks(trans, linked, true, NULL);
}
return false;
@@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
{
unsigned l;
+ if (trans->restarted)
+ return;
+
EBUG_ON(path->locks_want < new_locks_want);
path->locks_want = new_locks_want;
@@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
}
bch2_btree_path_verify_locks(path);
+
+ path->downgrade_seq++;
+ trace_path_downgrade(trans, _RET_IP_, path);
}
/* Btree transaction locking: */
@@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans)
{
struct btree_path *path;
+ if (trans->restarted)
+ return;
+
trans_for_each_path(trans, path)
bch2_btree_path_downgrade(trans, path);
}
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 6231e9ffc5d7..11b0a2c8cd69 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
/* upgrade */
+
+struct get_locks_fail {
+ unsigned l;
+ struct btree *b;
+};
+
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
- struct btree_path *, unsigned);
+ struct btree_path *, unsigned,
+ struct get_locks_fail *);
+
bool __bch2_btree_path_upgrade(struct btree_trans *,
- struct btree_path *, unsigned);
+ struct btree_path *, unsigned,
+ struct get_locks_fail *);
static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
struct btree_path *path,
unsigned new_locks_want)
{
+ struct get_locks_fail f = { 0 }; /* zeroed: the restart trace below reads f->l and f->b even when no lock attempt ran to fill them in */
unsigned old_locks_want = path->locks_want;
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
if (path->locks_want < new_locks_want
- ? __bch2_btree_path_upgrade(trans, path, new_locks_want)
+ ? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
: path->uptodate == BTREE_ITER_UPTODATE)
return 0;
trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
- old_locks_want, new_locks_want);
+ old_locks_want, new_locks_want, &f);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
}
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 53ddcaf042a2..8140b6e6e9a6 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -861,12 +861,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
*/
bch2_journal_res_put(&c->journal, &trans->journal_res);
- if (unlikely(ret))
- return ret;
-
- bch2_trans_downgrade(trans);
-
- return 0;
+ return ret;
}
static int journal_reclaim_wait_done(struct bch_fs *c)
@@ -1135,6 +1130,8 @@ out:
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
+ if (!ret)
+ bch2_trans_downgrade(trans);
bch2_trans_reset_updates(trans);
return ret;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index a039ce4a4809..ecbb44b939a0 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -228,6 +228,8 @@ struct btree_path {
u8 sorted_idx;
u8 ref;
u8 intent_ref;
+ u32 alloc_seq;
+ u32 downgrade_seq;
/* btree_iter_copy starts here: */
struct bpos pos;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 818a83f35d27..d029e0348c91 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
out:
if (new_path)
bch2_path_put(trans, new_path, true);
- bch2_btree_path_downgrade(trans, iter->path);
+ bch2_trans_downgrade(trans);
return ret;
err:
bch2_btree_node_free_never_used(as, trans, n);
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index d116f2f03db2..0771a6d880bf 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -162,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached) {
- bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
- /*
- * See comment below:
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
- */
rewrites_found |= 1U << i;
}
i++;
@@ -212,14 +208,8 @@ restart_drop_extra_replicas:
if (!p.ptr.cached &&
durability - ptr_durability >= m->op.opts.data_replicas) {
durability -= ptr_durability;
- bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr);
- /*
- * Currently, we're dropping unneeded replicas
- * instead of marking them as cached, since
- * cached data in stripe buckets prevents them
- * from being reused:
+
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
- */
goto restart_drop_extra_replicas;
}
}
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 81f72b2add09..893304a1f06e 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -1043,13 +1043,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
TP_ARGS(trans, caller_ip, path)
);
+struct get_locks_fail;
+
TRACE_EVENT(trans_restart_upgrade,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
struct btree_path *path,
unsigned old_locks_want,
- unsigned new_locks_want),
- TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want),
+ unsigned new_locks_want,
+ struct get_locks_fail *f),
+ TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f),
TP_STRUCT__entry(
__array(char, trans_fn, 32 )
@@ -1057,6 +1060,11 @@ TRACE_EVENT(trans_restart_upgrade,
__field(u8, btree_id )
__field(u8, old_locks_want )
__field(u8, new_locks_want )
+ __field(u8, level )
+ __field(u32, path_seq )
+ __field(u32, node_seq )
+ __field(u32, path_alloc_seq )
+ __field(u32, downgrade_seq)
TRACE_BPOS_entries(pos)
),
@@ -1066,10 +1074,15 @@ TRACE_EVENT(trans_restart_upgrade,
__entry->btree_id = path->btree_id;
__entry->old_locks_want = old_locks_want;
__entry->new_locks_want = new_locks_want;
+ __entry->level = f->l;
+ __entry->path_seq = path->l[f->l].lock_seq;
+ __entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq;
+ __entry->path_alloc_seq = path->alloc_seq;
+ __entry->downgrade_seq = path->downgrade_seq;
TRACE_BPOS_assign(pos, path->pos)
),
- TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u",
+ TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u",
__entry->trans_fn,
(void *) __entry->caller_ip,
bch2_btree_id_str(__entry->btree_id),
@@ -1077,7 +1090,12 @@ TRACE_EVENT(trans_restart_upgrade,
__entry->pos_offset,
__entry->pos_snapshot,
__entry->old_locks_want,
- __entry->new_locks_want)
+ __entry->new_locks_want,
+ __entry->level,
+ __entry->path_seq,
+ __entry->node_seq,
+ __entry->path_alloc_seq,
+ __entry->downgrade_seq)
);
DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
@@ -1238,6 +1256,27 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
__entry->new_u64s)
);
+TRACE_EVENT(path_downgrade,
+ TP_PROTO(struct btree_trans *trans,
+ unsigned long caller_ip,
+ struct btree_path *path),
+ TP_ARGS(trans, caller_ip, path),
+
+ TP_STRUCT__entry(
+ __array(char, trans_fn, 32 )
+ __field(unsigned long, caller_ip )
+ ),
+
+ TP_fast_assign(
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ ),
+
+ TP_printk("%s %pS",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip)
+);
+
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip),