diff options
author | Alexander Aring <aahringo@redhat.com> | 2024-04-02 15:18:06 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2024-04-09 11:44:49 -0500 |
commit | c288745f1d4a2ead903e81d2f4716e9d40b0ad85 (patch) | |
tree | 7ecb927ca1d0e6898c78b3ec6c974304dc2e1e58 /fs/dlm | |
parent | cc396e2355b5ca6e1aee005f3ce99bab8f37f5ff (diff) | |
download | lwn-c288745f1d4a2ead903e81d2f4716e9d40b0ad85.tar.gz lwn-c288745f1d4a2ead903e81d2f4716e9d40b0ad85.zip |
dlm: avoid blocking receive at the end of recovery
The end of the recovery process transitioned to normal message
processing by temporarily blocking the receiving context,
processing saved messages, then unblocking the receiving
context. To avoid blocking the receiving context, the old
wait_queue and mutex are replaced by a new rwlock and the new
RECV_MSG_BLOCKED flag. Received messages are added to the
list of saved messages, protected by the rwlock, until the
flag is cleared, which happens when all saved messages have
been processed.
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm')
-rw-r--r-- | fs/dlm/dlm_internal.h | 5 | ||||
-rw-r--r-- | fs/dlm/lock.c | 16 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 4 | ||||
-rw-r--r-- | fs/dlm/member.c | 5 | ||||
-rw-r--r-- | fs/dlm/requestqueue.c | 41 |
5 files changed, 30 insertions, 41 deletions
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 9f98a815f935..61820b8c47a7 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -655,9 +655,7 @@ struct dlm_ls { struct rw_semaphore ls_in_recovery; /* block local requests */ struct rw_semaphore ls_recv_active; /* block dlm_recv */ struct list_head ls_requestqueue;/* queue remote requests */ - atomic_t ls_requestqueue_cnt; - wait_queue_head_t ls_requestqueue_wait; - struct mutex ls_requestqueue_mutex; + rwlock_t ls_requestqueue_lock; struct dlm_rcom *ls_recover_buf; int ls_recover_nodeid; /* for debugging */ unsigned int ls_recover_locks_in; /* for log info */ @@ -717,6 +715,7 @@ struct dlm_ls { #define LSFL_UEVENT_WAIT 7 #define LSFL_CB_DELAY 9 #define LSFL_NODIR 10 +#define LSFL_RECV_MSG_BLOCKED 11 #define DLM_PROC_FLAGS_CLOSING 1 #define DLM_PROC_FLAGS_COMPAT 2 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 7b309231eebd..98d9c5a4be00 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -4752,20 +4752,32 @@ static void _receive_message(struct dlm_ls *ls, const struct dlm_message *ms, static void dlm_receive_message(struct dlm_ls *ls, const struct dlm_message *ms, int nodeid) { - if (dlm_locking_stopped(ls)) { +try_again: + read_lock(&ls->ls_requestqueue_lock); + if (test_bit(LSFL_RECV_MSG_BLOCKED, &ls->ls_flags)) { /* If we were a member of this lockspace, left, and rejoined, other nodes may still be sending us messages from the lockspace generation before we left. */ if (WARN_ON_ONCE(!ls->ls_generation)) { + read_unlock(&ls->ls_requestqueue_lock); log_limit(ls, "receive %d from %d ignore old gen", le32_to_cpu(ms->m_type), nodeid); return; } + read_unlock(&ls->ls_requestqueue_lock); + write_lock(&ls->ls_requestqueue_lock); + /* recheck because we hold writelock now */ + if (!test_bit(LSFL_RECV_MSG_BLOCKED, &ls->ls_flags)) { + write_unlock_bh(&ls->ls_requestqueue_lock); + goto try_again; + } + dlm_add_requestqueue(ls, nodeid, ms); + write_unlock(&ls->ls_requestqueue_lock); } else { - dlm_wait_requestqueue(ls); _receive_message(ls, ms, 0); + read_unlock(&ls->ls_requestqueue_lock); } } diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 945139805605..757e473bc619 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -554,9 +554,7 @@ static int new_lockspace(const char *name, const char *cluster, init_rwsem(&ls->ls_in_recovery); init_rwsem(&ls->ls_recv_active); INIT_LIST_HEAD(&ls->ls_requestqueue); - atomic_set(&ls->ls_requestqueue_cnt, 0); - init_waitqueue_head(&ls->ls_requestqueue_wait); - mutex_init(&ls->ls_requestqueue_mutex); + rwlock_init(&ls->ls_requestqueue_lock); spin_lock_init(&ls->ls_clear_proc_locks); /* Due backwards compatibility with 3.1 we need to use maximum diff --git a/fs/dlm/member.c b/fs/dlm/member.c index be7909ead71b..707cebcdc533 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -642,6 +642,11 @@ int dlm_ls_stop(struct dlm_ls *ls) set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); ls->ls_recover_seq++; + + /* activate requestqueue and stop processing */ + write_lock(&ls->ls_requestqueue_lock); + set_bit(LSFL_RECV_MSG_BLOCKED, &ls->ls_flags); + write_unlock(&ls->ls_requestqueue_lock); spin_unlock(&ls->ls_recover_lock); /* diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index c05940afd063..9b646026df46 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -48,10 +48,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, memcpy(&e->request, ms, sizeof(*ms)); memcpy(&e->request.m_extra, ms->m_extra, length); - atomic_inc(&ls->ls_requestqueue_cnt); - mutex_lock(&ls->ls_requestqueue_mutex); list_add_tail(&e->list, &ls->ls_requestqueue); - mutex_unlock(&ls->ls_requestqueue_mutex); } /* @@ -71,16 +68,14 @@ int dlm_process_requestqueue(struct dlm_ls *ls) struct dlm_message *ms; int error = 0; - mutex_lock(&ls->ls_requestqueue_mutex); - + write_lock(&ls->ls_requestqueue_lock); for (;;) { if (list_empty(&ls->ls_requestqueue)) { - mutex_unlock(&ls->ls_requestqueue_mutex); + clear_bit(LSFL_RECV_MSG_BLOCKED, &ls->ls_flags); error = 0; break; } - e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); - mutex_unlock(&ls->ls_requestqueue_mutex); + e = list_first_entry(&ls->ls_requestqueue, struct rq_entry, list); ms = &e->request; @@ -93,41 +88,23 @@ int dlm_process_requestqueue(struct dlm_ls *ls) e->recover_seq); dlm_receive_message_saved(ls, &e->request, e->recover_seq); - - mutex_lock(&ls->ls_requestqueue_mutex); list_del(&e->list); - if (atomic_dec_and_test(&ls->ls_requestqueue_cnt)) - wake_up(&ls->ls_requestqueue_wait); kfree(e); if (dlm_locking_stopped(ls)) { log_debug(ls, "process_requestqueue abort running"); - mutex_unlock(&ls->ls_requestqueue_mutex); error = -EINTR; break; } + write_unlock(&ls->ls_requestqueue_lock); schedule(); + write_lock(&ls->ls_requestqueue_lock); } + write_unlock(&ls->ls_requestqueue_lock); return error; } -/* - * After recovery is done, locking is resumed and dlm_recoverd takes all the - * saved requests and processes them as they would have been by dlm_recv. At - * the same time, dlm_recv will start receiving new requests from remote nodes. - * We want to delay dlm_recv processing new requests until dlm_recoverd has - * finished processing the old saved requests. We don't check for locking - * stopped here because dlm_ls_stop won't stop locking until it's suspended us - * (dlm_recv). - */ - -void dlm_wait_requestqueue(struct dlm_ls *ls) -{ - wait_event(ls->ls_requestqueue_wait, - atomic_read(&ls->ls_requestqueue_cnt) == 0); -} - static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) { __le32 type = ms->m_type; @@ -158,17 +135,15 @@ void dlm_purge_requestqueue(struct dlm_ls *ls) struct dlm_message *ms; struct rq_entry *e, *safe; - mutex_lock(&ls->ls_requestqueue_mutex); + write_lock(&ls->ls_requestqueue_lock); list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) { ms = &e->request; if (purge_request(ls, ms, e->nodeid)) { list_del(&e->list); - if (atomic_dec_and_test(&ls->ls_requestqueue_cnt)) - wake_up(&ls->ls_requestqueue_wait); kfree(e); } } - mutex_unlock(&ls->ls_requestqueue_mutex); + write_unlock(&ls->ls_requestqueue_lock); } |