diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-12-01 14:55:34 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-12-01 14:55:34 -0800 |
commit | 0a4ebed781dbb9ecbfef2374d29da1ab3b1a63f3 (patch) | |
tree | ab171047c0b1bab2d423e85106f84fdf7b52ac7d /fs/ocfs2/dlm | |
parent | 3b120ab76225592e9c267086fa4dfa3ce5a075fe (diff) | |
parent | 939255798a468e1a92f03546de6e87be7b491e57 (diff) | |
download | lwn-0a4ebed781dbb9ecbfef2374d29da1ab3b1a63f3.tar.gz lwn-0a4ebed781dbb9ecbfef2374d29da1ab3b1a63f3.zip |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (31 commits)
ocfs2: avoid unaligned access to dqc_bitmap
ocfs2: Use filemap_write_and_wait() instead of write_inode_now()
ocfs2: honor O_(D)SYNC flag in fallocate
ocfs2: Add a missing journal credit in ocfs2_link_credits() -v2
ocfs2: send correct UUID to cleancache initialization
ocfs2: Commit transactions in error cases -v2
ocfs2: make direntry invalid when deleting it
fs/ocfs2/dlm/dlmlock.c: free kmem_cache_zalloc'd data using kmem_cache_free
ocfs2: Avoid livelock in ocfs2_readpage()
ocfs2: serialize unaligned aio
ocfs2: Implement llseek()
ocfs2: Fix ocfs2_page_mkwrite()
ocfs2: Add comment about orphan scanning
ocfs2: Clean up messages in the fs
ocfs2/cluster: Cluster up now includes network connections too
ocfs2/cluster: Add new function o2net_fill_node_map()
ocfs2/cluster: Fix output in file elapsed_time_in_ms
ocfs2/dlm: dlmlock_remote() needs to account for remastery
ocfs2/dlm: Take inflight reference count for remotely mastered resources too
ocfs2/dlm: Cleanup dlm_wait_for_node_death() and dlm_wait_for_node_recovery()
...
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 56 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 44 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmlock.c | 54 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 175 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 164 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmthread.c | 16 |
6 files changed, 238 insertions, 271 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index d602abb51b61..a5952ceecba5 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -859,8 +859,8 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); void dlm_wait_for_recovery(struct dlm_ctxt *dlm); void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); -int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); -int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); +void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); +void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); void dlm_put(struct dlm_ctxt *dlm); struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); @@ -877,9 +877,8 @@ static inline void dlm_lockres_get(struct dlm_lock_resource *res) kref_get(&res->refs); } void dlm_lockres_put(struct dlm_lock_resource *res); -void __dlm_unhash_lockres(struct dlm_lock_resource *res); -void __dlm_insert_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); +void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); +void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, const char *name, unsigned int len, @@ -902,46 +901,15 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, const char *name, unsigned int namelen); -#define dlm_lockres_set_refmap_bit(bit,res) \ - __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__) -#define dlm_lockres_clear_refmap_bit(bit,res) \ - __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__) +void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit); +void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit); -static inline void __dlm_lockres_set_refmap_bit(int bit, - struct dlm_lock_resource *res, - const char *file, - int line) -{ - //printk("%s:%d:%.*s: setting bit %d\n", file, line, - // res->lockname.len, res->lockname.name, bit); - set_bit(bit, res->refmap); -} - -static inline void __dlm_lockres_clear_refmap_bit(int bit, - struct dlm_lock_resource *res, - const char *file, - int line) -{ - //printk("%s:%d:%.*s: clearing bit %d\n", file, line, - // res->lockname.len, res->lockname.name, bit); - clear_bit(bit, res->refmap); -} - -void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *file, - int line); -void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int new_lockres, - const char *file, - int line); -#define dlm_lockres_drop_inflight_ref(d,r) \ - __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__) -#define dlm_lockres_grab_inflight_ref(d,r) \ - __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__) -#define dlm_lockres_grab_inflight_ref_new(d,r) \ - __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__) +void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res); +void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res); void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6ed6b95dcf93..92f2ead0fab6 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -157,16 +157,18 @@ static int dlm_protocol_compare(struct dlm_protocol_version *existing, static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); -void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) +void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - if (!hlist_unhashed(&lockres->hash_node)) { - hlist_del_init(&lockres->hash_node); - dlm_lockres_put(lockres); - } + if (hlist_unhashed(&res->hash_node)) + return; + + mlog(0, "%s: Unhash res %.*s\n", dlm->name, res->lockname.len, + res->lockname.name); + hlist_del_init(&res->hash_node); + dlm_lockres_put(res); } -void __dlm_insert_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) +void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { struct hlist_head *bucket; struct qstr *q; @@ -180,6 +182,9 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm, dlm_lockres_get(res); hlist_add_head(&res->hash_node, bucket); + + mlog(0, "%s: Hash res %.*s\n", dlm->name, res->lockname.len, + res->lockname.name); } struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, @@ -539,17 +544,17 @@ again: static void __dlm_print_nodes(struct dlm_ctxt *dlm) { - int node = -1; + int node = -1, num = 0; assert_spin_locked(&dlm->spinlock); - printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name); - + printk("( "); while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { printk("%d ", node); + ++num; } - printk("\n"); + printk(") %u nodes\n", num); } static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, @@ -566,11 +571,10 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, node = exit_msg->node_idx; - printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name); - spin_lock(&dlm->spinlock); clear_bit(node, dlm->domain_map); clear_bit(node, dlm->exit_domain_map); + printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s ", node, dlm->name); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ @@ -755,6 +759,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_mark_domain_leaving(dlm); dlm_leave_domain(dlm); + printk(KERN_NOTICE "o2dlm: Leaving domain %s\n", dlm->name); dlm_force_free_mles(dlm); dlm_complete_dlm_shutdown(dlm); } @@ -970,7 +975,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, clear_bit(assert->node_idx, dlm->exit_domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", + printk(KERN_NOTICE "o2dlm: Node %u joins domain %s ", assert->node_idx, dlm->name); __dlm_print_nodes(dlm); @@ -1701,8 +1706,10 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) bail: spin_lock(&dlm->spinlock); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - if (!status) + if (!status) { + printk(KERN_NOTICE "o2dlm: Joining domain %s ", dlm->name); __dlm_print_nodes(dlm); + } spin_unlock(&dlm->spinlock); if (ctxt) { @@ -2131,13 +2138,6 @@ struct dlm_ctxt * dlm_register_domain(const char *domain, goto leave; } - if (!o2hb_check_local_node_heartbeating()) { - mlog(ML_ERROR, "the local node has not been configured, or is " - "not heartbeating\n"); - ret = -EPROTO; - goto leave; - } - mlog(0, "register called for domain \"%s\"\n", domain); retry: diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 8d39e0fd66f7..975810b98492 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -183,10 +183,6 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, kick_thread = 1; } } - /* reduce the inflight count, this may result in the lockres - * being purged below during calc_usage */ - if (lock->ml.node == dlm->node_num) - dlm_lockres_drop_inflight_ref(dlm, res); spin_unlock(&res->spinlock); wake_up(&res->wq); @@ -231,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, lock->ml.type, res->lockname.len, res->lockname.name, flags); + /* + * Wait if resource is getting recovered, remastered, etc. + * If the resource was remastered and new owner is self, then exit. + */ spin_lock(&res->spinlock); - - /* will exit this call with spinlock held */ __dlm_wait_on_lockres(res); + if (res->owner == dlm->node_num) { + spin_unlock(&res->spinlock); + return DLM_RECOVERING; + } res->state |= DLM_LOCK_RES_IN_PROGRESS; /* add lock to local (secondary) queue */ @@ -319,27 +321,23 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create, sizeof(create), res->owner, &status); if (tmpret >= 0) { - // successfully sent and received - ret = status; // this is already a dlm_status + ret = status; if (ret == DLM_REJECTED) { - mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres " - "no longer owned by %u. that node is coming back " - "up currently.\n", dlm->name, create.namelen, + mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer " + "owned by node %u. That node is coming back up " + "currently.\n", dlm->name, create.namelen, create.name, res->owner); dlm_print_one_lock_resource(res); BUG(); } } else { - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key, - res->owner); - if (dlm_is_host_down(tmpret)) { + mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to " + "node %u\n", dlm->name, create.namelen, create.name, + tmpret, res->owner); + if (dlm_is_host_down(tmpret)) ret = DLM_RECOVERING; - mlog(0, "node %u died so returning DLM_RECOVERING " - "from lock message!\n", res->owner); - } else { + else ret = dlm_err_to_dlm_status(tmpret); - } } return ret; @@ -440,7 +438,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, /* zero memory only if kernel-allocated */ lksb = kzalloc(sizeof(*lksb), GFP_NOFS); if (!lksb) { - kfree(lock); + kmem_cache_free(dlm_lock_cache, lock); return NULL; } kernel_allocated = 1; @@ -718,18 +716,10 @@ retry_lock: if (status == DLM_RECOVERING || status == DLM_MIGRATING || status == DLM_FORWARD) { - mlog(0, "retrying lock with migration/" - "recovery/in progress\n"); msleep(100); - /* no waiting for dlm_reco_thread */ if (recovery) { if (status != DLM_RECOVERING) goto retry_lock; - - mlog(0, "%s: got RECOVERING " - "for $RECOVERY lock, master " - "was %u\n", dlm->name, - res->owner); /* wait to see the node go down, then * drop down and allow the lockres to * get cleaned up. need to remaster. */ @@ -741,6 +731,14 @@ retry_lock: } } + /* Inflight taken in dlm_get_lock_resource() is dropped here */ + spin_lock(&res->spinlock); + dlm_lockres_drop_inflight_ref(dlm, res); + spin_unlock(&res->spinlock); + + dlm_lockres_calc_usage(dlm, res); + dlm_kick_thread(dlm, res); + if (status != DLM_NORMAL) { lock->lksb->flags &= ~DLM_LKSB_GET_LVB; if (status != DLM_NOTQUEUED) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 11eefb8c12e9..005261c333b0 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -631,39 +631,54 @@ error: return NULL; } -void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int new_lockres, - const char *file, - int line) +void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit) { - if (!new_lockres) - assert_spin_locked(&res->spinlock); + assert_spin_locked(&res->spinlock); + + mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len, + res->lockname.name, bit, __builtin_return_address(0)); + + set_bit(bit, res->refmap); +} + +void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit) +{ + assert_spin_locked(&res->spinlock); + + mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len, + res->lockname.name, bit, __builtin_return_address(0)); + + clear_bit(bit, res->refmap); +} + + +void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + assert_spin_locked(&res->spinlock); - if (!test_bit(dlm->node_num, res->refmap)) { - BUG_ON(res->inflight_locks != 0); - dlm_lockres_set_refmap_bit(dlm->node_num, res); - } res->inflight_locks++; - mlog(0, "%s:%.*s: inflight++: now %u\n", - dlm->name, res->lockname.len, res->lockname.name, - res->inflight_locks); + + mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, + res->lockname.len, res->lockname.name, res->inflight_locks, + __builtin_return_address(0)); } -void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *file, - int line) +void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) { assert_spin_locked(&res->spinlock); BUG_ON(res->inflight_locks == 0); + res->inflight_locks--; - mlog(0, "%s:%.*s: inflight--: now %u\n", - dlm->name, res->lockname.len, res->lockname.name, - res->inflight_locks); - if (res->inflight_locks == 0) - dlm_lockres_clear_refmap_bit(dlm->node_num, res); + + mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name, + res->lockname.len, res->lockname.name, res->inflight_locks, + __builtin_return_address(0)); + wake_up(&res->wq); } @@ -697,7 +712,6 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, unsigned int hash; int tries = 0; int bit, wait_on_recovery = 0; - int drop_inflight_if_nonlocal = 0; BUG_ON(!lockid); @@ -709,36 +723,33 @@ lookup: spin_lock(&dlm->spinlock); tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); if (tmpres) { - int dropping_ref = 0; - spin_unlock(&dlm->spinlock); - spin_lock(&tmpres->spinlock); - /* We wait for the other thread that is mastering the resource */ + /* Wait on the thread that is mastering the resource */ if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { __dlm_wait_on_lockres(tmpres); BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; } - if (tmpres->owner == dlm->node_num) { - BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); - dlm_lockres_grab_inflight_ref(dlm, tmpres); - } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) - dropping_ref = 1; - spin_unlock(&tmpres->spinlock); - - /* wait until done messaging the master, drop our ref to allow - * the lockres to be purged, start over. */ - if (dropping_ref) { - spin_lock(&tmpres->spinlock); - __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF); + /* Wait on the resource purge to complete before continuing */ + if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) { + BUG_ON(tmpres->owner == dlm->node_num); + __dlm_wait_on_lockres_flags(tmpres, + DLM_LOCK_RES_DROPPING_REF); spin_unlock(&tmpres->spinlock); dlm_lockres_put(tmpres); tmpres = NULL; goto lookup; } - mlog(0, "found in hash!\n"); + /* Grab inflight ref to pin the resource */ + dlm_lockres_grab_inflight_ref(dlm, tmpres); + + spin_unlock(&tmpres->spinlock); if (res) dlm_lockres_put(res); res = tmpres; @@ -829,8 +840,8 @@ lookup: * but they might own this lockres. wait on them. */ bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); if (bit < O2NM_MAX_NODES) { - mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " - "recover before lock mastery can begin\n", + mlog(0, "%s: res %.*s, At least one node (%d) " + "to recover before lock mastery can begin\n", dlm->name, namelen, (char *)lockid, bit); wait_on_recovery = 1; } @@ -843,12 +854,11 @@ lookup: /* finally add the lockres to its hash bucket */ __dlm_insert_lockres(dlm, res); - /* since this lockres is new it doesn't not require the spinlock */ - dlm_lockres_grab_inflight_ref_new(dlm, res); - /* if this node does not become the master make sure to drop - * this inflight reference below */ - drop_inflight_if_nonlocal = 1; + /* Grab inflight ref to pin the resource */ + spin_lock(&res->spinlock); + dlm_lockres_grab_inflight_ref(dlm, res); + spin_unlock(&res->spinlock); /* get an extra ref on the mle in case this is a BLOCK * if so, the creator of the BLOCK may try to put the last @@ -864,8 +874,8 @@ redo_request: * dlm spinlock would be detectable be a change on the mle, * so we only need to clear out the recovery map once. */ if (dlm_is_recovery_lock(lockid, namelen)) { - mlog(ML_NOTICE, "%s: recovery map is not empty, but " - "must master $RECOVERY lock now\n", dlm->name); + mlog(0, "%s: Recovery map is not empty, but must " + "master $RECOVERY lock now\n", dlm->name); if (!dlm_pre_master_reco_lockres(dlm, res)) wait_on_recovery = 0; else { @@ -883,8 +893,8 @@ redo_request: spin_lock(&dlm->spinlock); bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); if (bit < O2NM_MAX_NODES) { - mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " - "recover before lock mastery can begin\n", + mlog(0, "%s: res %.*s, At least one node (%d) " + "to recover before lock mastery can begin\n", dlm->name, namelen, (char *)lockid, bit); wait_on_recovery = 1; } else @@ -913,8 +923,8 @@ redo_request: * yet, keep going until it does. this is how the * master will know that asserts are needed back to * the lower nodes. */ - mlog(0, "%s:%.*s: requests only up to %u but master " - "is %u, keep going\n", dlm->name, namelen, + mlog(0, "%s: res %.*s, Requests only up to %u but " + "master is %u, keep going\n", dlm->name, namelen, lockid, nodenum, mle->master); } } @@ -924,13 +934,12 @@ wait: ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); if (ret < 0) { wait_on_recovery = 1; - mlog(0, "%s:%.*s: node map changed, redo the " - "master request now, blocked=%d\n", - dlm->name, res->lockname.len, + mlog(0, "%s: res %.*s, Node map changed, redo the master " + "request now, blocked=%d\n", dlm->name, res->lockname.len, res->lockname.name, blocked); if (++tries > 20) { - mlog(ML_ERROR, "%s:%.*s: spinning on " - "dlm_wait_for_lock_mastery, blocked=%d\n", + mlog(ML_ERROR, "%s: res %.*s, Spinning on " + "dlm_wait_for_lock_mastery, blocked = %d\n", dlm->name, res->lockname.len, res->lockname.name, blocked); dlm_print_one_lock_resource(res); @@ -940,7 +949,8 @@ wait: goto redo_request; } - mlog(0, "lockres mastered by %u\n", res->owner); + mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len, + res->lockname.name, res->owner); /* make sure we never continue without this */ BUG_ON(res->owner == O2NM_MAX_NODES); @@ -952,8 +962,6 @@ wait: wake_waiters: spin_lock(&res->spinlock); - if (res->owner != dlm->node_num && drop_inflight_if_nonlocal) - dlm_lockres_drop_inflight_ref(dlm, res); res->state &= ~DLM_LOCK_RES_IN_PROGRESS; spin_unlock(&res->spinlock); wake_up(&res->wq); @@ -1426,9 +1434,7 @@ way_up_top: } if (res->owner == dlm->node_num) { - mlog(0, "%s:%.*s: setting bit %u in refmap\n", - dlm->name, namelen, name, request->node_idx); - dlm_lockres_set_refmap_bit(request->node_idx, res); + dlm_lockres_set_refmap_bit(dlm, res, request->node_idx); spin_unlock(&res->spinlock); response = DLM_MASTER_RESP_YES; if (mle) @@ -1493,10 +1499,8 @@ way_up_top: * go back and clean the mles on any * other nodes */ dispatch_assert = 1; - dlm_lockres_set_refmap_bit(request->node_idx, res); - mlog(0, "%s:%.*s: setting bit %u in refmap\n", - dlm->name, namelen, name, - request->node_idx); + dlm_lockres_set_refmap_bit(dlm, res, + request->node_idx); } else response = DLM_MASTER_RESP_NO; } else { @@ -1702,7 +1706,7 @@ again: "lockres, set the bit in the refmap\n", namelen, lockname, to); spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(to, res); + dlm_lockres_set_refmap_bit(dlm, res, to); spin_unlock(&res->spinlock); } } @@ -2187,8 +2191,6 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) namelen = res->lockname.len; BUG_ON(namelen > O2NM_MAX_NAME_LEN); - mlog(0, "%s:%.*s: sending deref to %d\n", - dlm->name, namelen, lockname, res->owner); memset(&deref, 0, sizeof(deref)); deref.node_idx = dlm->node_num; deref.namelen = namelen; @@ -2197,14 +2199,12 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, &deref, sizeof(deref), res->owner, &r); if (ret < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key, - res->owner); + mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n", + dlm->name, namelen, lockname, ret, res->owner); else if (r < 0) { /* BAD. other node says I did not have a ref. */ - mlog(ML_ERROR,"while dropping ref on %s:%.*s " - "(master=%u) got %d.\n", dlm->name, namelen, - lockname, res->owner, r); + mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n", + dlm->name, namelen, lockname, res->owner, r); dlm_print_one_lock_resource(res); BUG(); } @@ -2260,7 +2260,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, else { BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); if (test_bit(node, res->refmap)) { - dlm_lockres_clear_refmap_bit(node, res); + dlm_lockres_clear_refmap_bit(dlm, res, node); cleared = 1; } } @@ -2320,7 +2320,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); if (test_bit(node, res->refmap)) { __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); - dlm_lockres_clear_refmap_bit(node, res); + dlm_lockres_clear_refmap_bit(dlm, res, node); cleared = 1; } spin_unlock(&res->spinlock); @@ -2802,7 +2802,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, BUG_ON(!list_empty(&lock->bast_list)); BUG_ON(lock->ast_pending); BUG_ON(lock->bast_pending); - dlm_lockres_clear_refmap_bit(lock->ml.node, res); + dlm_lockres_clear_refmap_bit(dlm, res, + lock->ml.node); list_del_init(&lock->list); dlm_lock_put(lock); /* In a normal unlock, we would have added a @@ -2823,7 +2824,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: node %u had a ref to this " "migrating lockres, clearing\n", dlm->name, res->lockname.len, res->lockname.name, bit); - dlm_lockres_clear_refmap_bit(bit, res); + dlm_lockres_clear_refmap_bit(dlm, res, bit); } bit++; } @@ -2916,9 +2917,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, &migrate, sizeof(migrate), nodenum, &status); if (ret < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG, - dlm->key, nodenum); + mlog(ML_ERROR, "%s: res %.*s, Error %d send " + "MIGRATE_REQUEST to node %u\n", dlm->name, + migrate.namelen, migrate.name, ret, nodenum); if (!dlm_is_host_down(ret)) { mlog(ML_ERROR, "unhandled error=%d!\n", ret); BUG(); @@ -2937,7 +2938,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, dlm->name, res->lockname.len, res->lockname.name, nodenum); spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(nodenum, res); + dlm_lockres_set_refmap_bit(dlm, res, nodenum); spin_unlock(&res->spinlock); } } @@ -3271,7 +3272,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, * mastery reference here since old_master will briefly have * a reference after the migration completes */ spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(old_master, res); + dlm_lockres_set_refmap_bit(dlm, res, old_master); spin_unlock(&res->spinlock); mlog(0, "now time to do a migrate request to other nodes\n"); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7efab6d28a21..01ebfd0bdad7 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -362,40 +362,38 @@ static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node) } -int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) +void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) { - if (timeout) { - mlog(ML_NOTICE, "%s: waiting %dms for notification of " - "death of node %u\n", dlm->name, timeout, node); + if (dlm_is_node_dead(dlm, node)) + return; + + printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in " + "domain %s\n", node, dlm->name); + + if (timeout) wait_event_timeout(dlm->dlm_reco_thread_wq, - dlm_is_node_dead(dlm, node), - msecs_to_jiffies(timeout)); - } else { - mlog(ML_NOTICE, "%s: waiting indefinitely for notification " - "of death of node %u\n", dlm->name, node); + dlm_is_node_dead(dlm, node), + msecs_to_jiffies(timeout)); + else wait_event(dlm->dlm_reco_thread_wq, dlm_is_node_dead(dlm, node)); - } - /* for now, return 0 */ - return 0; } -int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) +void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) { - if (timeout) { - mlog(0, "%s: waiting %dms for notification of " - "recovery of node %u\n", dlm->name, timeout, node); + if (dlm_is_node_recovered(dlm, node)) + return; + + printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in " + "domain %s\n", node, dlm->name); + + if (timeout) wait_event_timeout(dlm->dlm_reco_thread_wq, - dlm_is_node_recovered(dlm, node), - msecs_to_jiffies(timeout)); - } else { - mlog(0, "%s: waiting indefinitely for notification " - "of recovery of node %u\n", dlm->name, node); + dlm_is_node_recovered(dlm, node), + msecs_to_jiffies(timeout)); + else wait_event(dlm->dlm_reco_thread_wq, dlm_is_node_recovered(dlm, node)); - } - /* for now, return 0 */ - return 0; } /* callers of the top-level api calls (dlmlock/dlmunlock) should @@ -430,6 +428,8 @@ static void dlm_begin_recovery(struct dlm_ctxt *dlm) { spin_lock(&dlm->spinlock); BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); + printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", + dlm->name, dlm->reco.dead_node); dlm->reco.state |= DLM_RECO_STATE_ACTIVE; spin_unlock(&dlm->spinlock); } @@ -440,9 +440,18 @@ static void dlm_end_recovery(struct dlm_ctxt *dlm) BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; spin_unlock(&dlm->spinlock); + printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name); wake_up(&dlm->reco.event); } +static void dlm_print_recovery_master(struct dlm_ctxt *dlm) +{ + printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the " + "dead node %u in domain %s\n", dlm->reco.new_master, + (dlm->node_num == dlm->reco.new_master ? "me" : "he"), + dlm->reco.dead_node, dlm->name); +} + static int dlm_do_recovery(struct dlm_ctxt *dlm) { int status = 0; @@ -505,9 +514,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) } mlog(0, "another node will master this recovery session.\n"); } - mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", - dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, - dlm->node_num, dlm->reco.dead_node); + + dlm_print_recovery_master(dlm); /* it is safe to start everything back up here * because all of the dead node's lock resources @@ -518,15 +526,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) return 0; master_here: - mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node " - "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task), - dlm->node_num, dlm->reco.dead_node, dlm->name); + dlm_print_recovery_master(dlm); status = dlm_remaster_locks(dlm, dlm->reco.dead_node); if (status < 0) { /* we should never hit this anymore */ - mlog(ML_ERROR, "error %d remastering locks for node %u, " - "retrying.\n", status, dlm->reco.dead_node); + mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, " + "retrying.\n", dlm->name, status, dlm->reco.dead_node); /* yield a bit to allow any final network messages * to get handled on remaining nodes */ msleep(100); @@ -567,7 +573,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); ndata->state = DLM_RECO_NODE_DATA_REQUESTING; - mlog(0, "requesting lock info from node %u\n", + mlog(0, "%s: Requesting lock info from node %u\n", dlm->name, ndata->node_num); if (ndata->node_num == dlm->node_num) { @@ -640,7 +646,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) spin_unlock(&dlm_reco_state_lock); } - mlog(0, "done requesting all lock info\n"); + mlog(0, "%s: Done requesting all lock info\n", dlm->name); /* nodes should be sending reco data now * just need to wait */ @@ -802,10 +808,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, /* negative status is handled by caller */ if (ret < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG, - dlm->key, request_from); - + mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " + "to recover dead node %u\n", dlm->name, ret, + request_from, dead_node); // return from here, then // sleep until all received or error return ret; @@ -956,9 +961,9 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, sizeof(done_msg), send_to, &tmpret); if (ret < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG, - dlm->key, send_to); + mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u " + "to recover dead node %u\n", dlm->name, ret, send_to, + dead_node); if (!dlm_is_host_down(ret)) { BUG(); } @@ -1127,9 +1132,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, if (ret < 0) { /* XXX: negative status is not handled. * this will end up killing this node. */ - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG, - dlm->key, send_to); + mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to " + "node %u (%s)\n", dlm->name, mres->lockname_len, + mres->lockname, ret, send_to, + (orig_flags & DLM_MRES_MIGRATION ? + "migration" : "recovery")); } else { /* might get an -ENOMEM back here */ ret = status; @@ -1767,7 +1774,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, dlm->name, mres->lockname_len, mres->lockname, from); spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(from, res); + dlm_lockres_set_refmap_bit(dlm, res, from); spin_unlock(&res->spinlock); added++; break; @@ -1965,7 +1972,7 @@ skip_lvb: mlog(0, "%s:%.*s: added lock for node %u, " "setting refmap bit\n", dlm->name, res->lockname.len, res->lockname.name, ml->node); - dlm_lockres_set_refmap_bit(ml->node, res); + dlm_lockres_set_refmap_bit(dlm, res, ml->node); added++; } spin_unlock(&res->spinlock); @@ -2084,6 +2091,9 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { if (res->owner == dead_node) { + mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", + dlm->name, res->lockname.len, res->lockname.name, + res->owner, new_master); list_del_init(&res->recovering); spin_lock(&res->spinlock); /* new_master has our reference from @@ -2105,40 +2115,30 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); hlist_for_each_entry(res, hash_iter, bucket, hash_node) { - if (res->state & DLM_LOCK_RES_RECOVERING) { - if (res->owner == dead_node) { - mlog(0, "(this=%u) res %.*s owner=%u " - "was not on recovering list, but " - "clearing state anyway\n", - dlm->node_num, res->lockname.len, - res->lockname.name, new_master); - } else if (res->owner == dlm->node_num) { - mlog(0, "(this=%u) res %.*s owner=%u " - "was not on recovering list, " - "owner is THIS node, clearing\n", - dlm->node_num, res->lockname.len, - res->lockname.name, new_master); - } else - continue; + if (!(res->state & DLM_LOCK_RES_RECOVERING)) + continue; - if (!list_empty(&res->recovering)) { - mlog(0, "%s:%.*s: lockres was " - "marked RECOVERING, owner=%u\n", - dlm->name, res->lockname.len, - res->lockname.name, res->owner); - list_del_init(&res->recovering); - dlm_lockres_put(res); - } - spin_lock(&res->spinlock); - /* new_master has our reference from - * the lock state sent during recovery */ - dlm_change_lockres_owner(dlm, res, new_master); - res->state &= ~DLM_LOCK_RES_RECOVERING; - if (__dlm_lockres_has_locks(res)) - __dlm_dirty_lockres(dlm, res); - spin_unlock(&res->spinlock); - wake_up(&res->wq); + if (res->owner != dead_node && + res->owner != dlm->node_num) + continue; + + if (!list_empty(&res->recovering)) { + list_del_init(&res->recovering); + dlm_lockres_put(res); } + + /* new_master has our reference from + * the lock state sent during recovery */ + mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", + dlm->name, res->lockname.len, res->lockname.name, + res->owner, new_master); + spin_lock(&res->spinlock); + dlm_change_lockres_owner(dlm, res, new_master); + res->state &= ~DLM_LOCK_RES_RECOVERING; + if (__dlm_lockres_has_locks(res)) + __dlm_dirty_lockres(dlm, res); + spin_unlock(&res->spinlock); + wake_up(&res->wq); } } } @@ -2252,12 +2252,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, res->lockname.len, res->lockname.name, freed, dead_node); __dlm_print_one_lock_resource(res); } - dlm_lockres_clear_refmap_bit(dead_node, res); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); } else if (test_bit(dead_node, res->refmap)) { mlog(0, "%s:%.*s: dead node %u had a ref, but had " "no locks and had not purged before dying\n", dlm->name, res->lockname.len, res->lockname.name, dead_node); - dlm_lockres_clear_refmap_bit(dead_node, res); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); } /* do not kick thread yet */ @@ -2324,9 +2324,9 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) dlm_revalidate_lvb(dlm, res, dead_node); if (res->owner == dead_node) { if (res->state & DLM_LOCK_RES_DROPPING_REF) { - mlog(ML_NOTICE, "Ignore %.*s for " + mlog(ML_NOTICE, "%s: res %.*s, Skip " "recovery as it is being freed\n", - res->lockname.len, + dlm->name, res->lockname.len, res->lockname.name); } else dlm_move_lockres_to_recovery_list(dlm, diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 1d6d1d22c471..e73c833fc2a1 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -94,24 +94,26 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) { int bit; + assert_spin_locked(&res->spinlock); + if (__dlm_lockres_has_locks(res)) return 0; + /* Locks are in the process of being created */ + if (res->inflight_locks) + return 0; + if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) return 0; if (res->state & DLM_LOCK_RES_RECOVERING) return 0; + /* Another node has this resource with this node as the master */ bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); if (bit < O2NM_MAX_NODES) return 0; - /* - * since the bit for dlm->node_num is not set, inflight_locks better - * be zero - */ - BUG_ON(res->inflight_locks != 0); return 1; } @@ -185,8 +187,6 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, /* clear our bit from the master's refmap, ignore errors */ ret = dlm_drop_lockres_ref(dlm, res); if (ret < 0) { - mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name, - res->lockname.len, res->lockname.name, ret); if (!dlm_is_host_down(ret)) BUG(); } @@ -209,7 +209,7 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, BUG(); } - __dlm_unhash_lockres(res); + __dlm_unhash_lockres(dlm, res); /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ |