author		John Stultz <johnstul@us.ibm.com>	2010-06-04 15:20:23 -0700
committer	Thomas Gleixner <tglx@linutronix.de>	2010-06-08 20:02:30 +0200
commit		697684652c217b241a07d9e261c7a20e9b072d43
tree		ac2de77682f9e43def0f1b2c4eb05f0fb07ebb28
parent		1f0f6ec523917972127caf33dee552990db1841e
dcache: Fix select_parent dentry traversal locking
With the vfs-scalability patches, the dcache_lock was replaced by the dentry->d_lock for protecting dentry->d_subdirs as well as child dentries from being killed from under us.

However, in select_parent, to minimize lock holds, the parent d_lock was released when traversing down a directory. Then, on ascending, the parent d_lock needed to be reacquired. This causes problems because we must acquire the parent lock first, so we would have to release the child lock, acquire the parent lock, then reacquire the next child lock.

The problem is that when we released the child lock, we held no locks at all. If we were preempted here, the next dentry we were going to look at could be killed from under us and the d_subdirs list modified, so when we then reacquire the parent lock, the next value is stale and we have lost our place in the d_subdirs list. This results in semi-random NULL pointer dereferences and GPF oopses.

To resolve this, I simply made select_parent continue to hold the parent lock as it descends the directory tree. This avoids the lock acquisition dance on ascend and avoids the race illustrated above.

This may result in higher contention due to us holding more locks, but it is still better than the global dcache_lock that was originally replaced.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "Luis Claudio R. Goncalves" <lclaudio@uudg.org>
Cc: Clark Williams <williams@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--	fs/dcache.c	29
1 file changed, 12 insertions, 17 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 23a3401af2fb..673cdc22ca46 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1052,12 +1052,12 @@ resume:
/*
* Descend a level if the d_subdirs list is non-empty.
+ * Note that we keep a hold on the parent lock while
+ * we descend, so we don't have to reacquire it on
+ * ascend.
*/
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&this_parent->d_lock);
- spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
- spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
@@ -1071,25 +1071,20 @@ resume:
struct dentry *child;
tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- // d_unlinked(this_parent) || XXX
- read_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
- goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
+ spin_unlock(&this_parent->d_lock);
+ this_parent = tmp;
goto resume;
}
+
out:
+ /* Make sure we unlock all the way back up the tree */
+ while (this_parent != parent) {
+ struct dentry *tmp = this_parent->d_parent;
+ spin_unlock(&this_parent->d_lock);
+ this_parent = tmp;
+ }
spin_unlock(&this_parent->d_lock);
if (read_seqretry(&rename_lock, seq))
goto rename_retry;
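
As a standalone illustration of the locking pattern the patch adopts (not the kernel code itself), here is a minimal userspace sketch that uses pthread mutexes in place of spinlocks. The struct node type, walk(), and its field names are hypothetical stand-ins for struct dentry, select_parent(), and the d_subdirs/d_child lists; the control flow is what matters: locks accumulate along the path during descent, so ascending never leaves the walker holding no lock at all.

#include <pthread.h>
#include <stdio.h>

/* Illustrative node type; field names are made up, not struct dentry's. */
struct node {
	const char *name;
	pthread_mutex_t lock;
	struct node *parent;
	struct node *first_child;
	struct node *next_sibling;
};

/*
 * Depth-first walk mirroring the patched select_parent() flow: every node
 * on the path from 'parent' down to the current level stays locked, so we
 * never stand on a sibling pointer while holding no lock.
 */
static void walk(struct node *parent, void (*visit)(struct node *))
{
	struct node *this_parent = parent;
	struct node *next;

	pthread_mutex_lock(&this_parent->lock);
repeat:
	next = this_parent->first_child;
resume:
	while (next) {
		struct node *cur = next;

		pthread_mutex_lock(&cur->lock);
		visit(cur);

		if (cur->first_child) {
			/* Descend while keeping this_parent's lock held. */
			this_parent = cur;
			goto repeat;
		}
		next = cur->next_sibling;
		pthread_mutex_unlock(&cur->lock);
	}
	if (this_parent != parent) {
		/* Ascend: the node we return to is still locked from the descent. */
		struct node *child = this_parent;

		this_parent = this_parent->parent;
		next = child->next_sibling;
		pthread_mutex_unlock(&child->lock);
		goto resume;
	}
	pthread_mutex_unlock(&this_parent->lock);
}

static void print_name(struct node *n)
{
	printf("%s\n", n->name);
}

int main(void)
{
	struct node root = { "root", PTHREAD_MUTEX_INITIALIZER, NULL,  NULL, NULL };
	struct node a    = { "a",    PTHREAD_MUTEX_INITIALIZER, &root, NULL, NULL };
	struct node a1   = { "a/1",  PTHREAD_MUTEX_INITIALIZER, &a,    NULL, NULL };
	struct node b    = { "b",    PTHREAD_MUTEX_INITIALIZER, &root, NULL, NULL };

	root.first_child = &a;
	a.next_sibling   = &b;
	a.first_child    = &a1;

	walk(&root, print_name);	/* visits a, a/1, b */
	return 0;
}

As with the patch, the trade-off is holding one lock per level of depth, which can increase contention, but it removes the window in which a concurrently removed sibling could leave next pointing at freed memory.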