summaryrefslogtreecommitdiff
path: root/fs/reiserfs
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-05-14 02:56:39 +0200
committerFrederic Weisbecker <fweisbec@gmail.com>2009-09-14 07:18:22 +0200
commit2ac626955ed62ee8596f00581f959cc86e6198d1 (patch)
treea5bd28e558b889ad7c026199f6032b2ff33f9103 /fs/reiserfs
parentc63e3c0b2498adec921b06c670d12c8c74b85538 (diff)
downloadlwn-2ac626955ed62ee8596f00581f959cc86e6198d1.tar.gz
lwn-2ac626955ed62ee8596f00581f959cc86e6198d1.zip
kill-the-bkl/reiserfs: unlock only when needed in search_by_key
search_by_key() is the site which most requires the lock. This is mostly because it is a very central function and also because it releases/reaqcuires the write lock at least once each time it is called. Such release/reacquire creates a lot of contention in this place and also opens more the window which let another thread changing the tree. When it happens, the current path searching over the tree must be retried from the beggining (the root) which is a wasteful and time consuming recovery. This patch factorizes two release/reacquire sequences: - reading leaf nodes blocks - reading current block The latter immediately follows the former. The whole sequence is safe as a single unlocked section because we check just after if the tree has changed during these operations. Cc: Jeff Mahoney <jeffm@suse.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Alexander Beregalov <a.beregalov@gmail.com> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Diffstat (limited to 'fs/reiserfs')
-rw-r--r--fs/reiserfs/stree.c42
1 files changed, 34 insertions, 8 deletions
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 960c9114f6d3..6b025a42d510 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -519,12 +519,22 @@ static int is_tree_node(struct buffer_head *bh, int level)
#define SEARCH_BY_KEY_READA 16
-/* The function is NOT SCHEDULE-SAFE! */
-static void search_by_key_reada(struct super_block *s,
+/*
+ * The function is NOT SCHEDULE-SAFE!
+ * It might unlock the write lock if we needed to wait for a block
+ * to be read. Note that in this case it won't recover the lock to avoid
+ * high contention resulting from too much lock requests, especially
+ * the caller (search_by_key) will perform other schedule-unsafe
+ * operations just after calling this function.
+ *
+ * @return true if we have unlocked
+ */
+static bool search_by_key_reada(struct super_block *s,
struct buffer_head **bh,
b_blocknr_t *b, int num)
{
int i, j;
+ bool unlocked = false;
for (i = 0; i < num; i++) {
bh[i] = sb_getblk(s, b[i]);
@@ -536,16 +546,21 @@ static void search_by_key_reada(struct super_block *s,
* the lock. But it's still fine because we check later
* if the tree changed
*/
- reiserfs_write_unlock(s);
for (j = 0; j < i; j++) {
/*
* note, this needs attention if we are getting rid of the BKL
* you have to make sure the prepared bit isn't set on this buffer
*/
- if (!buffer_uptodate(bh[j]))
+ if (!buffer_uptodate(bh[j])) {
+ if (!unlocked) {
+ reiserfs_write_unlock(s);
+ unlocked = true;
+ }
ll_rw_block(READA, 1, bh + j);
+ }
brelse(bh[j]);
}
+ return unlocked;
}
/**************************************************************************
@@ -633,15 +648,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
have a pointer to it. */
if ((bh = last_element->pe_buffer =
sb_getblk(sb, block_number))) {
+ bool unlocked = false;
+
if (!buffer_uptodate(bh) && reada_count > 1)
- /* will unlock the write lock */
- search_by_key_reada(sb, reada_bh,
+ /* may unlock the write lock */
+ unlocked = search_by_key_reada(sb, reada_bh,
reada_blocks, reada_count);
- else
+ /*
+ * If we haven't already unlocked the write lock,
+ * then we need to do that here before reading
+ * the current block
+ */
+ if (!buffer_uptodate(bh) && !unlocked) {
reiserfs_write_unlock(sb);
+ unlocked = true;
+ }
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
- reiserfs_write_lock(sb);
+
+ if (unlocked)
+ reiserfs_write_lock(sb);
if (!buffer_uptodate(bh))
goto io_error;
} else {