diff options
author | Nick Piggin <npiggin@suse.de> | 2010-01-29 15:38:28 -0800 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-04-27 17:32:43 +0200 |
commit | 6c2613987518fa790e4eb7b2c649051d61cdd599 (patch) | |
tree | 219a4fb2507b96a50fe17589b1e4f5a94d125fd9 | |
parent | 1bb96f6d4dece641a9e9d7b97b109f6930967067 (diff) | |
download | lwn-6c2613987518fa790e4eb7b2c649051d61cdd599.tar.gz lwn-6c2613987518fa790e4eb7b2c649051d61cdd599.zip |
fs-inode_lock-scale-6
Add a new lock, wb_inode_list_lock, to protect i_list and the various lists
that an inode can be placed on.
XXX: ocfs2 has not been audited for this change yet.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | fs/fs-writeback.c | 33 | ||||
-rw-r--r-- | fs/inode.c | 43 | ||||
-rw-r--r-- | include/linux/writeback.h | 1 | ||||
-rw-r--r-- | mm/backing-dev.c | 4 |
4 files changed, 72 insertions, 9 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1e9ead755050..0b522bd51c09 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -286,6 +286,7 @@ static void redirty_tail(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + assert_spin_locked(&wb_inode_list_lock); if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -303,6 +304,7 @@ static void requeue_io(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + assert_spin_locked(&wb_inode_list_lock); list_move(&inode->i_list, &wb->b_more_io); } @@ -343,6 +345,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, struct inode *inode; int do_sb_sort = 0; + assert_spin_locked(&wb_inode_list_lock); while (!list_empty(delaying_queue)) { inode = list_entry(delaying_queue->prev, struct inode, i_list); if (older_than_this && @@ -398,11 +401,13 @@ static void inode_wait_for_writeback(struct inode *inode) wqh = bit_waitqueue(&inode->i_state, __I_SYNC); do { + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); spin_lock(&inode_lock); spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); } while (inode->i_state & I_SYNC); } @@ -459,6 +464,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY; + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); @@ -479,6 +485,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode_lock); spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & (I_FREEING | I_CLEAR))) { if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { @@ -622,6 +629,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, const unsigned long start = jiffies; /* livelock avoidance */ spin_lock(&inode_lock); +again: + 
spin_lock(&wb_inode_list_lock); if (!wbc->for_kupdate || list_empty(&wb->b_io)) queue_io(wb, wbc->older_than_this); @@ -631,15 +640,20 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, struct inode, i_list); long pages_skipped; + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto again; + } + /* * super block given and doesn't match, skip this inode */ if (sb && sb != inode->i_sb) { redirty_tail(inode); + spin_unlock(&inode->i_lock); continue; } - spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_WILL_FREE)) { spin_unlock(&inode->i_lock); requeue_io(inode); @@ -672,11 +686,13 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, */ redirty_tail(inode); } + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); iput(inode); cond_resched(); spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); if (wbc->nr_to_write <= 0) { wbc->more_io = 1; break; @@ -684,6 +700,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, if (!list_empty(&wb->b_more_io)) wbc->more_io = 1; } + spin_unlock(&wb_inode_list_lock); unpin_sb_for_writeback(&pin_sb); @@ -799,12 +816,20 @@ static long wb_writeback(struct bdi_writeback *wb, * become available for writeback. Otherwise * we'll just busyloop. 
*/ +retry: spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); if (!list_empty(&wb->b_more_io)) { inode = list_entry(wb->b_more_io.prev, struct inode, i_list); + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode_lock); + goto retry; + } inode_wait_for_writeback(inode); } + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode_lock); } @@ -1101,7 +1126,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) } inode->dirtied_when = jiffies; + spin_lock(&wb_inode_list_lock); list_move(&inode->i_list, &wb->b_dirty); + spin_unlock(&wb_inode_list_lock); } } out: @@ -1265,7 +1292,9 @@ int write_inode_now(struct inode *inode, int sync) might_sleep(); spin_lock(&inode_lock); spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); ret = writeback_single_inode(inode, &wbc); + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); if (sync) @@ -1291,7 +1320,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode_lock); spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); ret = writeback_single_inode(inode, wbc); + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); return ret; diff --git a/fs/inode.c b/fs/inode.c index 99d664bafe9f..76494983644d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -86,6 +86,7 @@ static struct hlist_head *inode_hashtable __read_mostly; */ DEFINE_SPINLOCK(inode_lock); DEFINE_SPINLOCK(sb_inode_list_lock); +DEFINE_SPINLOCK(wb_inode_list_lock); DEFINE_SPINLOCK(inode_hash_lock); /* @@ -294,8 +295,11 @@ void __iget(struct inode *inode) if (inode->i_count > 1) return; - if (!(inode->i_state & (I_DIRTY|I_SYNC))) + if (!(inode->i_state & (I_DIRTY|I_SYNC))) { + spin_lock(&wb_inode_list_lock); list_move(&inode->i_list, &inode_in_use); + spin_unlock(&wb_inode_list_lock); + } inodes_stat.nr_unused--; } @@ -399,7 +403,9 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) 
} invalidate_inode_buffers(inode); if (!inode->i_count) { + spin_lock(&wb_inode_list_lock); list_move(&inode->i_list, dispose); + spin_unlock(&wb_inode_list_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; spin_unlock(&inode->i_lock); @@ -478,6 +484,8 @@ static void prune_icache(int nr_to_scan) down_read(&iprune_sem); spin_lock(&inode_lock); +again: + spin_lock(&wb_inode_list_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -486,13 +494,17 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_unused.prev, struct inode, i_list); - spin_lock(&inode->i_lock); + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto again; + } if (inode->i_state || inode->i_count) { list_move(&inode->i_list, &inode_unused); spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { + spin_unlock(&wb_inode_list_lock); __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&inode_lock); @@ -501,11 +513,16 @@ static void prune_icache(int nr_to_scan) 0, -1); iput(inode); spin_lock(&inode_lock); +again2: + spin_lock(&wb_inode_list_lock); if (inode != list_entry(inode_unused.next, struct inode, i_list)) continue; /* wrong inode or list_empty */ - spin_lock(&inode->i_lock); + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto again2; + } if (!can_unuse(inode)) { spin_unlock(&inode->i_lock); continue; @@ -523,6 +540,7 @@ static void prune_icache(int nr_to_scan) else __count_vm_events(PGINODESTEAL, reap); spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); dispose_list(&freeable); up_read(&iprune_sem); @@ -646,7 +664,9 @@ __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, spin_lock(&sb_inode_list_lock); list_add(&inode->i_sb_list, &sb->s_inodes); spin_unlock(&sb_inode_list_lock); + spin_lock(&wb_inode_list_lock); list_add(&inode->i_list, &inode_in_use); + spin_unlock(&wb_inode_list_lock); if 
(head) { spin_lock(&inode_hash_lock); hlist_add_head(&inode->i_hash, head); @@ -1276,7 +1296,9 @@ void generic_delete_inode(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; + spin_lock(&wb_inode_list_lock); list_del_init(&inode->i_list); + spin_unlock(&wb_inode_list_lock); list_del_init(&inode->i_sb_list); spin_unlock(&sb_inode_list_lock); WARN_ON(inode->i_state & I_NEW); @@ -1325,8 +1347,11 @@ int generic_detach_inode(struct inode *inode) struct super_block *sb = inode->i_sb; if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) + if (!(inode->i_state & (I_DIRTY|I_SYNC))) { + spin_lock(&wb_inode_list_lock); list_move(&inode->i_list, &inode_unused); + spin_unlock(&wb_inode_list_lock); + } inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode->i_lock); @@ -1350,7 +1375,9 @@ int generic_detach_inode(struct inode *inode) hlist_del_init(&inode->i_hash); spin_unlock(&inode_hash_lock); } + spin_lock(&wb_inode_list_lock); list_del_init(&inode->i_list); + spin_unlock(&wb_inode_list_lock); list_del_init(&inode->i_sb_list); spin_unlock(&sb_inode_list_lock); WARN_ON(inode->i_state & I_NEW); @@ -1422,17 +1449,17 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state == I_CLEAR); -retry: +retry1: spin_lock(&inode->i_lock); if (inode->i_count == 1) { if (!spin_trylock(&inode_lock)) { +retry2: spin_unlock(&inode->i_lock); - goto retry; + goto retry1; } if (!spin_trylock(&sb_inode_list_lock)) { spin_unlock(&inode_lock); - spin_unlock(&inode->i_lock); - goto retry; + goto retry2; } inode->i_count--; iput_final(inode); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 5b9178fbb261..90ad0abb935f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,6 +11,7 @@ struct backing_dev_info; extern spinlock_t inode_lock; extern spinlock_t sb_inode_list_lock; +extern spinlock_t wb_inode_list_lock; extern spinlock_t inode_hash_lock; extern struct list_head 
inode_in_use; extern struct list_head inode_unused; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 0e8ca0347707..ea2a4e42cab1 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -72,6 +72,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) */ nr_wb = nr_dirty = nr_io = nr_more_io = 0; spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); list_for_each_entry(wb, &bdi->wb_list, list) { nr_wb++; list_for_each_entry(inode, &wb->b_dirty, i_list) @@ -81,6 +82,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) list_for_each_entry(inode, &wb->b_more_io, i_list) nr_more_io++; } + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode_lock); get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); @@ -697,9 +699,11 @@ void bdi_destroy(struct backing_dev_info *bdi) struct bdi_writeback *dst = &default_backing_dev_info.wb; spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); list_splice(&bdi->wb.b_dirty, &dst->b_dirty); list_splice(&bdi->wb.b_io, &dst->b_io); list_splice(&bdi->wb.b_more_io, &dst->b_more_io); + spin_unlock(&wb_inode_list_lock); spin_unlock(&inode_lock); } |