diff options
author | Nick Piggin <npiggin@suse.de> | 2010-01-29 15:38:34 -0800 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-04-27 17:32:54 +0200 |
commit | c8c34fe3ba9b739aa495e2f295ac8d46b479567a (patch) | |
tree | 1605f9ecb04aeb0d418004e07d4f2b33e13f7c66 | |
parent | d48369f49733ab843c216f41519eebc4fb1a5357 (diff) | |
download | lwn-c8c34fe3ba9b739aa495e2f295ac8d46b479567a.tar.gz lwn-c8c34fe3ba9b739aa495e2f295ac8d46b479567a.zip |
fs-sb-inodes-percpu
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | fs/drop_caches.c | 37 | ||||
-rw-r--r-- | fs/fs-writeback.c | 80 | ||||
-rw-r--r-- | fs/inode.c | 128 | ||||
-rw-r--r-- | fs/notify/inode_mark.c | 112 | ||||
-rw-r--r-- | fs/notify/inotify/inotify.c | 134 | ||||
-rw-r--r-- | fs/quota/dquot.c | 102 | ||||
-rw-r--r-- | fs/super.c | 19 | ||||
-rw-r--r-- | include/linux/fs.h | 10 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 4 | ||||
-rw-r--r-- | include/linux/inotify.h | 4 |
10 files changed, 377 insertions, 253 deletions
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 9962d37d69fc..ee55bbf3bd37 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -14,26 +14,35 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb) { - struct inode *inode, *toput_inode = NULL; + int i; - rcu_read_lock(); - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) { - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW) - || inode->i_mapping->nrpages == 0) { + for_each_possible_cpu(i) { + struct inode *inode, *toput_inode = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW) + || inode->i_mapping->nrpages == 0) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); spin_unlock(&inode->i_lock); - continue; + rcu_read_unlock(); + invalidate_mapping_pages(inode->i_mapping, 0, -1); + iput(toput_inode); + toput_inode = inode; + rcu_read_lock(); } - __iget(inode); - spin_unlock(&inode->i_lock); rcu_read_unlock(); - invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); - toput_inode = inode; - rcu_read_lock(); } - rcu_read_unlock(); - iput(toput_inode); } static void drop_pagecache(void) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e5333cf53a5c..5fc0515a0e85 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1142,7 +1142,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); */ static void wait_sb_inodes(struct super_block *sb) { - struct inode *inode, *old_inode = NULL; + int i; /* * We need to be protected against the filesystem going from @@ -1150,47 +1150,57 @@ static void wait_sb_inodes(struct super_block *sb) */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); - /* - * Data integrity sync. 
Must wait for all pages under writeback, - * because there may have been pages dirtied before our sync - * call, but which had writeout started before we write it out. - * In which case, the inode may not be on the dirty list, but - * we still have to wait for that writeout. - */ - rcu_read_lock(); - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) { - struct address_space *mapping; + for_each_possible_cpu(i) { + struct inode *inode, *old_inode = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + /* + * Data integrity sync. Must wait for all pages under writeback, + * because there may have been pages dirtied before our sync + * call, but which had writeout started before we write it out. + * In which case, the inode may not be on the dirty list, but + * we still have to wait for that writeout. + */ + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + struct address_space *mapping; - mapping = inode->i_mapping; - if (mapping->nrpages == 0) - continue; + mapping = inode->i_mapping; + if (mapping->nrpages == 0) + continue; - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); spin_unlock(&inode->i_lock); - continue; - } - __iget(inode); - spin_unlock(&inode->i_lock); - rcu_read_unlock(); - /* - * We hold a reference to 'inode' so it couldn't have been - * removed from s_inodes list while we dropped the i_lock. We - * cannot iput the inode now as we can be holding the last - * reference and we cannot iput it under spinlock. So we keep - * the reference and iput it later. - */ - iput(old_inode); - old_inode = inode; + rcu_read_unlock(); + /* + * We hold a reference to 'inode' so it couldn't have + * been removed from s_inodes list while we dropped the + * i_lock. 
We cannot iput the inode now as we can be + * holding the last reference and we cannot iput it + * under spinlock. So we keep the reference and iput it + * later. + */ + iput(old_inode); + old_inode = inode; - filemap_fdatawait(mapping); + filemap_fdatawait(mapping); - cond_resched(); + cond_resched(); - rcu_read_lock(); + rcu_read_lock(); + } + rcu_read_unlock(); + iput(old_inode); } - rcu_read_unlock(); - iput(old_inode); } /** diff --git a/fs/inode.c b/fs/inode.c index 2c4d74aaeb2a..fdd682265869 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -88,7 +88,7 @@ static struct inode_hash_bucket *inode_hashtable __read_mostly; * NOTE! You also have to own the lock if you change * the i_state of an inode while it is in use.. */ -DEFINE_SPINLOCK(sb_inode_list_lock); +static DEFINE_PER_CPU(spinlock_t, inode_cpulock); DEFINE_SPINLOCK(wb_inode_list_lock); /* @@ -373,9 +373,7 @@ static void dispose_list(struct list_head *head) spin_lock(&inode->i_lock); __remove_inode_hash(inode); - spin_lock(&sb_inode_list_lock); - list_del_rcu(&inode->i_sb_list); - spin_unlock(&sb_inode_list_lock); + inode_sb_list_del(inode); spin_unlock(&inode->i_lock); wake_up_inode(inode); @@ -387,39 +385,49 @@ static void dispose_list(struct list_head *head) /* * Invalidate all inodes for a device. 
*/ -static int invalidate_list(struct list_head *head, struct list_head *dispose) +static int invalidate_sb_inodes(struct super_block *sb, struct list_head *dispose) { - struct list_head *next; int busy = 0; + int i; - next = head->next; - for (;;) { - struct list_head *tmp = next; - struct inode *inode; + for_each_possible_cpu(i) { + struct list_head *next; + struct list_head *head; +#ifdef CONFIG_SMP + head = per_cpu_ptr(sb->s_inodes, i); +#else + head = &sb->s_inodes; +#endif - next = next->next; - if (tmp == head) - break; - inode = list_entry(tmp, struct inode, i_sb_list); - spin_lock(&inode->i_lock); - if (inode->i_state & I_NEW) { - spin_unlock(&inode->i_lock); - continue; - } - invalidate_inode_buffers(inode); - if (!inode->i_count) { - spin_lock(&wb_inode_list_lock); - list_del(&inode->i_list); - inodes_stat.nr_unused--; - spin_unlock(&wb_inode_list_lock); - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; + next = head->next; + for (;;) { + struct list_head *tmp = next; + struct inode *inode; + + next = next->next; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_sb_list); + spin_lock(&inode->i_lock); + if (inode->i_state & I_NEW) { + spin_unlock(&inode->i_lock); + continue; + } + invalidate_inode_buffers(inode); + if (!inode->i_count) { + spin_lock(&wb_inode_list_lock); + list_del(&inode->i_list); + inodes_stat.nr_unused--; + spin_unlock(&wb_inode_list_lock); + WARN_ON(inode->i_state & I_NEW); + inode->i_state |= I_FREEING; + spin_unlock(&inode->i_lock); + list_add(&inode->i_list, dispose); + continue; + } spin_unlock(&inode->i_lock); - list_add(&inode->i_list, dispose); - continue; + busy = 1; } - spin_unlock(&inode->i_lock); - busy = 1; } return busy; } @@ -444,9 +452,9 @@ int invalidate_inodes(struct super_block *sb) */ down_write(&iprune_sem); // spin_lock(&sb_inode_list_lock); XXX: is this safe? 
- inotify_unmount_inodes(&sb->s_inodes); - fsnotify_unmount_inodes(&sb->s_inodes); - busy = invalidate_list(&sb->s_inodes, &throw_away); + inotify_unmount_inodes(sb); + fsnotify_unmount_inodes(sb); + busy = invalidate_sb_inodes(sb, &throw_away); // spin_unlock(&sb_inode_list_lock); dispose_list(&throw_away); @@ -665,13 +673,47 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) return tmp & I_HASHMASK; } +static void inode_sb_list_add(struct inode *inode, struct super_block *sb) +{ + spinlock_t *lock; + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; +#endif + + lock = &get_cpu_var(inode_cpulock); +#ifdef CONFIG_SMP + cpu = smp_processor_id(); + list = per_cpu_ptr(sb->s_inodes, cpu); + inode->i_sb_list_cpu = cpu; +#else + list = &sb->s_inodes; /* !SMP: single embedded per-sb inode list */ +#endif + spin_lock(lock); + list_add_rcu(&inode->i_sb_list, list); + spin_unlock(lock); + put_cpu_var(inode_cpulock); +} + +void inode_sb_list_del(struct inode *inode) +{ + spinlock_t *lock; + +#ifdef CONFIG_SMP + lock = &per_cpu(inode_cpulock, inode->i_sb_list_cpu); +#else + lock = &__get_cpu_var(inode_cpulock); +#endif + spin_lock(lock); + list_del_rcu(&inode->i_sb_list); + spin_unlock(lock); +} + static inline void __inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b, struct inode *inode) { - spin_lock(&sb_inode_list_lock); - list_add_rcu(&inode->i_sb_list, &sb->s_inodes); - spin_unlock(&sb_inode_list_lock); + inode_sb_list_add(inode, sb); percpu_counter_inc(&nr_inodes); if (b) { spin_lock(&b->lock); @@ -1349,9 +1391,7 @@ void generic_delete_inode(struct inode *inode) list_del_init(&inode->i_list); spin_unlock(&wb_inode_list_lock); } - spin_lock(&sb_inode_list_lock); - list_del_rcu(&inode->i_sb_list); - spin_unlock(&sb_inode_list_lock); + inode_sb_list_del(inode); percpu_counter_dec(&nr_inodes); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; @@ -1426,9 +1466,7 @@ int generic_detach_inode(struct inode *inode) inodes_stat.nr_unused--; 
spin_unlock(&wb_inode_list_lock); } - spin_lock(&sb_inode_list_lock); - list_del_rcu(&inode->i_sb_list); - spin_unlock(&sb_inode_list_lock); + inode_sb_list_del(inode); percpu_counter_dec(&nr_inodes); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; @@ -1750,6 +1788,10 @@ void __init inode_init(void) init_once); register_shrinker(&icache_shrinker); + for_each_possible_cpu(loop) { + spin_lock_init(&per_cpu(inode_cpulock, loop)); + } + /* Hash may have been set up in inode_init_early */ if (!hashdist) return; diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index c705b7934034..be1af807e67b 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -362,65 +362,75 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry, * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. * We temporarily drop inode_lock, however, and CAN block. */ -void fsnotify_unmount_inodes(struct list_head *list) +void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; - - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { - struct inode *need_iput_tmp; - - spin_lock(&inode->i_lock); - /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. 
- */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } + int i; + + for_each_possible_cpu(i) { + struct inode *inode, *next_i, *need_iput = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inode *need_iput_tmp; + + spin_lock(&inode->i_lock); + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!inode->i_count) { + spin_unlock(&inode->i_lock); + continue; + } - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!inode->i_count) { + need_iput_tmp = need_iput; + need_iput = NULL; + + /* In case fsnotify_inode_delete() drops a reference. */ + if (inode != need_iput_tmp) { + __iget(inode); + } else + need_iput_tmp = NULL; spin_unlock(&inode->i_lock); - continue; - } - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) { - __iget(inode); - } else - need_iput_tmp = NULL; - spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. 
*/ - if (&next_i->i_sb_list != list) { - spin_lock(&next_i->i_lock); - if (next_i->i_count && - !(next_i->i_state & - (I_CLEAR | I_FREEING | I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; + /* In case the dropping of a reference would nuke next_i. */ + if (&next_i->i_sb_list != list) { + spin_lock(&next_i->i_lock); + if (next_i->i_count && + !(next_i->i_state & + (I_CLEAR | I_FREEING | I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + spin_unlock(&next_i->i_lock); } - spin_unlock(&next_i->i_lock); - } - if (need_iput_tmp) - iput(need_iput_tmp); + if (need_iput_tmp) + iput(need_iput_tmp); - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - fsnotify_inode_delete(inode); + fsnotify_inode_delete(inode); - iput(inode); + iput(inode); + } } } diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 1c0bc7b681f7..e0a92bf683d6 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c @@ -385,76 +385,86 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie); * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. * We temporarily drop inode_lock, however, and CAN block. 
*/ -void inotify_unmount_inodes(struct list_head *list) -{ - struct inode *inode, *next_i, *need_iput = NULL; +void inotify_unmount_inodes(struct super_block *sb) +{ + int i; + + for_each_possible_cpu(i) { + struct inode *inode, *next_i, *need_iput = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inotify_watch *watch, *next_w; + struct inode *need_iput_tmp; + struct list_head *watches; + + spin_lock(&inode->i_lock); + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { - struct inotify_watch *watch, *next_w; - struct inode *need_iput_tmp; - struct list_head *watches; + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!inode->i_count) { + spin_unlock(&inode->i_lock); + continue; + } - spin_lock(&inode->i_lock); - /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } + need_iput_tmp = need_iput; + need_iput = NULL; + /* In case inotify_remove_watch_locked() drops a reference. 
*/ + if (inode != need_iput_tmp) { + __iget(inode); + } else + need_iput_tmp = NULL; - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!inode->i_count) { spin_unlock(&inode->i_lock); - continue; - } - need_iput_tmp = need_iput; - need_iput = NULL; - /* In case inotify_remove_watch_locked() drops a reference. */ - if (inode != need_iput_tmp) { - __iget(inode); - } else - need_iput_tmp = NULL; - - spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - if (&next_i->i_sb_list != list) { - spin_lock(&next_i->i_lock); - if (next_i->i_count && - !(next_i->i_state & - (I_CLEAR|I_FREEING|I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; + /* In case the dropping of a reference would nuke next_i. */ + if (&next_i->i_sb_list != list) { + spin_lock(&next_i->i_lock); + if (next_i->i_count && + !(next_i->i_state & + (I_CLEAR|I_FREEING|I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + spin_unlock(&next_i->i_lock); } - spin_unlock(&next_i->i_lock); - } - - if (need_iput_tmp) - iput(need_iput_tmp); - /* for each watch, send IN_UNMOUNT and then remove it */ - mutex_lock(&inode->inotify_mutex); - watches = &inode->inotify_watches; - list_for_each_entry_safe(watch, next_w, watches, i_list) { - struct inotify_handle *ih = watch->ih; - get_inotify_watch(watch); - mutex_lock(&ih->mutex); - ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); - inotify_remove_watch_locked(ih, watch); - mutex_unlock(&ih->mutex); - put_inotify_watch(watch); + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send IN_UNMOUNT and then remove it */ + mutex_lock(&inode->inotify_mutex); + watches = &inode->inotify_watches; + list_for_each_entry_safe(watch, next_w, watches, i_list) { + struct inotify_handle *ih = 
watch->ih; + get_inotify_watch(watch); + mutex_lock(&ih->mutex); + ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); + inotify_remove_watch_locked(ih, watch); + mutex_unlock(&ih->mutex); + put_inotify_watch(watch); + } + mutex_unlock(&inode->inotify_mutex); + iput(inode); } - mutex_unlock(&inode->inotify_mutex); - iput(inode); } } EXPORT_SYMBOL_GPL(inotify_unmount_inodes); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7d5ecefc4091..4c6a62baa109 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -841,46 +841,55 @@ static int dqinit_needed(struct inode *inode, int type) /* This routine is guarded by dqonoff_mutex mutex */ static void add_dquot_ref(struct super_block *sb, int type) { - struct inode *inode, *old_inode = NULL; int reserved = 0; + int i; + for_each_possible_cpu(i) { + struct inode *inode, *old_inode = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif - rcu_read_lock(); - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) { - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } - if (unlikely(inode_get_rsv_space(inode) > 0)) - reserved = 1; + if (unlikely(inode_get_rsv_space(inode) > 0)) + reserved = 1; - if (!atomic_read(&inode->i_writecount)) { - spin_unlock(&inode->i_lock); - continue; - } + if (!atomic_read(&inode->i_writecount)) { + spin_unlock(&inode->i_lock); + continue; + } + + if (!dqinit_needed(inode, type)) { + spin_unlock(&inode->i_lock); + continue; + } - if (!dqinit_needed(inode, type)) { + __iget(inode); spin_unlock(&inode->i_lock); - continue; + rcu_read_unlock(); + + iput(old_inode); + sb->dq_op->initialize(inode, type); + /* We 
hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the inode_lock. + * We cannot iput the inode now as we can be holding the last + * reference and we cannot iput it under inode_lock. So we + * keep the reference and iput it later. */ + old_inode = inode; + rcu_read_lock(); } - - __iget(inode); - spin_unlock(&inode->i_lock); rcu_read_unlock(); - iput(old_inode); - sb->dq_op->initialize(inode, type); - /* We hold a reference to 'inode' so it couldn't have been - * removed from s_inodes list while we dropped the inode_lock. - * We cannot iput the inode now as we can be holding the last - * reference and we cannot iput it under inode_lock. So we - * keep the reference and iput it later. */ - old_inode = inode; - rcu_read_lock(); } - rcu_read_unlock(); - iput(old_inode); if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened before quota" @@ -953,20 +962,29 @@ static void put_dquot_list(struct list_head *tofree_head) static void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { - struct inode *inode; + int i; + for_each_possible_cpu(i) { + struct inode *inode; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif - rcu_read_lock(); - list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) { - /* - * We have to scan also I_NEW inodes because they can already - * have quota pointer initialized. Luckily, we need to touch - * only quota pointers and these have separate locking - * (dqptr_sem). - */ - if (!IS_NOQUOTA(inode)) - remove_inode_dquot_ref(inode, type, tofree_head); + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + /* + * We have to scan also I_NEW inodes because they can already + * have quota pointer initialized. Luckily, we need to touch + * only quota pointers and these have separate locking + * (dqptr_sem). 
+ */ + if (!IS_NOQUOTA(inode)) + remove_inode_dquot_ref(inode, type, tofree_head); + } + rcu_read_unlock(); } - rcu_read_unlock(); } /* Gather all references from inodes and drop them */ diff --git a/fs/super.c b/fs/super.c index 9bb1f386bc52..84c4aafd6fae 100644 --- a/fs/super.c +++ b/fs/super.c @@ -78,9 +78,25 @@ static struct super_block *alloc_super(struct file_system_type *type) #else INIT_LIST_HEAD(&s->s_files); #endif +#ifdef CONFIG_SMP + s->s_inodes = alloc_percpu(struct list_head); + if (!s->s_inodes) { + free_percpu(s->s_files); + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_inodes, i)); + } +#else + INIT_LIST_HEAD(&s->s_inodes); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); - INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); @@ -133,6 +149,7 @@ out: static inline void destroy_super(struct super_block *s) { #ifdef CONFIG_SMP + free_percpu(s->s_inodes); free_percpu(s->s_files); #endif security_sb_free(s); diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d5c2e7b398..94b8fa3f486e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -733,6 +733,9 @@ struct inode { struct rcu_head i_rcu; }; unsigned long i_ino; +#ifdef CONFIG_SMP + int i_sb_list_cpu; +#endif unsigned int i_count; unsigned int i_nlink; uid_t i_uid; @@ -1345,9 +1348,13 @@ struct super_block { #endif struct xattr_handler **s_xattr; - struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ #ifdef CONFIG_SMP + struct list_head *s_inodes; +#else + struct list_head s_inodes; /* all inodes */ +#endif +#ifdef CONFIG_SMP struct list_head *s_files; #else struct list_head s_files; @@ -2187,6 +2194,7 @@ extern struct inode *new_inode(struct super_block *); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); 
+extern void inode_sb_list_del(struct inode *inode); extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void __remove_inode_hash(struct inode *); extern void remove_inode_hash(struct inode *); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index a6db0585cf04..e776fb5ed01a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -344,7 +344,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); -extern void fsnotify_unmount_inodes(struct list_head *list); +extern void fsnotify_unmount_inodes(struct super_block *sb); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -374,7 +374,7 @@ static inline u32 fsnotify_get_cookie(void) return 0; } -static inline void fsnotify_unmount_inodes(struct list_head *list) +static inline void fsnotify_unmount_inodes(struct super_block *sb) {} #endif /* CONFIG_FSNOTIFY */ diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 37ea2894b3c0..e8bcd7c6c0cc 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -111,7 +111,7 @@ extern void inotify_inode_queue_event(struct inode *, __u32, __u32, const char *, struct inode *); extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, const char *); -extern void inotify_unmount_inodes(struct list_head *); +extern void inotify_unmount_inodes(struct super_block *); extern void inotify_inode_is_dead(struct inode *); extern u32 inotify_get_cookie(void); @@ -161,7 +161,7 @@ static inline void inotify_dentry_parent_queue_event(struct dentry *dentry, { } -static inline void inotify_unmount_inodes(struct list_head 
*list) +static inline void inotify_unmount_inodes(struct super_block *sb) { } |