summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Piggin <npiggin@suse.de>2010-01-29 15:38:34 -0800
committerThomas Gleixner <tglx@linutronix.de>2010-04-27 17:32:54 +0200
commitc8c34fe3ba9b739aa495e2f295ac8d46b479567a (patch)
tree1605f9ecb04aeb0d418004e07d4f2b33e13f7c66
parentd48369f49733ab843c216f41519eebc4fb1a5357 (diff)
downloadlwn-c8c34fe3ba9b739aa495e2f295ac8d46b479567a.tar.gz
lwn-c8c34fe3ba9b739aa495e2f295ac8d46b479567a.zip
fs-sb-inodes-percpu
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--fs/drop_caches.c37
-rw-r--r--fs/fs-writeback.c80
-rw-r--r--fs/inode.c128
-rw-r--r--fs/notify/inode_mark.c112
-rw-r--r--fs/notify/inotify/inotify.c134
-rw-r--r--fs/quota/dquot.c102
-rw-r--r--fs/super.c19
-rw-r--r--include/linux/fs.h10
-rw-r--r--include/linux/fsnotify_backend.h4
-rw-r--r--include/linux/inotify.h4
10 files changed, 377 insertions, 253 deletions
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 9962d37d69fc..ee55bbf3bd37 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,26 +14,35 @@ int sysctl_drop_caches;
static void drop_pagecache_sb(struct super_block *sb)
{
- struct inode *inode, *toput_inode = NULL;
+ int i;
- rcu_read_lock();
- list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)
- || inode->i_mapping->nrpages == 0) {
+ for_each_possible_cpu(i) {
+ struct inode *inode, *toput_inode = NULL;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_inodes, i);
+#else
+ list = &sb->s_inodes;
+#endif
+ rcu_read_lock();
+ list_for_each_entry_rcu(inode, list, i_sb_list) {
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)
+ || inode->i_mapping->nrpages == 0) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ __iget(inode);
spin_unlock(&inode->i_lock);
- continue;
+ rcu_read_unlock();
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
+ iput(toput_inode);
+ toput_inode = inode;
+ rcu_read_lock();
}
- __iget(inode);
- spin_unlock(&inode->i_lock);
rcu_read_unlock();
- invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
- toput_inode = inode;
- rcu_read_lock();
}
- rcu_read_unlock();
- iput(toput_inode);
}
static void drop_pagecache(void)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e5333cf53a5c..5fc0515a0e85 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1142,7 +1142,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
*/
static void wait_sb_inodes(struct super_block *sb)
{
- struct inode *inode, *old_inode = NULL;
+ int i;
/*
* We need to be protected against the filesystem going from
@@ -1150,47 +1150,57 @@ static void wait_sb_inodes(struct super_block *sb)
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- /*
- * Data integrity sync. Must wait for all pages under writeback,
- * because there may have been pages dirtied before our sync
- * call, but which had writeout started before we write it out.
- * In which case, the inode may not be on the dirty list, but
- * we still have to wait for that writeout.
- */
- rcu_read_lock();
- list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
- struct address_space *mapping;
+ for_each_possible_cpu(i) {
+ struct inode *inode, *old_inode = NULL;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_inodes, i);
+#else
+ list = &sb->s_inodes;
+#endif
+ /*
+ * Data integrity sync. Must wait for all pages under writeback,
+ * because there may have been pages dirtied before our sync
+ * call, but which had writeout started before we write it out.
+ * In which case, the inode may not be on the dirty list, but
+ * we still have to wait for that writeout.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(inode, list, i_sb_list) {
+ struct address_space *mapping;
- mapping = inode->i_mapping;
- if (mapping->nrpages == 0)
- continue;
+ mapping = inode->i_mapping;
+ if (mapping->nrpages == 0)
+ continue;
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ __iget(inode);
spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- rcu_read_unlock();
- /*
- * We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the i_lock. We
- * cannot iput the inode now as we can be holding the last
- * reference and we cannot iput it under spinlock. So we keep
- * the reference and iput it later.
- */
- iput(old_inode);
- old_inode = inode;
+ rcu_read_unlock();
+ /*
+ * We hold a reference to 'inode' so it couldn't have
+ * been removed from s_inodes list while we dropped the
+ * i_lock. We cannot iput the inode now as we can be
+ * holding the last reference and we cannot iput it
+ * under spinlock. So we keep the reference and iput it
+ * later.
+ */
+ iput(old_inode);
+ old_inode = inode;
- filemap_fdatawait(mapping);
+ filemap_fdatawait(mapping);
- cond_resched();
+ cond_resched();
- rcu_read_lock();
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+ iput(old_inode);
}
- rcu_read_unlock();
- iput(old_inode);
}
/**
diff --git a/fs/inode.c b/fs/inode.c
index 2c4d74aaeb2a..fdd682265869 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -88,7 +88,7 @@ static struct inode_hash_bucket *inode_hashtable __read_mostly;
* NOTE! You also have to own the lock if you change
* the i_state of an inode while it is in use..
*/
-DEFINE_SPINLOCK(sb_inode_list_lock);
+static DEFINE_PER_CPU(spinlock_t, inode_cpulock);
DEFINE_SPINLOCK(wb_inode_list_lock);
/*
@@ -373,9 +373,7 @@ static void dispose_list(struct list_head *head)
spin_lock(&inode->i_lock);
__remove_inode_hash(inode);
- spin_lock(&sb_inode_list_lock);
- list_del_rcu(&inode->i_sb_list);
- spin_unlock(&sb_inode_list_lock);
+ inode_sb_list_del(inode);
spin_unlock(&inode->i_lock);
wake_up_inode(inode);
@@ -387,39 +385,49 @@ static void dispose_list(struct list_head *head)
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+static int invalidate_sb_inodes(struct super_block *sb, struct list_head *dispose)
{
- struct list_head *next;
int busy = 0;
+ int i;
- next = head->next;
- for (;;) {
- struct list_head *tmp = next;
- struct inode *inode;
+ for_each_possible_cpu(i) {
+ struct list_head *next;
+ struct list_head *head;
+#ifdef CONFIG_SMP
+ head = per_cpu_ptr(sb->s_inodes, i);
+#else
+ head = &sb->s_inodes;
+#endif
- next = next->next;
- if (tmp == head)
- break;
- inode = list_entry(tmp, struct inode, i_sb_list);
- spin_lock(&inode->i_lock);
- if (inode->i_state & I_NEW) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- invalidate_inode_buffers(inode);
- if (!inode->i_count) {
- spin_lock(&wb_inode_list_lock);
- list_del(&inode->i_list);
- inodes_stat.nr_unused--;
- spin_unlock(&wb_inode_list_lock);
- WARN_ON(inode->i_state & I_NEW);
- inode->i_state |= I_FREEING;
+ next = head->next;
+ for (;;) {
+ struct list_head *tmp = next;
+ struct inode *inode;
+
+ next = next->next;
+ if (tmp == head)
+ break;
+ inode = list_entry(tmp, struct inode, i_sb_list);
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & I_NEW) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ invalidate_inode_buffers(inode);
+ if (!inode->i_count) {
+ spin_lock(&wb_inode_list_lock);
+ list_del(&inode->i_list);
+ inodes_stat.nr_unused--;
+ spin_unlock(&wb_inode_list_lock);
+ WARN_ON(inode->i_state & I_NEW);
+ inode->i_state |= I_FREEING;
+ spin_unlock(&inode->i_lock);
+ list_add(&inode->i_list, dispose);
+ continue;
+ }
spin_unlock(&inode->i_lock);
- list_add(&inode->i_list, dispose);
- continue;
+ busy = 1;
}
- spin_unlock(&inode->i_lock);
- busy = 1;
}
return busy;
}
@@ -444,9 +452,9 @@ int invalidate_inodes(struct super_block *sb)
*/
down_write(&iprune_sem);
// spin_lock(&sb_inode_list_lock); XXX: is this safe?
- inotify_unmount_inodes(&sb->s_inodes);
- fsnotify_unmount_inodes(&sb->s_inodes);
- busy = invalidate_list(&sb->s_inodes, &throw_away);
+ inotify_unmount_inodes(sb);
+ fsnotify_unmount_inodes(sb);
+ busy = invalidate_sb_inodes(sb, &throw_away);
// spin_unlock(&sb_inode_list_lock);
dispose_list(&throw_away);
@@ -665,13 +673,47 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
return tmp & I_HASHMASK;
}
+static void inode_sb_list_add(struct inode *inode, struct super_block *sb)
+{
+ spinlock_t *lock;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ int cpu;
+#endif
+
+ lock = &get_cpu_var(inode_cpulock);
+#ifdef CONFIG_SMP
+ cpu = smp_processor_id();
+ list = per_cpu_ptr(sb->s_inodes, cpu);
+ inode->i_sb_list_cpu = cpu;
+#else
+	list = &sb->s_inodes;
+#endif
+ spin_lock(lock);
+ list_add_rcu(&inode->i_sb_list, list);
+ spin_unlock(lock);
+ put_cpu_var(inode_cpulock);
+}
+
+void inode_sb_list_del(struct inode *inode)
+{
+ spinlock_t *lock;
+
+#ifdef CONFIG_SMP
+ lock = &per_cpu(inode_cpulock, inode->i_sb_list_cpu);
+#else
+ lock = &__get_cpu_var(inode_cpulock);
+#endif
+ spin_lock(lock);
+ list_del_rcu(&inode->i_sb_list);
+ spin_unlock(lock);
+}
+
static inline void
__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
struct inode *inode)
{
- spin_lock(&sb_inode_list_lock);
- list_add_rcu(&inode->i_sb_list, &sb->s_inodes);
- spin_unlock(&sb_inode_list_lock);
+ inode_sb_list_add(inode, sb);
percpu_counter_inc(&nr_inodes);
if (b) {
spin_lock(&b->lock);
@@ -1349,9 +1391,7 @@ void generic_delete_inode(struct inode *inode)
list_del_init(&inode->i_list);
spin_unlock(&wb_inode_list_lock);
}
- spin_lock(&sb_inode_list_lock);
- list_del_rcu(&inode->i_sb_list);
- spin_unlock(&sb_inode_list_lock);
+ inode_sb_list_del(inode);
percpu_counter_dec(&nr_inodes);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
@@ -1426,9 +1466,7 @@ int generic_detach_inode(struct inode *inode)
inodes_stat.nr_unused--;
spin_unlock(&wb_inode_list_lock);
}
- spin_lock(&sb_inode_list_lock);
- list_del_rcu(&inode->i_sb_list);
- spin_unlock(&sb_inode_list_lock);
+ inode_sb_list_del(inode);
percpu_counter_dec(&nr_inodes);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
@@ -1750,6 +1788,10 @@ void __init inode_init(void)
init_once);
register_shrinker(&icache_shrinker);
+ for_each_possible_cpu(loop) {
+ spin_lock_init(&per_cpu(inode_cpulock, loop));
+ }
+
/* Hash may have been set up in inode_init_early */
if (!hashdist)
return;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index c705b7934034..be1af807e67b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -362,65 +362,75 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
-void fsnotify_unmount_inodes(struct list_head *list)
+void fsnotify_unmount_inodes(struct super_block *sb)
{
- struct inode *inode, *next_i, *need_iput = NULL;
-
- list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
- struct inode *need_iput_tmp;
-
- spin_lock(&inode->i_lock);
- /*
- * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct inode *inode, *next_i, *need_iput = NULL;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_inodes, i);
+#else
+ list = &sb->s_inodes;
+#endif
+
+ list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+ struct inode *need_iput_tmp;
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+ * I_WILL_FREE, or I_NEW which is fine because by that point
+ * the inode cannot have any associated watches.
+ */
+ if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
+ /*
+ * If i_count is zero, the inode cannot have any watches and
+ * doing an __iget/iput with MS_ACTIVE clear would actually
+ * evict all inodes with zero i_count from icache which is
+ * unnecessarily violent and may in fact be illegal to do.
+ */
+ if (!inode->i_count) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- */
- if (!inode->i_count) {
+ need_iput_tmp = need_iput;
+ need_iput = NULL;
+
+ /* In case fsnotify_inode_delete() drops a reference. */
+ if (inode != need_iput_tmp) {
+ __iget(inode);
+ } else
+ need_iput_tmp = NULL;
spin_unlock(&inode->i_lock);
- continue;
- }
- need_iput_tmp = need_iput;
- need_iput = NULL;
-
- /* In case fsnotify_inode_delete() drops a reference. */
- if (inode != need_iput_tmp) {
- __iget(inode);
- } else
- need_iput_tmp = NULL;
- spin_unlock(&inode->i_lock);
-
- /* In case the dropping of a reference would nuke next_i. */
- if (&next_i->i_sb_list != list) {
- spin_lock(&next_i->i_lock);
- if (next_i->i_count &&
- !(next_i->i_state &
- (I_CLEAR | I_FREEING | I_WILL_FREE))) {
- __iget(next_i);
- need_iput = next_i;
+ /* In case the dropping of a reference would nuke next_i. */
+ if (&next_i->i_sb_list != list) {
+ spin_lock(&next_i->i_lock);
+ if (next_i->i_count &&
+ !(next_i->i_state &
+ (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+ __iget(next_i);
+ need_iput = next_i;
+ }
+ spin_unlock(&next_i->i_lock);
}
- spin_unlock(&next_i->i_lock);
- }
- if (need_iput_tmp)
- iput(need_iput_tmp);
+ if (need_iput_tmp)
+ iput(need_iput_tmp);
- /* for each watch, send FS_UNMOUNT and then remove it */
- fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+ /* for each watch, send FS_UNMOUNT and then remove it */
+ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
- fsnotify_inode_delete(inode);
+ fsnotify_inode_delete(inode);
- iput(inode);
+ iput(inode);
+ }
}
}
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 1c0bc7b681f7..e0a92bf683d6 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -385,76 +385,86 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie);
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
-void inotify_unmount_inodes(struct list_head *list)
-{
- struct inode *inode, *next_i, *need_iput = NULL;
+void inotify_unmount_inodes(struct super_block *sb)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct inode *inode, *next_i, *need_iput = NULL;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_inodes, i);
+#else
+ list = &sb->s_inodes;
+#endif
+
+ list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+ struct inotify_watch *watch, *next_w;
+ struct inode *need_iput_tmp;
+ struct list_head *watches;
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+ * I_WILL_FREE, or I_NEW which is fine because by that point
+ * the inode cannot have any associated watches.
+ */
+ if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
- list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
- struct inotify_watch *watch, *next_w;
- struct inode *need_iput_tmp;
- struct list_head *watches;
+ /*
+ * If i_count is zero, the inode cannot have any watches and
+ * doing an __iget/iput with MS_ACTIVE clear would actually
+ * evict all inodes with zero i_count from icache which is
+ * unnecessarily violent and may in fact be illegal to do.
+ */
+ if (!inode->i_count) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
- spin_lock(&inode->i_lock);
- /*
- * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ need_iput_tmp = need_iput;
+ need_iput = NULL;
+ /* In case inotify_remove_watch_locked() drops a reference. */
+ if (inode != need_iput_tmp) {
+ __iget(inode);
+ } else
+ need_iput_tmp = NULL;
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- */
- if (!inode->i_count) {
spin_unlock(&inode->i_lock);
- continue;
- }
- need_iput_tmp = need_iput;
- need_iput = NULL;
- /* In case inotify_remove_watch_locked() drops a reference. */
- if (inode != need_iput_tmp) {
- __iget(inode);
- } else
- need_iput_tmp = NULL;
-
- spin_unlock(&inode->i_lock);
-
- /* In case the dropping of a reference would nuke next_i. */
- if (&next_i->i_sb_list != list) {
- spin_lock(&next_i->i_lock);
- if (next_i->i_count &&
- !(next_i->i_state &
- (I_CLEAR|I_FREEING|I_WILL_FREE))) {
- __iget(next_i);
- need_iput = next_i;
+ /* In case the dropping of a reference would nuke next_i. */
+ if (&next_i->i_sb_list != list) {
+ spin_lock(&next_i->i_lock);
+ if (next_i->i_count &&
+ !(next_i->i_state &
+ (I_CLEAR|I_FREEING|I_WILL_FREE))) {
+ __iget(next_i);
+ need_iput = next_i;
+ }
+ spin_unlock(&next_i->i_lock);
}
- spin_unlock(&next_i->i_lock);
- }
-
- if (need_iput_tmp)
- iput(need_iput_tmp);
- /* for each watch, send IN_UNMOUNT and then remove it */
- mutex_lock(&inode->inotify_mutex);
- watches = &inode->inotify_watches;
- list_for_each_entry_safe(watch, next_w, watches, i_list) {
- struct inotify_handle *ih = watch->ih;
- get_inotify_watch(watch);
- mutex_lock(&ih->mutex);
- ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL);
- inotify_remove_watch_locked(ih, watch);
- mutex_unlock(&ih->mutex);
- put_inotify_watch(watch);
+ if (need_iput_tmp)
+ iput(need_iput_tmp);
+
+ /* for each watch, send IN_UNMOUNT and then remove it */
+ mutex_lock(&inode->inotify_mutex);
+ watches = &inode->inotify_watches;
+ list_for_each_entry_safe(watch, next_w, watches, i_list) {
+ struct inotify_handle *ih = watch->ih;
+ get_inotify_watch(watch);
+ mutex_lock(&ih->mutex);
+ ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL);
+ inotify_remove_watch_locked(ih, watch);
+ mutex_unlock(&ih->mutex);
+ put_inotify_watch(watch);
+ }
+ mutex_unlock(&inode->inotify_mutex);
+ iput(inode);
}
- mutex_unlock(&inode->inotify_mutex);
- iput(inode);
}
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 7d5ecefc4091..4c6a62baa109 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -841,46 +841,55 @@ static int dqinit_needed(struct inode *inode, int type)
/* This routine is guarded by dqonoff_mutex mutex */
static void add_dquot_ref(struct super_block *sb, int type)
{
- struct inode *inode, *old_inode = NULL;
int reserved = 0;
+ int i;
+ for_each_possible_cpu(i) {
+ struct inode *inode, *old_inode = NULL;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_inodes, i);
+#else
+ list = &sb->s_inodes;
+#endif
- rcu_read_lock();
- list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ rcu_read_lock();
+ list_for_each_entry_rcu(inode, list, i_sb_list) {
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
- if (unlikely(inode_get_rsv_space(inode) > 0))
- reserved = 1;
+ if (unlikely(inode_get_rsv_space(inode) > 0))
+ reserved = 1;
- if (!atomic_read(&inode->i_writecount)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ if (!atomic_read(&inode->i_writecount)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
+ if (!dqinit_needed(inode, type)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
- if (!dqinit_needed(inode, type)) {
+ __iget(inode);
spin_unlock(&inode->i_lock);
- continue;
+ rcu_read_unlock();
+
+ iput(old_inode);
+ sb->dq_op->initialize(inode, type);
+ /* We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the inode_lock.
+ * We cannot iput the inode now as we can be holding the last
+ * reference and we cannot iput it under inode_lock. So we
+ * keep the reference and iput it later. */
+ old_inode = inode;
+ rcu_read_lock();
}
-
- __iget(inode);
- spin_unlock(&inode->i_lock);
rcu_read_unlock();
-
iput(old_inode);
- sb->dq_op->initialize(inode, type);
- /* We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the inode_lock.
- * We cannot iput the inode now as we can be holding the last
- * reference and we cannot iput it under inode_lock. So we
- * keep the reference and iput it later. */
- old_inode = inode;
- rcu_read_lock();
}
- rcu_read_unlock();
- iput(old_inode);
if (reserved) {
printk(KERN_WARNING "VFS (%s): Writes happened before quota"
@@ -953,20 +962,29 @@ static void put_dquot_list(struct list_head *tofree_head)
static void remove_dquot_ref(struct super_block *sb, int type,
struct list_head *tofree_head)
{
- struct inode *inode;
+ int i;
+ for_each_possible_cpu(i) {
+ struct inode *inode;
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ list = per_cpu_ptr(sb->s_inodes, i);
+#else
+ list = &sb->s_inodes;
+#endif
- rcu_read_lock();
- list_for_each_entry_rcu(inode, &sb->s_inodes, i_sb_list) {
- /*
- * We have to scan also I_NEW inodes because they can already
- * have quota pointer initialized. Luckily, we need to touch
- * only quota pointers and these have separate locking
- * (dqptr_sem).
- */
- if (!IS_NOQUOTA(inode))
- remove_inode_dquot_ref(inode, type, tofree_head);
+ rcu_read_lock();
+ list_for_each_entry_rcu(inode, list, i_sb_list) {
+ /*
+ * We have to scan also I_NEW inodes because they can already
+ * have quota pointer initialized. Luckily, we need to touch
+ * only quota pointers and these have separate locking
+ * (dqptr_sem).
+ */
+ if (!IS_NOQUOTA(inode))
+ remove_inode_dquot_ref(inode, type, tofree_head);
+ }
+ rcu_read_unlock();
}
- rcu_read_unlock();
}
/* Gather all references from inodes and drop them */
diff --git a/fs/super.c b/fs/super.c
index 9bb1f386bc52..84c4aafd6fae 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -78,9 +78,25 @@ static struct super_block *alloc_super(struct file_system_type *type)
#else
INIT_LIST_HEAD(&s->s_files);
#endif
+#ifdef CONFIG_SMP
+ s->s_inodes = alloc_percpu(struct list_head);
+ if (!s->s_inodes) {
+ free_percpu(s->s_files);
+ security_sb_free(s);
+ kfree(s);
+ s = NULL;
+ goto out;
+ } else {
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(per_cpu_ptr(s->s_inodes, i));
+ }
+#else
+ INIT_LIST_HEAD(&s->s_inodes);
+#endif
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
- INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
@@ -133,6 +149,7 @@ out:
static inline void destroy_super(struct super_block *s)
{
#ifdef CONFIG_SMP
+ free_percpu(s->s_inodes);
free_percpu(s->s_files);
#endif
security_sb_free(s);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 63d5c2e7b398..94b8fa3f486e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -733,6 +733,9 @@ struct inode {
struct rcu_head i_rcu;
};
unsigned long i_ino;
+#ifdef CONFIG_SMP
+ int i_sb_list_cpu;
+#endif
unsigned int i_count;
unsigned int i_nlink;
uid_t i_uid;
@@ -1345,9 +1348,13 @@ struct super_block {
#endif
struct xattr_handler **s_xattr;
- struct list_head s_inodes; /* all inodes */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
#ifdef CONFIG_SMP
+ struct list_head *s_inodes;
+#else
+ struct list_head s_inodes; /* all inodes */
+#endif
+#ifdef CONFIG_SMP
struct list_head *s_files;
#else
struct list_head s_files;
@@ -2187,6 +2194,7 @@ extern struct inode *new_inode(struct super_block *);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
+extern void inode_sb_list_del(struct inode *inode);
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
extern void __remove_inode_hash(struct inode *);
extern void remove_inode_hash(struct inode *);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index a6db0585cf04..e776fb5ed01a 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -344,7 +344,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
-extern void fsnotify_unmount_inodes(struct list_head *list);
+extern void fsnotify_unmount_inodes(struct super_block *sb);
/* put here because inotify does some weird stuff when destroying watches */
extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
@@ -374,7 +374,7 @@ static inline u32 fsnotify_get_cookie(void)
return 0;
}
-static inline void fsnotify_unmount_inodes(struct list_head *list)
+static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}
#endif /* CONFIG_FSNOTIFY */
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 37ea2894b3c0..e8bcd7c6c0cc 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -111,7 +111,7 @@ extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
const char *, struct inode *);
extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
const char *);
-extern void inotify_unmount_inodes(struct list_head *);
+extern void inotify_unmount_inodes(struct super_block *);
extern void inotify_inode_is_dead(struct inode *);
extern u32 inotify_get_cookie(void);
@@ -161,7 +161,7 @@ static inline void inotify_dentry_parent_queue_event(struct dentry *dentry,
{
}
-static inline void inotify_unmount_inodes(struct list_head *list)
+static inline void inotify_unmount_inodes(struct super_block *sb)
{
}