diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2010-07-13 15:57:14 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-07-13 15:57:14 +0200 |
commit | ec646ea8db21abc1db436aac580a0464e460bd9d (patch) | |
tree | e1c137718bd94548589f0e2f2a89342c75e052da | |
parent | 596fc8ee275b6e4b441b6aa1e2c1a89aeeccb877 (diff) | |
download | lwn-ec646ea8db21abc1db436aac580a0464e460bd9d.tar.gz lwn-ec646ea8db21abc1db436aac580a0464e460bd9d.zip |
vfs: Revert the scalability patches
We still have sporadic and hard to debug problems. Revert it for now
and revisit with Nick's new version.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
115 files changed, 1268 insertions, 3033 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a4fd4040d4ac..18b9d0ca0630 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -17,7 +17,7 @@ prototypes: void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); -locking rules: XXX: update these!! +locking rules: none have BKL dcache_lock rename_lock ->d_lock may block d_revalidate: no no no yes diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index f078fe70d3cd..64a4c2d85f7c 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1548,7 +1548,7 @@ static int spufs_mfc_open(struct inode *inode, struct file *file) if (ctx->owner != current->mm) return -EINVAL; - if (inode->i_count != 1) + if (atomic_read(&inode->i_count) != 1) return -EBUSY; mutex_lock(&ctx->mapping_lock); diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7f0737214788..fc1b1c42b1dc 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -158,18 +158,18 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_dlock(dentry); + dget_locked(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); - /* XXX: what was dcache_lock protecting here? 
Other - * filesystems (IB, configfs) release dcache_lock - * before unlink */ + spin_unlock(&dcache_lock); dput(dentry); } else { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } } shrink_dcache_parent(dir); diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 8fa273e76bb3..385c44b3034f 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -649,11 +649,7 @@ static int __ptmx_open(struct inode *inode, struct file *filp) set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + file_move(filp, &tty->tty_files); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 44a7ce0c5f4d..9af676611759 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,9 +136,6 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); -/* Spinlock to protect the tty->tty_files list */ -DEFINE_SPINLOCK(tty_files_lock); - static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -237,11 +234,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - spin_lock(&tty_files_lock); + file_list_lock(); list_for_each(p, &tty->tty_files) { count++; } - spin_unlock(&tty_files_lock); + file_list_unlock(); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -519,7 +516,8 @@ static void do_tty_hangup(struct work_struct *work) /* inuse_filps is protected by the single kernel lock */ lock_kernel(); check_tty_count(tty, "do_tty_hangup"); - 
spin_lock(&tty_files_lock); + + file_list_lock(); /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -530,7 +528,7 @@ static void do_tty_hangup(struct work_struct *work) tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - spin_unlock(&tty_files_lock); + file_list_unlock(); tty_ldisc_hangup(tty); @@ -1421,9 +1419,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - spin_lock(&tty_files_lock); + file_list_lock(); list_del_init(&tty->tty_files); - spin_unlock(&tty_files_lock); + file_list_unlock(); put_pid(tty->pgrp); put_pid(tty->session); @@ -1668,10 +1666,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - spin_lock(&tty_files_lock); - BUG_ON(list_empty(&filp->f_u.fu_list)); - list_del_init(&filp->f_u.fu_list); - spin_unlock(&tty_files_lock); + file_kill(filp); filp->private_data = NULL; /* @@ -1840,11 +1835,7 @@ got_driver: } filp->private_data = tty; - BUG_ON(list_empty(&filp->f_u.fu_list)); - file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ - spin_lock(&tty_files_lock); - list_add(&filp->f_u.fu_list, &tty->tty_files); - spin_unlock(&tty_files_lock); + file_move(filp, &tty->tty_files); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index ba08b0e6ea5a..100da8542bba 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -272,14 +272,18 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } + spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - 
dget_dlock(tmp); + dget_locked(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); + spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); - } else + } else { spin_unlock(&tmp->d_lock); + spin_unlock(&dcache_lock); + } ret = 0; bail: diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index 9fd07a138dad..3bad888ced13 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -84,11 +84,10 @@ out: int pohmelfs_path_length(struct pohmelfs_inode *pi) { struct dentry *d, *root, *first; - int len; - unsigned seq; + int len = 1; /* Root slash */ - first = d_find_alias(&pi->vfs_inode); - if (!first) { + first = d = d_find_alias(&pi->vfs_inode); + if (!d) { dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode); return -ENOENT; } @@ -97,11 +96,7 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) root = dget(current->fs->root.dentry); read_unlock(&current->fs->lock); -rename_retry: - len = 1; /* Root slash */ - d = first; - seq = read_seqbegin(&rename_lock); - rcu_read_lock(); + spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */ @@ -110,9 +105,7 @@ rename_retry: len += d->d_name.len + 1; /* Plus slash */ d = d->d_parent; } - rcu_read_unlock(); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); dput(root); dput(first); diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 120500e1c96d..4a6366a42129 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -347,16 +347,17 @@ static int usbfs_empty (struct dentry *dentry) { struct list_head *list; - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); + list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); if (usbfs_positive(de)) { - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return 0; } } - 
spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return 1; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 3a4557e8325c..7d0f0a30f7a3 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) void *data = dentry->d_fsdata; struct list_head *head, *next; - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); head = &inode->i_dentry; next = head->next; while (next != head) { @@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) } next = next->next; } - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); } diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 9af06f0b355c..3c4ec7d864c4 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -379,9 +379,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); mark_buffer_dirty_inode(inode_bh, inode); inode->i_nlink = 2; - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); } affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 88106a018440..88067f36e5e7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1007,9 +1007,7 @@ static int afs_link(struct dentry *from, struct inode *dir, if (ret < 0) goto link_error; - spin_lock(&vnode->vfs_inode.i_lock); - vnode->vfs_inode.i_count++; - spin_unlock(&vnode->vfs_inode.i_lock); + atomic_inc(&vnode->vfs_inode.i_count); d_instantiate(dentry, &vnode->vfs_inode); key_put(key); _leave(" = 0"); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 81f82e7e7290..9f0bf13291e5 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -115,9 +115,7 @@ struct file *anon_inode_getfile(const char *name, * so we can avoid doing an igrab() and we can use an open-coded * atomic_inc(). 
*/ - spin_lock(&anon_inode_inode->i_lock); - anon_inode_inode->i_count++; - spin_unlock(&anon_inode_inode->i_lock); + atomic_inc(&anon_inode_inode->i_count); path.dentry->d_op = &anon_inodefs_dentry_operations; d_instantiate(path.dentry, anon_inode_inode); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 4ea26380a16b..0118d67221b2 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -16,7 +16,6 @@ #include <linux/auto_fs4.h> #include <linux/auto_dev-ioctl.h> #include <linux/mutex.h> -#include <linux/spinlock.h> #include <linux/list.h> /* This is the range of ioctl() numbers we claim as ours */ @@ -66,8 +65,6 @@ struct rehash_entry { struct list_head list; }; -extern spinlock_t autofs4_lock; - /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. It holds a reference to the dentry, so dentries are never diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 60a7c6ce0a0d..74bc9aa6df31 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -93,59 +93,22 @@ done: /* * Calculate next entry in top down tree traversal. * From next_mnt in namespace.c - elegant. - * - * How is this supposed to work if we drop autofs4_lock between calls anyway? - * How does it cope with renames? - * And also callers dput the returned dentry before taking autofs4_lock again - * so what prevents it from being freed?? 
*/ -static struct dentry *get_next_positive_dentry(struct dentry *p, - struct dentry *root) +static struct dentry *next_dentry(struct dentry *p, struct dentry *root) { - struct list_head *next; - struct dentry *ret; + struct list_head *next = p->d_subdirs.next; - spin_lock(&autofs4_lock); -again: - spin_lock(&p->d_lock); - next = p->d_subdirs.next; if (next == &p->d_subdirs) { while (1) { - struct dentry *parent; - - if (p == root) { - spin_unlock(&p->d_lock); - spin_unlock(&autofs4_lock); + if (p == root) return NULL; - } - - parent = p->d_parent; - if (!spin_trylock(&parent->d_lock)) { - spin_unlock(&p->d_lock); - goto again; - } - spin_unlock(&p->d_lock); next = p->d_u.d_child.next; - p = parent; - if (next != &parent->d_subdirs) + if (next != &p->d_parent->d_subdirs) break; + p = p->d_parent; } } - ret = list_entry(next, struct dentry, d_u.d_child); - - spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); - /* Negative dentry - try next */ - if (!simple_positive(ret)) { - spin_unlock(&ret->d_lock); - p = ret; - goto again; - } - dget_dlock(ret); - spin_unlock(&ret->d_lock); - spin_unlock(&p->d_lock); - spin_unlock(&autofs4_lock); - - return ret; + return list_entry(next, struct dentry, d_u.d_child); } /* @@ -195,11 +158,18 @@ static int autofs4_tree_busy(struct vfsmount *mnt, if (!simple_positive(top)) return 1; - for (p = dget(top); p; p = get_next_positive_dentry(p, top)) { + spin_lock(&dcache_lock); + for (p = top; p; p = next_dentry(p, top)) { + /* Negative dentry - give up */ + if (!simple_positive(p)) + continue; DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); + p = dget(p); + spin_unlock(&dcache_lock); + /* * Is someone visiting anywhere in the subtree ? 
* If there's no mount we need to check the usage @@ -235,7 +205,9 @@ static int autofs4_tree_busy(struct vfsmount *mnt, } } dput(p); + spin_lock(&dcache_lock); } + spin_unlock(&dcache_lock); /* Timeout of a tree mount is ultimately determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) @@ -254,11 +226,18 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); - for (p = dget(parent); p; p = get_next_positive_dentry(p, parent)) { + spin_lock(&dcache_lock); + for (p = parent; p; p = next_dentry(p, parent)) { + /* Negative dentry - give up */ + if (!simple_positive(p)) + continue; DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); + p = dget(p); + spin_unlock(&dcache_lock); + if (d_mountpoint(p)) { /* Can we umount this guy */ if (autofs4_mount_busy(mnt, p)) @@ -270,7 +249,9 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt, } cont: dput(p); + spin_lock(&dcache_lock); } + spin_unlock(&dcache_lock); return NULL; } @@ -295,10 +276,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, struct autofs_info *ino = autofs4_dentry_ino(root); if (d_mountpoint(root)) { ino->flags |= AUTOFS_INF_MOUNTPOINT; - spin_lock(&root->d_lock); - WARN_ON(root->d_mounted == 0); root->d_mounted--; - spin_unlock(&root->d_lock); } ino->flags |= AUTOFS_INF_EXPIRING; autofs4_add_expiring(root); @@ -317,8 +295,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, * A tree is eligible if :- * - it is unused by any user process * - it has been unused for exp_timeout time - * This seems to be racy dropping autofs4_lock and asking for next->next after - * the lock has been dropped. 
*/ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct vfsmount *mnt, @@ -340,8 +316,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, now = jiffies; timeout = sbi->exp_timeout; - spin_lock(&autofs4_lock); - spin_lock(&root->d_lock); + spin_lock(&dcache_lock); next = root->d_subdirs.next; /* On exit from the loop expire is set to a dgot dentry @@ -355,11 +330,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, continue; } - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - dentry = dget_dlock(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&root->d_lock); - spin_unlock(&autofs4_lock); + dentry = dget(dentry); + spin_unlock(&dcache_lock); spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); @@ -424,12 +396,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, next: spin_unlock(&sbi->fs_lock); dput(dentry); - spin_lock(&autofs4_lock); - spin_lock(&root->d_lock); + spin_lock(&dcache_lock); next = next->next; } - spin_unlock(&root->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return NULL; found: @@ -440,13 +410,9 @@ found: autofs4_add_expiring(expired); init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&autofs4_lock); - spin_lock(&expired->d_parent->d_lock); - spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&dcache_lock); list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); - spin_unlock(&expired->d_lock); - spin_unlock(&expired->d_parent->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return expired; } @@ -536,9 +502,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); if (ino->flags & AUTOFS_INF_MOUNTPOINT) { - spin_lock(&sb->s_root->d_lock); sb->s_root->d_mounted++; - spin_unlock(&sb->s_root->d_lock); ino->flags &= ~AUTOFS_INF_MOUNTPOINT; } ino->flags &= ~AUTOFS_INF_EXPIRING; diff --git a/fs/autofs4/inode.c 
b/fs/autofs4/inode.c index 0b9c391ddeb6..d0a3de247458 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -111,9 +111,8 @@ static void autofs4_force_release(struct autofs_sb_info *sbi) if (!sbi->sb->s_root) return; - spin_lock(&autofs4_lock); + spin_lock(&dcache_lock); repeat: - spin_lock(&this_parent->d_lock); next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { @@ -126,39 +125,33 @@ resume: } if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&this_parent->d_lock); this_parent = dentry; goto repeat; } next = next->next; - spin_unlock(&this_parent->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); dput(dentry); - spin_lock(&autofs4_lock); - spin_lock(&this_parent->d_lock); + spin_lock(&dcache_lock); } if (this_parent != sbi->sb->s_root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; - spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); DPRINTK("parent dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); dput(dentry); - spin_lock(&autofs4_lock); - spin_lock(&this_parent->d_lock); + spin_lock(&dcache_lock); goto resume; } - spin_unlock(&this_parent->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); } void autofs4_kill_sb(struct super_block *sb) diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 40ca9360caef..30cc9ddf4b70 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -17,11 +17,8 @@ #include <linux/stat.h> #include <linux/param.h> #include <linux/time.h> -#include <linux/spinlock.h> #include "autofs_i.h" -DEFINE_SPINLOCK(autofs4_lock); - static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); @@ -228,15 +225,12 @@ static 
int autofs4_dir_open(struct inode *inode, struct file *file) * autofs file system so just let the libfs routines handle * it. */ - spin_lock(&autofs4_lock); - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return -ENOENT; } - spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); out: return dcache_dir_open(inode, file); @@ -305,9 +299,9 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) /* We trigger a mount for almost all flags */ lookup_type = autofs4_need_mount(nd->flags); spin_lock(&sbi->fs_lock); - spin_lock(&autofs4_lock); + spin_lock(&dcache_lock); if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); goto follow; } @@ -317,11 +311,10 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) * multi-mount with no root mount offset. So don't try to * mount it again. 
*/ - spin_lock(&dentry->d_lock); if (ino->flags & AUTOFS_INF_PENDING || (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { ino->flags |= AUTOFS_INF_PENDING; - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); status = try_to_fill_dentry(dentry); @@ -329,16 +322,14 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_PENDING; spin_unlock(&sbi->fs_lock); - spin_unlock(&autofs4_lock); if (status) goto out_error; goto follow; } - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); - spin_unlock(&autofs4_lock); follow: /* * If there is no root mount it must be an autofs @@ -389,7 +380,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) mutex_aquired = mutex_trylock(&dir->i_mutex); spin_lock(&sbi->fs_lock); - spin_lock(&autofs4_lock); + spin_lock(&dcache_lock); /* Pending dentry */ if (autofs4_ispending(dentry)) { int status; @@ -403,11 +394,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) ino->flags |= AUTOFS_INF_PENDING; if (!mutex_aquired) { autofs4_revalidate_drop(dentry, entry); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); return 0; } - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); mutex_unlock(&dir->i_mutex); kfree(entry); @@ -454,11 +445,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) ino->flags |= AUTOFS_INF_PENDING; if (!mutex_aquired) { autofs4_revalidate_drop(dentry, entry); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); return 0; } - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); mutex_unlock(&dir->i_mutex); kfree(entry); @@ -479,7 +470,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) return status; } } - 
spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); if (mutex_aquired) @@ -553,7 +544,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) struct list_head *p, *head; restart: - spin_lock(&autofs4_lock); + spin_lock(&dcache_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; list_for_each(p, head) { @@ -567,15 +558,15 @@ restart: spin_lock(&active->d_lock); /* Already gone? */ - if (atomic_read(&dentry->d_count) == 0) + if (atomic_read(&active->d_count) == 0) goto next; if (active->d_inode && IS_DEADDIR(active->d_inode)) { if (!list_empty(&ino->rehash_list)) { - dget_dlock(active); + dget(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); autofs4_remove_rehash_entrys(ino); dput(active); goto restart; @@ -595,16 +586,16 @@ restart: if (memcmp(qstr->name, str, len)) goto next; - dget_dlock(active); + dget(active); spin_unlock(&active->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return active; next: spin_unlock(&active->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return NULL; } @@ -619,7 +610,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&autofs4_lock); + spin_lock(&dcache_lock); spin_lock(&sbi->lookup_lock); head = &sbi->expiring_list; list_for_each(p, head) { @@ -648,16 +639,16 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) if (memcmp(qstr->name, str, len)) goto next; - dget_dlock(expiring); + dget(expiring); spin_unlock(&expiring->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return expiring; next: spin_unlock(&expiring->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return 
NULL; } @@ -918,15 +909,11 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) dir->i_mtime = CURRENT_TIME; - spin_lock(&autofs4_lock); - spin_lock(&sbi->lookup_lock); - if (list_empty(&ino->expiring)) - list_add(&ino->expiring, &sbi->expiring_list); - spin_unlock(&sbi->lookup_lock); + spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return 0; } @@ -943,21 +930,15 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi)) return -EACCES; - spin_lock(&autofs4_lock); - spin_lock(&sbi->lookup_lock); - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); - spin_unlock(&sbi->lookup_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); return -ENOTEMPTY; } - if (list_empty(&ino->expiring)) - list_add(&ino->expiring, &sbi->expiring_list); - spin_unlock(&sbi->lookup_lock); + spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&autofs4_lock); + spin_unlock(&dcache_lock); if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index c5f8459c905e..2341375386f8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -186,26 +186,16 @@ static int autofs4_getpath(struct autofs_sb_info *sbi, { struct dentry *root = sbi->sb->s_root; struct dentry *tmp; - char *buf; + char *buf = *name; char *p; - int len; - unsigned seq; + int len = 0; -rename_retry: - buf = *name; - len = 0; - - seq = read_seqbegin(&rename_lock); - rcu_read_lock(); - spin_lock(&autofs4_lock); + spin_lock(&dcache_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { - spin_unlock(&autofs4_lock); - rcu_read_unlock(); - if (read_seqretry(&rename_lock, seq)) - goto 
rename_retry; + spin_unlock(&dcache_lock); return 0; } @@ -218,10 +208,7 @@ rename_retry: p -= tmp->d_name.len; strncpy(p, tmp->d_name.name, tmp->d_name.len); } - spin_unlock(&autofs4_lock); - rcu_read_unlock(); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); return len; } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index a822829df2f2..1e41aadb1068 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -178,9 +178,7 @@ static int bfs_link(struct dentry *old, struct inode *dir, inc_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_instantiate(new, inode); mutex_unlock(&info->bfs_lock); return 0; diff --git a/fs/block_dev.c b/fs/block_dev.c index 37a7a1f12329..8db62b2b6df8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -435,20 +435,13 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void bdev_i_callback(struct rcu_head *head) +static void bdev_destroy_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); struct bdev_inode *bdi = BDEV_I(inode); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bdev_cachep, bdi); } -static void bdev_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, bdev_i_callback); -} - static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; @@ -594,12 +587,7 @@ EXPORT_SYMBOL(bdget); */ struct block_device *bdgrab(struct block_device *bdev) { - struct inode *inode = bdev->bd_inode; - - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); - + atomic_inc(&bdev->bd_inode->i_count); return bdev; } @@ -629,9 +617,7 @@ static struct block_device *bd_acquire(struct inode *inode) spin_lock(&bdev_lock); bdev = inode->i_bdev; if (bdev) { - spin_lock(&inode->i_lock); - bdev->bd_inode->i_count++; - spin_unlock(&inode->i_lock); + 
atomic_inc(&bdev->bd_inode->i_count); spin_unlock(&bdev_lock); return bdev; } @@ -647,9 +633,7 @@ static struct block_device *bd_acquire(struct inode *inode) * So, we can access it via ->i_mapping always * without igrab(). */ - spin_lock(&inode->i_lock); - bdev->bd_inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&bdev->bd_inode->i_count); inode->i_bdev = bdev; inode->i_mapping = bdev->bd_inode->i_mapping; list_add(&inode->i_devices, &bdev->bd_inodes); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3288f3a2899e..4deb280f8969 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1991,14 +1991,8 @@ void btrfs_add_delayed_iput(struct inode *inode) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct delayed_iput *delayed; - spin_lock(&inode->i_lock); - if (inode->i_count == 1) { - spin_unlock(&inode->i_lock); + if (atomic_add_unless(&inode->i_count, -1, 1)) return; - } - inode->i_count--; - spin_unlock(&inode->i_lock); - delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); delayed->inode = inode; @@ -3606,14 +3600,8 @@ again: objectid = entry->vfs_inode.i_ino + 1; inode = igrab(&entry->vfs_inode); if (inode) { - int count; spin_unlock(&root->inode_lock); - - spin_lock(&inode->i_lock); - count = inode->i_count; - spin_unlock(&inode->i_lock); - - if (count > 1) + if (atomic_read(&inode->i_count) > 1) d_prune_aliases(inode); /* * btrfs_drop_inode will remove it from @@ -4458,9 +4446,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); err = btrfs_add_nondir(trans, dentry, inode, 1, index); diff --git a/fs/buffer.c b/fs/buffer.c index 416a2686ec66..b34323cfe2da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1145,7 +1145,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) * inode list. 
* * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, - * and mapping->tree_lock. + * mapping->tree_lock and the global inode_lock. */ void mark_buffer_dirty(struct buffer_head *bh) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3d5ccbd5cef3..7ec8555cf164 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1442,7 +1442,7 @@ int cifs_revalidate(struct dentry *direntry) } cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " "jiffies %ld", full_path, direntry->d_inode, - direntry->d_inode->i_count, direntry, + direntry->d_inode->i_count.counter, direntry, direntry->d_time, jiffies)); if (cifsInode->time == 0) { diff --git a/fs/coda/cache.c b/fs/coda/cache.c index e0ff265e49c5..a5bf5771a22a 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -86,7 +86,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct list_head *child; struct dentry *de; - spin_lock(&parent->d_lock); + spin_lock(&dcache_lock); list_for_each(child, &parent->d_subdirs) { de = list_entry(child, struct dentry, d_u.d_child); @@ -95,7 +95,7 @@ static void coda_flag_children(struct dentry *parent, int flag) continue; coda_flag_inode(de->d_inode, flag); } - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); return; } diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 4d3bbd8514ac..4bb9d0a5decc 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -302,9 +302,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, } coda_dir_update_mtime(dir_inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_instantiate(de, inode); inc_nlink(inode); diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 026cf68553a4..da6061a6df40 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry { struct 
config_item * item = NULL; - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_ITEM_LINK) { @@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry } else item = config_item_get(sd->s_element); } - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return item; } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c3638b63b8ce..8e48b52205aa 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -400,7 +400,7 @@ static void remove_dir(struct dentry * d) simple_rmdir(parent->d_inode,d); pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); + atomic_read(&d->d_count)); dput(parent); } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index aa8e83ed0f0b..a2f746066c5d 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -254,14 +254,18 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) struct dentry * dentry = sd->s_dentry; if (dentry) { + spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_dlock(dentry); + dget_locked(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, dentry); - } else + } else { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + } } } diff --git a/fs/dcache.c b/fs/dcache.c index 18a3b762297c..116fd33f564b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,34 +35,13 @@ #include <linux/hardirq.h> #include "internal.h" -/* - * Usage: - * dcache->d_inode->i_lock protects: - * - the inode alias lists, d_inode - * dcache_hash_bucket->lock protects: - * - the dcache hash table - * dcache_lru_lock protects: - * - the dcache lru lists and counters - * d_lock protects: - * - d_flags - * - d_name - * - d_lru - * - d_unhashed - * - d_subdirs and children's d_child - * - * Ordering: - * 
dcache->d_inode->i_lock - * dentry->d_lock - * dcache_lru_lock - * dcache_hash_bucket->lock - */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); + __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -EXPORT_SYMBOL(rename_lock); +EXPORT_SYMBOL(dcache_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -81,27 +60,13 @@ static struct kmem_cache *dentry_cache __read_mostly; static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; - -struct dcache_hash_bucket { - spinlock_t lock; - struct hlist_head head; -}; -static struct dcache_hash_bucket *dentry_hashtable __read_mostly; +static struct hlist_head *dentry_hashtable __read_mostly; /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { - .nr_dentry = 0, .age_limit = 45, }; -static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, - unsigned long hash) -{ - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); -} - static void __d_free(struct dentry *dentry) { WARN_ON(!list_empty(&dentry->d_alias)); @@ -117,11 +82,11 @@ static void d_callback(struct rcu_head *head) } /* - * no locks, please. + * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry + * inside dcache_lock. 
*/ static void d_free(struct dentry *dentry) { - BUG_ON(atomic_read(&dentry->d_count)); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ @@ -137,13 +102,14 @@ static void d_free(struct dentry *dentry) */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) + __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -152,60 +118,42 @@ static void dentry_iput(struct dentry * dentry) iput(inode); } else { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } } /* - * dentry_lru_(add|add_tail|del|del_init) must be called with d_lock held - * to protect list_empty(d_lru) condition. + * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. 
*/ static void dentry_lru_add(struct dentry *dentry) { - spin_lock(&dcache_lru_lock); list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; - spin_unlock(&dcache_lru_lock); } static void dentry_lru_add_tail(struct dentry *dentry) { - spin_lock(&dcache_lru_lock); list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; - spin_unlock(&dcache_lru_lock); -} - -static void __dentry_lru_del(struct dentry *dentry) -{ - list_del(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; -} - -static void __dentry_lru_del_init(struct dentry *dentry) -{ - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; } static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - spin_lock(&dcache_lru_lock); - __dentry_lru_del(dentry); - spin_unlock(&dcache_lru_lock); + list_del(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; } } static void dentry_lru_del_init(struct dentry *dentry) { if (likely(!list_empty(&dentry->d_lru))) { - spin_lock(&dcache_lru_lock); - __dentry_lru_del_init(dentry); - spin_unlock(&dcache_lru_lock); + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; } } @@ -216,87 +164,25 @@ static void dentry_lru_del_init(struct dentry *dentry) * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. - * - * d_lock and d_parent->d_lock must be held by caller, and - * are dropped by d_kill. 
*/ static struct dentry *d_kill(struct dentry *dentry) __releases(dentry->d_lock) + __releases(dcache_lock) { struct dentry *parent; list_del(&dentry->d_u.d_child); - if (dentry->d_parent && dentry != dentry->d_parent) - spin_unlock(&dentry->d_parent->d_lock); + dentry_stat.nr_dentry--; /* For d_free, below */ + /*drops the locks, at that point nobody can reach this dentry */ + dentry_iput(dentry); if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; - /*drops the locks, at that point nobody can reach this dentry */ - dentry_iput(dentry); d_free(dentry); return parent; } -void __d_drop(struct dentry *dentry) -{ - if (!(dentry->d_flags & DCACHE_UNHASHED)) { - struct dcache_hash_bucket *b; - b = d_hash(dentry->d_parent, dentry->d_name.hash); - dentry->d_flags |= DCACHE_UNHASHED; - spin_lock(&b->lock); - hlist_del_rcu(&dentry->d_hash); - spin_unlock(&b->lock); - } -} -EXPORT_SYMBOL(__d_drop); - -void d_drop(struct dentry *dentry) -{ - spin_lock(&dentry->d_lock); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(d_drop); - -static inline struct dentry *__dget_dlock(struct dentry *dentry) -{ - atomic_inc(&dentry->d_count); - return dentry; -} - -static inline struct dentry *__dget(struct dentry *dentry) -{ - __dget_dlock(dentry); - return dentry; -} - -struct dentry *dget_parent(struct dentry *dentry) -{ - struct dentry *ret; - -repeat: - spin_lock(&dentry->d_lock); - ret = dentry->d_parent; - if (!ret) - goto out; - if (dentry == ret) { - atomic_inc(&ret->d_count); - goto out; - } - if (!spin_trylock(&ret->d_lock)) { - spin_unlock(&dentry->d_lock); - goto repeat; - } - BUG_ON(!atomic_read(&ret->d_count)); - atomic_inc(&ret->d_count); - spin_unlock(&ret->d_lock); -out: - spin_unlock(&dentry->d_lock); - return ret; -} -EXPORT_SYMBOL(dget_parent); - /* * This is dput * @@ -328,68 +214,48 @@ EXPORT_SYMBOL(dget_parent); void dput(struct dentry *dentry) { - struct dentry *parent; - struct inode *inode; - if (!dentry) return; repeat: if 
(atomic_read(&dentry->d_count) == 1) might_sleep(); - - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) return; + spin_lock(&dentry->d_lock); + if (atomic_read(&dentry->d_count)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + return; + } /* * AV: ->d_delete() is _NOT_ allowed to block now. */ if (dentry->d_op && dentry->d_op->d_delete) { - if (dentry->d_op->d_delete(dentry)) { - __d_drop(dentry); - goto kill_it; - } + if (dentry->d_op->d_delete(dentry)) + goto unhash_it; } /* Unreachable? Get rid of it */ - if (d_unhashed(dentry)) + if (d_unhashed(dentry)) goto kill_it; - if (list_empty(&dentry->d_lru)) { - dentry->d_flags |= DCACHE_REFERENCED; + if (list_empty(&dentry->d_lru)) { + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); - } - spin_unlock(&dentry->d_lock); + } + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return; +unhash_it: + __d_drop(dentry); kill_it: - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) - goto retry; - - parent = dentry->d_parent; - if (parent && parent != dentry && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto retry; - } - /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); dentry = d_kill(dentry); if (dentry) goto repeat; - return; - -retry: - /* - * We are about to drop dentry->d_lock. dentry->d_count is 0 - * so it could be freed by someone else and leave us with a - * stale pointer. Prevent this by increasing d_count before - * dropping d_lock. - */ - atomic_inc(&dentry->d_count); - spin_unlock(&dentry->d_lock); - goto repeat; } /** @@ -409,9 +275,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. 
*/ - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); if (d_unhashed(dentry)) { - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return 0; } /* @@ -419,9 +285,9 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); shrink_dcache_parent(dentry); - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); } /* @@ -434,18 +300,35 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ + spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return 0; } +/* This should be called _only_ with dcache_lock held */ + +static inline struct dentry * __dget_locked(struct dentry *dentry) +{ + atomic_inc(&dentry->d_count); + dentry_lru_del_init(dentry); + return dentry; +} + +struct dentry * dget_locked(struct dentry *dentry) +{ + return __dget_locked(dentry); +} + /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -475,21 +358,18 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon) next = tmp->next; prefetch(next); alias = list_entry(tmp, struct dentry, d_alias); - spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) discon_alias = alias; else if (!want_discon) { - __dget_dlock(alias); - spin_unlock(&alias->d_lock); + __dget_locked(alias); return alias; } } - spin_unlock(&alias->d_lock); } if (discon_alias) - __dget(discon_alias); + __dget_locked(discon_alias); return discon_alias; } @@ -498,9 +378,9 @@ struct dentry * d_find_alias(struct inode *inode) struct dentry *de = NULL; if 
(!list_empty(&inode->i_dentry)) { - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); de = __d_find_alias(inode, 0); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); } return de; } @@ -513,20 +393,20 @@ void d_prune_aliases(struct inode *inode) { struct dentry *dentry; restart: - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!atomic_read(&dentry->d_count)) { - __dget_dlock(dentry); + __dget_locked(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); } /* @@ -539,43 +419,27 @@ restart: */ static void prune_one_dentry(struct dentry * dentry) __releases(dentry->d_lock) + __releases(dcache_lock) + __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); /* - * Prune ancestors. + * Prune ancestors. Locking is simpler than in dput(), + * because dcache_lock needs to be taken anyway. 
*/ + spin_lock(&dcache_lock); while (dentry) { - struct dentry *parent = NULL; - struct inode *inode = dentry->d_inode; - - if (inode) - spin_lock(&inode->i_lock); -again: - spin_lock(&dentry->d_lock); - if (dentry->d_parent && dentry != dentry->d_parent) { - if (!spin_trylock(&dentry->d_parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } - parent = dentry->d_parent; - } - atomic_dec(&dentry->d_count); - if (atomic_read(&dentry->d_count)) { - if (parent) - spin_unlock(&parent->d_lock); - spin_unlock(&dentry->d_lock); - if (inode) - spin_unlock(&inode->i_lock); + if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) return; - } if (dentry->d_op && dentry->d_op->d_delete) dentry->d_op->d_delete(dentry); dentry_lru_del_init(dentry); __d_drop(dentry); dentry = d_kill(dentry); + spin_lock(&dcache_lock); } } @@ -596,11 +460,10 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) BUG_ON(!sb); BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); + spin_lock(&dcache_lock); if (count != NULL) /* called from prune_dcache() and shrink_dcache_parent() */ cnt = *count; -relock: - spin_lock(&dcache_lru_lock); restart: if (count == NULL) list_splice_init(&sb->s_dentry_lru, &tmp); @@ -610,10 +473,7 @@ restart: struct dentry, d_lru); BUG_ON(dentry->d_sb != sb); - if (!spin_trylock(&dentry->d_lock)) { - spin_unlock(&dcache_lru_lock); - goto relock; - } + spin_lock(&dentry->d_lock); /* * If we are honouring the DCACHE_REFERENCED flag and * the dentry has this flag set, don't free it. 
Clear @@ -631,61 +491,33 @@ restart: if (!cnt) break; } - cond_resched_lock(&dcache_lru_lock); + cond_resched_lock(&dcache_lock); } } - spin_unlock(&dcache_lru_lock); - -again: - spin_lock(&dcache_lru_lock); /* lru_lock also protects tmp list */ while (!list_empty(&tmp)) { - struct inode *inode; - dentry = list_entry(tmp.prev, struct dentry, d_lru); - - if (!spin_trylock(&dentry->d_lock)) { -again1: - spin_unlock(&dcache_lru_lock); - goto again; - } + dentry_lru_del_init(dentry); + spin_lock(&dentry->d_lock); /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ if (atomic_read(&dentry->d_count)) { - __dentry_lru_del_init(dentry); spin_unlock(&dentry->d_lock); continue; } - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -again2: - spin_unlock(&dentry->d_lock); - goto again1; - } - if (dentry->d_parent && dentry->d_parent != dentry) { - if (!spin_trylock(&dentry->d_parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto again2; - } - } - __dentry_lru_del_init(dentry); - spin_unlock(&dcache_lru_lock); - prune_one_dentry(dentry); - /* dentry->d_lock dropped */ - spin_lock(&dcache_lru_lock); + /* dentry->d_lock was dropped in prune_one_dentry() */ + cond_resched_lock(&dcache_lock); } - if (count == NULL && !list_empty(&sb->s_dentry_lru)) goto restart; if (count != NULL) *count = cnt; if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lru_lock); + spin_unlock(&dcache_lock); } /** @@ -707,6 +539,7 @@ static void prune_dcache(int count) if (unused == 0 || count == 0) return; + spin_lock(&dcache_lock); restart: if (count >= unused) prune_ratio = 1; @@ -742,9 +575,11 @@ restart: if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { + spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); pruned -= w_count; + 
spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -760,6 +595,7 @@ restart: } } spin_unlock(&sb_lock); + spin_unlock(&dcache_lock); } /** @@ -788,10 +624,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); dentry_lru_del_init(dentry); __d_drop(dentry); - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -800,15 +636,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { - spin_lock_nested(&loop->d_lock, DENTRY_D_LOCK_NESTED); dentry_lru_del_init(loop); __d_drop(loop); - spin_unlock(&loop->d_lock); + cond_resched_lock(&dcache_lock); } - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -835,17 +670,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG(); } - if (IS_ROOT(dentry)) { + if (IS_ROOT(dentry)) parent = NULL; - list_del(&dentry->d_u.d_child); - } else { + else { parent = dentry->d_parent; - spin_lock(&parent->d_lock); atomic_dec(&parent->d_count); - list_del(&dentry->d_u.d_child); - spin_unlock(&parent->d_lock); } + list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -874,12 +706,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) struct dentry, d_u.d_child); } out: - return; + /* several dentries were freed, need to correct nr_dentry */ + spin_lock(&dcache_lock); + dentry_stat.nr_dentry -= detached; + spin_unlock(&dcache_lock); } /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock because: + * - we don't need 
to use dentry->d_lock, and only need dcache_lock when + * removing the dentry from the system lists and hashes because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -897,9 +733,7 @@ void shrink_dcache_for_umount(struct super_block *sb) dentry = sb->s_root; sb->s_root = NULL; - spin_lock(&dentry->d_lock); atomic_dec(&dentry->d_count); - spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_empty(&sb->s_anon)) { @@ -921,19 +755,15 @@ void shrink_dcache_for_umount(struct super_block *sb) * Return true if the parent or its subdirectories contain * a mount point */ + int have_submounts(struct dentry *parent) { - struct dentry *this_parent; + struct dentry *this_parent = parent; struct list_head *next; - unsigned seq; - -rename_retry: - this_parent = parent; - seq = read_seqbegin(&rename_lock); + spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; - spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -941,56 +771,26 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) { - spin_unlock(&dentry->d_lock); - spin_unlock(&this_parent->d_lock); + if (d_mountpoint(dentry)) goto positive; - } if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&this_parent->d_lock); - spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; - spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } - spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. 
*/ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - // d_unlinked(this_parent) || XXX - read_seqretry(&rename_lock, seq)) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); - goto rename_retry; - } - rcu_read_unlock(); - next = child->d_u.d_child.next; + next = this_parent->d_u.d_child.next; + this_parent = this_parent->d_parent; goto resume; } - spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); return 0; /* No mount points found in tree */ positive: - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); return 1; } @@ -1010,17 +810,11 @@ positive: */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent; + struct dentry *this_parent = parent; struct list_head *next; - unsigned seq; - int found; - -rename_retry: - found = 0; - this_parent = parent; - seq = read_seqbegin(&rename_lock); + int found = 0; - spin_lock(&this_parent->d_lock); + spin_lock(&dcache_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -1029,7 +823,6 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry_lru_del_init(dentry); /* * move only zero ref count dentries to the end @@ -1045,49 +838,27 @@ resume: * ensures forward progress). We'll be coming back to find * the rest. */ - if (found && need_resched()) { - spin_unlock(&dentry->d_lock); + if (found && need_resched()) goto out; - } /* * Descend a level if the d_subdirs list is non-empty. 
- * Note that we keep a hold on the parent lock while - * we descend, so we don't have to reacquire it on - * ascend. */ if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; goto repeat; } - - spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - child = this_parent; - next = child->d_u.d_child.next; - spin_unlock(&this_parent->d_lock); - this_parent = tmp; + next = this_parent->d_u.d_child.next; + this_parent = this_parent->d_parent; goto resume; } - out: - /* Make sure we unlock all the way back up the tree */ - while (this_parent != parent) { - struct dentry *tmp = this_parent->d_parent; - spin_unlock(&this_parent->d_lock); - this_parent = tmp; - } - spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); return found; } @@ -1172,7 +943,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) atomic_set(&dentry->d_count, 1); dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); - seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; dentry->d_parent = NULL; dentry->d_sb = NULL; @@ -1183,18 +953,20 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); - INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - spin_lock(&parent->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - dentry->d_parent = dget_dlock(parent); + dentry->d_parent = dget(parent); dentry->d_sb = parent->d_sb; - list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); + } else { + INIT_LIST_HEAD(&dentry->d_u.d_child); } + spin_lock(&dcache_lock); + if (parent) + list_add(&dentry->d_u.d_child, &parent->d_subdirs); + dentry_stat.nr_dentry++; 
+ spin_unlock(&dcache_lock); + return dentry; } @@ -1209,6 +981,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) } EXPORT_SYMBOL(d_alloc_name); +/* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode *inode) { if (inode) @@ -1235,11 +1008,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - if (inode) - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); __d_instantiate(entry, inode); - if (inode) - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } @@ -1283,7 +1054,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, continue; if (memcmp(qstr->name, name, len)) continue; - dget(alias); + dget_locked(alias); return alias; } @@ -1297,11 +1068,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) BUG_ON(!list_empty(&entry->d_alias)); - if (inode) - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); result = __d_instantiate_unique(entry, inode); - if (inode) - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1341,6 +1110,14 @@ struct dentry * d_alloc_root(struct inode * root_inode) return res; } +static inline struct hlist_head *d_hash(struct dentry *parent, + unsigned long hash) +{ + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); +} + /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1381,10 +1158,10 @@ struct dentry *d_obtain_alias(struct inode *inode) } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); res = __d_find_alias(inode, 0); if (res) { - 
spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); dput(tmp); goto out_iput; } @@ -1398,8 +1175,8 @@ struct dentry *d_obtain_alias(struct inode *inode) list_add(&tmp->d_alias, &inode->i_dentry); hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); spin_unlock(&tmp->d_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); return tmp; out_iput: @@ -1429,19 +1206,19 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_rehash(dentry); d_move(new, dentry); iput(inode); } else { - /* already taken inode->i_lock, d_add() by hand */ + /* already taking dcache_lock, so d_add() by hand */ __d_instantiate(dentry, inode); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1513,10 +1290,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; } @@ -1526,8 +1303,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * reference to it, move it in place and use it. */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget(new); - spin_unlock(&inode->i_lock); + dget_locked(new); + spin_unlock(&dcache_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -1549,7 +1326,7 @@ err_out: * is returned. 
The caller must use dput to free the entry when it has * finished using it. %NULL is returned on failure. * - * __d_lookup is global lock free. The hash list is protected using RCU. + * __d_lookup is dcache_lock free. The hash list is protected using RCU. * Memory barriers are used while updating and doing lockless traversal. * To avoid races with d_move while rename is happening, d_lock is used. * @@ -1561,18 +1338,33 @@ err_out: * * The dentry unused LRU is not updated even if lookup finds the required dentry * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent. This laziness saves lookup from LRU lock acquisition. + * select_parent and __dget_locked. This laziness saves lookup from dcache_lock + * acquisition. * * d_lookup() is protected against the concurrent renames in some unrelated * directory using the seqlockt_t rename_lock. */ + +struct dentry * d_lookup(struct dentry * parent, struct qstr * name) +{ + struct dentry * dentry = NULL; + unsigned long seq; + + do { + seq = read_seqbegin(&rename_lock); + dentry = __d_lookup(parent, name); + if (dentry) + break; + } while (read_seqretry(&rename_lock, seq)); + return dentry; +} + struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct dcache_hash_bucket *b = d_hash(parent, hash); - struct hlist_head *head = &b->head; + struct hlist_head *head = d_hash(parent,hash); struct dentry *found = NULL; struct hlist_node *node; struct dentry *dentry; @@ -1628,78 +1420,6 @@ next: return found; } -struct dentry * d_lookup(struct dentry * parent, struct qstr * name) -{ - struct dentry *dentry = NULL; - unsigned seq; - - do { - seq = read_seqbegin(&rename_lock); - dentry = __d_lookup(parent, name); - if (dentry) - break; - } while (read_seqretry(&rename_lock, seq)); - return dentry; -} - -struct dentry * __d_lookup_rcu(struct dentry * parent, struct qstr * 
name) -{ - unsigned int len = name->len; - unsigned int hash = name->hash; - const unsigned char *str = name->name; - struct dcache_hash_bucket *b = d_hash(parent, hash); - struct hlist_head *head = &b->head; - struct hlist_node *node; - struct dentry *dentry; - - hlist_for_each_entry_rcu(dentry, node, head, d_hash) { - unsigned seq; - struct dentry *tparent; - const char *tname; - int tlen; - - if (unlikely(dentry->d_name.hash != hash)) - continue; - -seqretry: - seq = read_seqcount_begin(&dentry->d_seq); - tparent = dentry->d_parent; - if (unlikely(tparent != parent)) - continue; - tlen = dentry->d_name.len; - if (unlikely(tlen != len)) - continue; - tname = dentry->d_name.name; - if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) - goto seqretry; - if (unlikely(memcmp(tname, str, tlen))) - continue; - if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) - goto seqretry; - - return dentry; - } - return NULL; -} - -struct dentry *d_lookup_rcu(struct dentry *parent, struct qstr * name) -{ - struct dentry *dentry = NULL; - unsigned seq; - - if (parent->d_op && parent->d_op->d_compare) - goto out; - - do { - seq = read_seqbegin(&rename_lock); - dentry = __d_lookup_rcu(parent, name); - if (dentry) - break; - } while (read_seqretry(&rename_lock, seq)); -out: - return dentry; -} - /** * d_hash_and_lookup - hash the qstr then search for a dentry * @dir: Directory to search in @@ -1738,7 +1458,6 @@ out: int d_validate(struct dentry *dentry, struct dentry *dparent) { - struct dcache_hash_bucket *b; struct hlist_head *base; struct hlist_node *lhp; @@ -1749,23 +1468,19 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) if (dentry->d_parent != dparent) goto out; - spin_lock(&dentry->d_lock); - b = d_hash(dparent, dentry->d_name.hash); - base = &b->head; - spin_lock(&b->lock); - hlist_for_each(lhp, base) { + spin_lock(&dcache_lock); + base = d_hash(dparent, dentry->d_name.hash); + hlist_for_each(lhp,base) { /* hlist_for_each_entry_rcu() not required for 
d_hash list - * as it is parsed under dcache_hash_bucket->lock + * as it is parsed under dcache_lock */ if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { - spin_unlock(&b->lock); - __dget_dlock(dentry); - spin_unlock(&dentry->d_lock); + __dget_locked(dentry); + spin_unlock(&dcache_lock); return 1; } } - spin_unlock(&b->lock); - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); out: return 0; } @@ -1793,20 +1508,14 @@ out: void d_delete(struct dentry * dentry) { - struct inode *inode; int isdir = 0; /* * Are we the only user? */ -again: + spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - inode = dentry->d_inode; - isdir = S_ISDIR(inode->i_mode); + isdir = S_ISDIR(dentry->d_inode->i_mode); if (atomic_read(&dentry->d_count) == 1) { - if (inode && !spin_trylock(&inode->i_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); return; @@ -1816,16 +1525,16 @@ again: __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } -static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) +static void __d_rehash(struct dentry * entry, struct hlist_head *list) { + entry->d_flags &= ~DCACHE_UNHASHED; - spin_lock(&b->lock); - hlist_add_head_rcu(&entry->d_hash, &b->head); - spin_unlock(&b->lock); + hlist_add_head_rcu(&entry->d_hash, list); } static void _d_rehash(struct dentry * entry) @@ -1842,9 +1551,11 @@ static void _d_rehash(struct dentry * entry) void d_rehash(struct dentry * entry) { + spin_lock(&dcache_lock); spin_lock(&entry->d_lock); _d_rehash(entry); spin_unlock(&entry->d_lock); + spin_unlock(&dcache_lock); } /* @@ -1921,46 +1632,32 @@ static void switch_names(struct dentry *dentry, struct dentry *target) */ static void d_move_locked(struct dentry * dentry, struct dentry * target) { - struct dcache_hash_bucket *b; + struct hlist_head *list; + if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache 
entry\n"); write_seqlock(&rename_lock); - - if (target->d_parent != dentry->d_parent) { - if (target->d_parent < dentry->d_parent) { - spin_lock(&target->d_parent->d_lock); - spin_lock_nested(&dentry->d_parent->d_lock, - DENTRY_D_LOCK_NESTED); - } else { - spin_lock(&dentry->d_parent->d_lock); - spin_lock_nested(&target->d_parent->d_lock, - DENTRY_D_LOCK_NESTED); - } - } else { - spin_lock(&target->d_parent->d_lock); - } - - if (dentry != dentry->d_parent) { - if (target < dentry) { - spin_lock_nested(&target->d_lock, 2); - spin_lock_nested(&dentry->d_lock, 3); - } else { - spin_lock_nested(&dentry->d_lock, 2); - spin_lock_nested(&target->d_lock, 3); - } + /* + * XXXX: do we really need to take target->d_lock? + */ + if (target < dentry) { + spin_lock(&target->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { - spin_lock_nested(&target->d_lock, 2); + spin_lock(&dentry->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); } /* Move the dentry to the target hash queue, if on different bucket */ - if (!d_unhashed(dentry)) { - b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock(&b->lock); - hlist_del_rcu(&dentry->d_hash); - spin_unlock(&b->lock); - } - __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); + if (d_unhashed(dentry)) + goto already_unhashed; + + hlist_del_rcu(&dentry->d_hash); + +already_unhashed: + list = d_hash(target->d_parent, target->d_name.hash); + __d_rehash(dentry, list); /* Unhash the target: dput() will then get rid of it */ __d_drop(target); @@ -1969,8 +1666,6 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) list_del(&target->d_u.d_child); /* Switch the names.. 
*/ - write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin(&target->d_seq); switch_names(dentry, target); swap(dentry->d_name.hash, target->d_name.hash); @@ -1985,14 +1680,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) /* And add them back to the (new) parent lists */ list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); } - write_seqcount_end(&target->d_seq); - write_seqcount_end(&dentry->d_seq); list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); - if (target->d_parent != dentry->d_parent) - spin_unlock(&dentry->d_parent->d_lock); - if (target->d_parent != target) - spin_unlock(&target->d_parent->d_lock); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2010,7 +1699,9 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) void d_move(struct dentry * dentry, struct dentry * target) { + spin_lock(&dcache_lock); d_move_locked(dentry, target); + spin_unlock(&dcache_lock); } /** @@ -2036,16 +1727,16 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex + * dentry->d_parent->d_inode->i_mutex and the dcache_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... 
*/ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) + __releases(dcache_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; - struct inode *inode = NULL; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) @@ -2061,15 +1752,14 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; - inode = alias->d_parent->d_inode; - if (!mutex_trylock(&inode->i_mutex)) + if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) goto out_err; - m2 = &inode->i_mutex; + m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: d_move_locked(alias, dentry); ret = alias; out_err: - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); if (m2) mutex_unlock(m2); if (m1) @@ -2091,12 +1781,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dparent = dentry->d_parent; aparent = anon->d_parent; - /* XXX: hack */ - spin_lock(&aparent->d_lock); - spin_lock(&dparent->d_lock); - spin_lock(&dentry->d_lock); - spin_lock(&anon->d_lock); - dentry->d_parent = (aparent == anon) ? 
dentry : aparent; list_del(&dentry->d_u.d_child); if (!IS_ROOT(dentry)) @@ -2111,11 +1795,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) else INIT_LIST_HEAD(&anon->d_u.d_child); - spin_unlock(&anon->d_lock); - spin_unlock(&dentry->d_lock); - spin_unlock(&dparent->d_lock); - spin_unlock(&aparent->d_lock); - anon->d_flags &= ~DCACHE_DISCONNECTED; } @@ -2133,15 +1812,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) BUG_ON(!d_unhashed(dentry)); + spin_lock(&dcache_lock); + if (!inode) { actual = dentry; __d_instantiate(dentry, NULL); - d_rehash(actual); - goto out_nolock; + goto found_lock; } - spin_lock(&inode->i_lock); - if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -2169,14 +1847,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) actual = __d_instantiate_unique(dentry, inode); if (!actual) actual = dentry; - else - BUG_ON(!d_unhashed(actual)); + else if (unlikely(!d_unhashed(actual))) + goto shouldnt_be_hashed; +found_lock: spin_lock(&actual->d_lock); found: _d_rehash(actual); spin_unlock(&actual->d_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -2185,6 +1864,10 @@ out_nolock: iput(inode); return actual; + +shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); } static int prepend(char **buffer, int *buflen, const char *str, int namelen) @@ -2215,7 +1898,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * Returns a pointer into the buffer or an error code if the * path was too long. * - * "buflen" should be positive. Caller holds the path->dentry->d_lock. + * "buflen" should be positive. Caller holds the dcache_lock. * * If path is not reachable from the supplied root, then the value of * root is changed (without modifying refcounts). 
@@ -2223,22 +1906,13 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) char *__d_path(const struct path *path, struct path *root, char *buffer, int buflen) { - struct dentry *dentry; - struct vfsmount *vfsmnt; - char *end; + struct dentry *dentry = path->dentry; + struct vfsmount *vfsmnt = path->mnt; + char *end = buffer + buflen; char *retval; - unsigned seq; -rename_retry: - dentry = path->dentry; - vfsmnt = path->mnt; - end = buffer + buflen; + spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); - - seq = read_seqbegin(&rename_lock); - rcu_read_lock(); - spin_lock(&dentry->d_lock); -unlinked: if (d_unlinked(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) goto Elong; @@ -2250,7 +1924,7 @@ unlinked: *retval = '/'; for (;;) { - struct dentry *parent; + struct dentry * parent; if (dentry == root->dentry && vfsmnt == root->mnt) break; @@ -2259,10 +1933,8 @@ unlinked: if (vfsmnt->mnt_parent == vfsmnt) { goto global_root; } - spin_unlock(&dentry->d_lock); dentry = vfsmnt->mnt_mountpoint; vfsmnt = vfsmnt->mnt_parent; - spin_lock(&dentry->d_lock); /* can't get unlinked because locked vfsmount */ continue; } parent = dentry->d_parent; @@ -2271,18 +1943,11 @@ unlinked: (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; retval = end; - spin_unlock(&dentry->d_lock); dentry = parent; - spin_lock(&dentry->d_lock); - if (d_unlinked(dentry)) - goto unlinked; } out: - spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&vfsmount_lock); return retval; global_root: @@ -2291,7 +1956,6 @@ global_root: goto Elong; root->mnt = vfsmnt; root->dentry = dentry; - /* XXX: this could wrongly modify root if we rename retry */ goto out; Elong: @@ -2320,8 +1984,6 @@ char *d_path(const struct path *path, char *buf, int buflen) char *res; struct path root; struct path tmp; - int cpu = get_cpu(); - put_cpu(); /* * We have various synthetic filesystems that never get mounted. 
On @@ -2337,12 +1999,10 @@ char *d_path(const struct path *path, char *buf, int buflen) root = current->fs->root; path_get(&root); read_unlock(&current->fs->lock); - - vfsmount_read_lock(cpu); + spin_lock(&dcache_lock); tmp = root; res = __d_path(path, &tmp, buf, buflen); - vfsmount_read_unlock(cpu); - + spin_unlock(&dcache_lock); path_put(&root); return res; } @@ -2373,21 +2033,11 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, */ char *dentry_path(struct dentry *dentry, char *buf, int buflen) { - char *end; + char *end = buf + buflen; char *retval; - unsigned seq; - int cpu = get_cpu(); - put_cpu(); -rename_retry: - end = buf + buflen; + spin_lock(&dcache_lock); prepend(&end, &buflen, "\0", 1); - - seq = read_seqbegin(&rename_lock); - vfsmount_read_lock(cpu); - rcu_read_lock(); /* protect parent */ - spin_lock(&dentry->d_lock); -unlinked: if (d_unlinked(dentry) && (prepend(&end, &buflen, "//deleted", 9) != 0)) goto Elong; @@ -2406,22 +2056,13 @@ unlinked: goto Elong; retval = end; - spin_unlock(&dentry->d_lock); dentry = parent; - spin_lock(&dentry->d_lock); - if (d_unlinked(dentry)) - goto unlinked; } -out: - spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - vfsmount_read_unlock(cpu); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); return retval; Elong: - retval = ERR_PTR(-ENAMETOOLONG); - goto out; + spin_unlock(&dcache_lock); + return ERR_PTR(-ENAMETOOLONG); } /* @@ -2447,8 +2088,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) int error; struct path pwd, root; char *page = (char *) __get_free_page(GFP_USER); - int cpu = get_cpu(); - put_cpu(); if (!page) return -ENOMEM; @@ -2461,17 +2100,14 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) read_unlock(&current->fs->lock); error = -ENOENT; - vfsmount_read_lock(cpu); - spin_lock(&pwd.dentry->d_lock); + spin_lock(&dcache_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; - 
spin_unlock(&pwd.dentry->d_lock); - /* XXX: race here, have to close (eg. return unlinked from __d_path) */ cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); - vfsmount_read_unlock(cpu); + spin_unlock(&dcache_lock); error = PTR_ERR(cwd); if (IS_ERR(cwd)) @@ -2484,10 +2120,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else { - spin_unlock(&pwd.dentry->d_lock); - vfsmount_read_unlock(cpu); - } + } else + spin_unlock(&dcache_lock); out: path_put(&pwd); @@ -2515,39 +2149,35 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned seq; + unsigned long seq; if (new_dentry == old_dentry) return 1; + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; - rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); + rcu_read_unlock(); return result; } void d_genocide(struct dentry *root) { - struct dentry *this_parent; + struct dentry *this_parent = root; struct list_head *next; - unsigned seq; -rename_retry: - this_parent = root; - seq = read_seqbegin(&rename_lock); - spin_lock(&this_parent->d_lock); + spin_lock(&dcache_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -2555,55 +2185,21 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (d_unhashed(dentry) || !dentry->d_inode) { - spin_unlock(&dentry->d_lock); + if (d_unhashed(dentry)||!dentry->d_inode) continue; - } if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&this_parent->d_lock); - spin_release(&dentry->d_lock.dep_map, 
1, _RET_IP_); this_parent = dentry; - spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } - if (!(dentry->d_flags & DCACHE_GENOCIDE)) { - atomic_dec(&dentry->d_count); - dentry->d_flags |= DCACHE_GENOCIDE; - } - spin_unlock(&dentry->d_lock); + atomic_dec(&dentry->d_count); } if (this_parent != root) { - struct dentry *tmp; - struct dentry *child; - - tmp = this_parent->d_parent; - if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { - atomic_dec(&this_parent->d_count); - this_parent->d_flags |= DCACHE_GENOCIDE; - } - rcu_read_lock(); - spin_unlock(&this_parent->d_lock); - child = this_parent; - this_parent = tmp; - spin_lock(&this_parent->d_lock); - /* might go back up the wrong parent if we have had a rename - * or deletion */ - if (this_parent != child->d_parent || - // d_unlinked(this_parent) || XXX - read_seqretry(&rename_lock, seq)) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); - goto rename_retry; - } - rcu_read_unlock(); - next = child->d_u.d_child.next; + next = this_parent->d_u.d_child.next; + atomic_dec(&this_parent->d_count); + this_parent = this_parent->d_parent; goto resume; } - spin_unlock(&this_parent->d_lock); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); } /** @@ -2656,7 +2252,7 @@ static void __init dcache_init_early(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct dcache_hash_bucket), + sizeof(struct hlist_head), dhash_entries, 13, HASH_EARLY, @@ -2664,10 +2260,8 @@ static void __init dcache_init_early(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) { - spin_lock_init(&dentry_hashtable[loop].lock); - INIT_HLIST_HEAD(&dentry_hashtable[loop].head); - } + for (loop = 0; loop < (1 << d_hash_shift); loop++) + INIT_HLIST_HEAD(&dentry_hashtable[loop]); } static void __init dcache_init(void) @@ -2690,7 +2284,7 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - 
sizeof(struct dcache_hash_bucket), + sizeof(struct hlist_head), dhash_entries, 13, 0, @@ -2698,10 +2292,8 @@ static void __init dcache_init(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) { - spin_lock_init(&dentry_hashtable[loop].lock); - INIT_HLIST_HEAD(&dentry_hashtable[loop].head); - } + for (loop = 0; loop < (1 << d_hash_shift); loop++) + INIT_HLIST_HEAD(&dentry_hashtable[loop]); } /* SLAB cache for __getname() consumers */ @@ -2751,6 +2343,7 @@ EXPORT_SYMBOL(d_rehash); EXPORT_SYMBOL(d_splice_alias); EXPORT_SYMBOL(d_add_ci); EXPORT_SYMBOL(d_validate); +EXPORT_SYMBOL(dget_locked); EXPORT_SYMBOL(dput); EXPORT_SYMBOL(find_inode_number); EXPORT_SYMBOL(have_submounts); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index ee55bbf3bd37..31f4b0e6d72c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -14,35 +14,23 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb) { - int i; + struct inode *inode, *toput_inode = NULL; - for_each_possible_cpu(i) { - struct inode *inode, *toput_inode = NULL; - struct list_head *list; -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_inodes, i); -#else - list = &sb->s_inodes; -#endif - rcu_read_lock(); - list_for_each_entry_rcu(inode, list, i_sb_list) { - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW) - || inode->i_mapping->nrpages == 0) { - spin_unlock(&inode->i_lock); - continue; - } - __iget(inode); - spin_unlock(&inode->i_lock); - rcu_read_unlock(); - invalidate_mapping_pages(inode->i_mapping, 0, -1); - iput(toput_inode); - toput_inode = inode; - rcu_read_lock(); - } - rcu_read_unlock(); + spin_lock(&inode_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) + continue; + if (inode->i_mapping->nrpages == 0) + continue; + __iget(inode); + spin_unlock(&inode_lock); + invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); + toput_inode = inode; + 
spin_lock(&inode_lock); } + spin_unlock(&inode_lock); + iput(toput_inode); } static void drop_pagecache(void) diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 44fcbf48c01f..2afbcebeda71 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1054,9 +1054,7 @@ static void create_done(struct exofs_io_state *ios, void *p) set_obj_created(oi); - spin_lock(&inode->i_lock); - inode->i_count--; - spin_unlock(&inode->i_lock); + atomic_dec(&inode->i_count); wake_up(&oi->i_wq); } @@ -1118,18 +1116,14 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) /* increment the refcount so that the inode will still be around when we * reach the callback */ - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); ios->done = create_done; ios->private = inode; ios->cred = oi->i_cred; ret = exofs_sbi_create(ios); if (ret) { - spin_lock(&inode->i_lock); - inode->i_count--; - spin_unlock(&inode->i_lock); + atomic_dec(&inode->i_count); exofs_put_io_state(ios); return ERR_PTR(ret); } diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 506778ac4fcd..b7dd0c236863 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -153,9 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); return exofs_add_nondir(dentry, inode); } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b55fee07c0d9..e9e175949a63 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -43,26 +43,24 @@ find_acceptable_alias(struct dentry *result, void *context) { struct dentry *dentry, *toput = NULL; - struct inode *inode; if (acceptable(context, result)) return result; - inode = result->d_inode; - spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { - dget(dentry); - spin_unlock(&inode->i_lock); + 
spin_lock(&dcache_lock); + list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { + dget_locked(dentry); + spin_unlock(&dcache_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); toput = dentry; } - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); if (toput) dput(toput); @@ -76,19 +74,12 @@ static struct dentry * find_disconnected_root(struct dentry *dentry) { dget(dentry); -again: spin_lock(&dentry->d_lock); while (!IS_ROOT(dentry) && (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { struct dentry *parent = dentry->d_parent; - - if (!spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } - dget_dlock(parent); + dget(parent); spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); dput(dentry); dentry = parent; spin_lock(&dentry->d_lock); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 34259ac72cef..dd7175ce5606 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -196,9 +196,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); err = ext2_add_link(dentry, inode); if (!err) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d5f19f954fb7..f9cb54a585ce 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -157,16 +157,9 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ext2_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); -} - static void ext2_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, ext2_i_callback); + kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } static void 
init_once(void *foo) diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 2b40cffa2432..b39991285136 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -100,9 +100,9 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) struct ext3_sb_info *sbi; int fatal = 0, err; - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { printk ("ext3_free_inode: inode has count=%d\n", - inode->i_count); + atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ee6676c11b5c..7b0e44f7d66f 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2246,9 +2246,7 @@ retry: inode->i_ctime = CURRENT_TIME_SEC; inc_nlink(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); err = ext3_add_entry(handle, dentry, inode); if (!err) { diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 29dae329d4f6..afa2b569da10 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -483,13 +483,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void ext3_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); -} - static void ext3_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT3_I(inode)->i_orphan))) { @@ -500,7 +493,7 @@ static void ext3_destroy_inode(struct inode *inode) false); dump_stack(); } - call_rcu(&inode->i_rcu, ext3_i_callback); + kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); } static void init_once(void *foo) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8297fad406c6..f3624ead4f6c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -192,9 +192,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) struct ext4_sb_info *sbi; int fatal = 0, err, count, cleared; - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { 
printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", - inode->i_count); + atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 70150b6f09a2..17a17e10dd60 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2326,9 +2326,7 @@ retry: inode->i_ctime = ext4_current_time(inode); ext4_inc_count(handle, inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); err = ext4_add_entry(handle, dentry, inode); if (!err) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5220e057664d..5c5e1cd62414 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -497,16 +497,9 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void fat_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); -} - static void fat_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, fat_i_callback); + kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } static void init_once(void *foo) diff --git a/fs/file_table.c b/fs/file_table.c index a7c231786a9a..32d12b78bac8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -22,7 +22,6 @@ #include <linux/sysctl.h> #include <linux/percpu_counter.h> #include <linux/ima.h> -#include <linux/percpu.h> #include <asm/atomic.h> @@ -33,7 +32,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -static DEFINE_PER_CPU(spinlock_t, files_cpulock); +/* public. Not pretty! 
*/ +__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -258,7 +258,7 @@ void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_sb_list_del(file); + file_kill(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -320,112 +320,55 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } + void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_sb_list_del(file); + file_kill(file); file_free(file); } } -void file_sb_list_add(struct file *file, struct super_block *sb) +void file_move(struct file *file, struct list_head *list) { - spinlock_t *lock; - struct list_head *list; -#ifdef CONFIG_SMP - int cpu; -#endif - - lock = &get_cpu_var(files_cpulock); -#ifdef CONFIG_SMP - cpu = smp_processor_id(); - list = per_cpu_ptr(sb->s_files, cpu); - file->f_sb_list_cpu = cpu; -#else - list = &sb->s_files; -#endif - put_cpu_var(files_cpulock); - spin_lock(lock); - BUG_ON(!list_empty(&file->f_u.fu_list)); - list_add(&file->f_u.fu_list, list); - spin_unlock(lock); + if (!list) + return; + file_list_lock(); + list_move(&file->f_u.fu_list, list); + file_list_unlock(); } -void file_sb_list_del(struct file *file) +void file_kill(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - spinlock_t *lock; - -#ifdef CONFIG_SMP - lock = &per_cpu(files_cpulock, file->f_sb_list_cpu); -#else - lock = &__get_cpu_var(files_cpulock); -#endif - spin_lock(lock); + file_list_lock(); list_del_init(&file->f_u.fu_list); - spin_unlock(lock); - } -} - -static void file_list_lock_all(void) -{ - int i; - int nr = 0; - - for_each_possible_cpu(i) { - spinlock_t *lock; - - lock = &per_cpu(files_cpulock, i); - spin_lock_nested(lock, nr); - nr++; - } -} - -static void file_list_unlock_all(void) -{ - int i; - - 
for_each_possible_cpu(i) { - spinlock_t *lock; - - lock = &per_cpu(files_cpulock, i); - spin_unlock(lock); + file_list_unlock(); } } int fs_may_remount_ro(struct super_block *sb) { - int i; + struct file *file; /* Check that no files are currently opened for writing. */ - file_list_lock_all(); - for_each_possible_cpu(i) { - struct file *file; - struct list_head *list; - -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_files, i); -#else - list = &sb->s_files; -#endif - list_for_each_entry(file, list, f_u.fu_list) { - struct inode *inode = file->f_path.dentry->d_inode; + file_list_lock(); + list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + struct inode *inode = file->f_path.dentry->d_inode; - /* File with pending delete? */ - if (inode->i_nlink == 0) - goto too_bad; + /* File with pending delete? */ + if (inode->i_nlink == 0) + goto too_bad; - /* Writeable file? */ - if (S_ISREG(inode->i_mode) && - (file->f_mode & FMODE_WRITE)) - goto too_bad; - } + /* Writeable file? */ + if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) + goto too_bad; } - file_list_unlock_all(); + file_list_unlock(); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock_all(); + file_list_unlock(); return 0; } @@ -438,48 +381,40 @@ too_bad: */ void mark_files_ro(struct super_block *sb) { - int i; + struct file *f; retry: - file_list_lock_all(); - for_each_possible_cpu(i) { - struct file *f; - struct list_head *list; - -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_files, i); -#else - list = &sb->s_files; -#endif - list_for_each_entry(f, list, f_u.fu_list) { - struct vfsmount *mnt; - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - spin_lock(&f->f_lock); - f->f_mode &= ~FMODE_WRITE; - spin_unlock(&f->f_lock); - if (file_check_writeable(f) != 0) - continue; - file_release_write(f); - mnt = mntget(f->f_path.mnt); - /* This can sleep, so we can't hold the spinlock. 
*/ - file_list_unlock_all(); - mnt_drop_write(mnt); - mntput(mnt); - goto retry; - } + file_list_lock(); + list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + struct vfsmount *mnt; + if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + continue; + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + spin_lock(&f->f_lock); + f->f_mode &= ~FMODE_WRITE; + spin_unlock(&f->f_lock); + if (file_check_writeable(f) != 0) + continue; + file_release_write(f); + mnt = mntget(f->f_path.mnt); + file_list_unlock(); + /* + * This can sleep, so we can't hold + * the file_list_lock() spinlock. + */ + mnt_drop_write(mnt); + mntput(mnt); + goto retry; } - file_list_unlock_all(); + file_list_unlock(); } void __init files_init(unsigned long mempages) { int n; - int i; filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -494,7 +429,5 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(files_cpulock, i)); percpu_counter_init(&nr_files, 0); } diff --git a/fs/filesystems.c b/fs/filesystems.c index 3448e7c075e0..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -110,7 +110,6 @@ int unregister_filesystem(struct file_system_type * fs) *tmp = fs->next; fs->next = NULL; write_unlock(&file_systems_lock); - synchronize_rcu(); return 0; } tmp = &(*tmp)->next; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4682776308d9..1a7c42c64ff4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -286,7 +286,6 @@ static void redirty_tail(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - assert_spin_locked(&wb_inode_list_lock); if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -304,14 +303,13 @@ static void requeue_io(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - 
assert_spin_locked(&wb_inode_list_lock); list_move(&inode->i_list, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) { /* - * Prevent speculative execution through spin_unlock(&inode->i_lock); + * Prevent speculative execution through spin_unlock(&inode_lock); */ smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); @@ -345,7 +343,6 @@ static void move_expired_inodes(struct list_head *delaying_queue, struct inode *inode; int do_sb_sort = 0; - assert_spin_locked(&wb_inode_list_lock); while (!list_empty(delaying_queue)) { inode = list_entry(delaying_queue->prev, struct inode, i_list); if (older_than_this && @@ -401,11 +398,9 @@ static void inode_wait_for_writeback(struct inode *inode) wqh = bit_waitqueue(&inode->i_state, __I_SYNC); do { - spin_unlock(&wb_inode_list_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); - spin_lock(&inode->i_lock); - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); } while (inode->i_state & I_SYNC); } @@ -430,7 +425,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) unsigned dirty; int ret; - if (!inode->i_count) + if (!atomic_read(&inode->i_count)) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); else WARN_ON(inode->i_state & I_WILL_FREE); @@ -462,8 +457,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY; - spin_unlock(&wb_inode_list_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); ret = do_writepages(mapping, wbc); @@ -480,8 +474,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = err; } - spin_lock(&inode->i_lock); - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & (I_FREEING | I_CLEAR))) { if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { @@ -540,11 +533,11 @@ select_queue: inode->i_state |= 
I_DIRTY_PAGES; redirty_tail(inode); } - } else if (inode->i_count) { + } else if (atomic_read(&inode->i_count)) { /* * The inode is clean, inuse */ - list_del_init(&inode->i_list); + list_move(&inode->i_list, &inode_in_use); } else { /* * The inode is clean, unused @@ -624,8 +617,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, struct super_block *sb = wbc->sb, *pin_sb = NULL; const unsigned long start = jiffies; /* livelock avoidance */ -again: - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); if (!wbc->for_kupdate || list_empty(&wb->b_io)) queue_io(wb, wbc->older_than_this); @@ -635,23 +627,16 @@ again: struct inode, i_list); long pages_skipped; - if (!spin_trylock(&inode->i_lock)) { - spin_unlock(&wb_inode_list_lock); - goto again; - } - /* * super block given and doesn't match, skip this inode */ if (sb && sb != inode->i_sb) { redirty_tail(inode); - spin_unlock(&inode->i_lock); continue; } if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); - spin_unlock(&inode->i_lock); continue; } @@ -659,14 +644,11 @@ again: * Was this inode dirtied after sync_sb_inodes was called? * This keeps sync from extra jobs and livelock. 
*/ - if (inode_dirtied_after(inode, start)) { - spin_unlock(&inode->i_lock); + if (inode_dirtied_after(inode, start)) break; - } if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { requeue_io(inode); - spin_unlock(&inode->i_lock); continue; } @@ -681,11 +663,10 @@ again: */ redirty_tail(inode); } - spin_unlock(&wb_inode_list_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); iput(inode); cond_resched(); - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); if (wbc->nr_to_write <= 0) { wbc->more_io = 1; break; @@ -693,9 +674,10 @@ again: if (!list_empty(&wb->b_more_io)) wbc->more_io = 1; } - spin_unlock(&wb_inode_list_lock); unpin_sb_for_writeback(&pin_sb); + + spin_unlock(&inode_lock); /* Leave any unwritten inodes on b_io */ } @@ -807,19 +789,13 @@ static long wb_writeback(struct bdi_writeback *wb, * become available for writeback. Otherwise * we'll just busyloop. */ -retry: - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); if (!list_empty(&wb->b_more_io)) { inode = list_entry(wb->b_more_io.prev, struct inode, i_list); - if (!spin_trylock(&inode->i_lock)) { - spin_unlock(&wb_inode_list_lock); - goto retry; - } inode_wait_for_writeback(inode); - spin_unlock(&inode->i_lock); } - spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode_lock); } return wrote; @@ -866,7 +842,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) wb->last_old_flush = jiffies; nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + - get_nr_inodes() - inodes_stat.nr_unused; + (inodes_stat.nr_inodes - inodes_stat.nr_unused); if (nr_pages) { struct wb_writeback_args args = { @@ -1073,7 +1049,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (unlikely(block_dump)) block_dump___mark_inode_dirty(inode); - spin_lock(&inode->i_lock); + spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { const int was_dirty = inode->i_state & I_DIRTY; @@ -1114,13 +1090,11 @@ void __mark_inode_dirty(struct inode *inode, 
int flags) } inode->dirtied_when = jiffies; - spin_lock(&wb_inode_list_lock); list_move(&inode->i_list, &wb->b_dirty); - spin_unlock(&wb_inode_list_lock); } } out: - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); } EXPORT_SYMBOL(__mark_inode_dirty); @@ -1143,7 +1117,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); */ static void wait_sb_inodes(struct super_block *sb) { - int i; + struct inode *inode, *old_inode = NULL; /* * We need to be protected against the filesystem going from @@ -1151,57 +1125,44 @@ static void wait_sb_inodes(struct super_block *sb) */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); - for_each_possible_cpu(i) { - struct inode *inode, *old_inode = NULL; - struct list_head *list; -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_inodes, i); -#else - list = &sb->s_inodes; -#endif + spin_lock(&inode_lock); + + /* + * Data integrity sync. Must wait for all pages under writeback, + * because there may have been pages dirtied before our sync + * call, but which had writeout started before we write it out. + * In which case, the inode may not be on the dirty list, but + * we still have to wait for that writeout. + */ + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + struct address_space *mapping; + + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) + continue; + mapping = inode->i_mapping; + if (mapping->nrpages == 0) + continue; + __iget(inode); + spin_unlock(&inode_lock); /* - * Data integrity sync. Must wait for all pages under writeback, - * because there may have been pages dirtied before our sync - * call, but which had writeout started before we write it out. - * In which case, the inode may not be on the dirty list, but - * we still have to wait for that writeout. + * We hold a reference to 'inode' so it couldn't have + * been removed from s_inodes list while we dropped the + * inode_lock. We cannot iput the inode now as we can + * be holding the last reference and we cannot iput it + * under inode_lock. 
So we keep the reference and iput + * it later. */ - rcu_read_lock(); - list_for_each_entry_rcu(inode, list, i_sb_list) { - struct address_space *mapping; - - mapping = inode->i_mapping; - if (mapping->nrpages == 0) - continue; - - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - __iget(inode); - spin_unlock(&inode->i_lock); - rcu_read_unlock(); - /* - * We hold a reference to 'inode' so it couldn't have - * been removed from s_inodes list while we dropped the - * i_lock. We cannot iput the inode now as we can be - * holding the last reference and we cannot iput it - * under spinlock. So we keep the reference and iput it - * later. - */ - iput(old_inode); - old_inode = inode; + iput(old_inode); + old_inode = inode; - filemap_fdatawait(mapping); + filemap_fdatawait(mapping); - cond_resched(); + cond_resched(); - rcu_read_lock(); - } - rcu_read_unlock(); - iput(old_inode); + spin_lock(&inode_lock); } + spin_unlock(&inode_lock); + iput(old_inode); } /** @@ -1220,7 +1181,7 @@ void writeback_inodes_sb(struct super_block *sb) long nr_to_write; nr_to_write = nr_dirty + nr_unstable + - get_nr_inodes() - inodes_stat.nr_unused; + (inodes_stat.nr_inodes - inodes_stat.nr_unused); bdi_start_writeback(sb->s_bdi, sb, nr_to_write); } @@ -1281,11 +1242,9 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - spin_lock(&inode->i_lock); - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); ret = writeback_single_inode(inode, &wbc); - spin_unlock(&wb_inode_list_lock); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); if (sync) inode_sync_wait(inode); return ret; @@ -1307,11 +1266,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - spin_lock(&inode->i_lock); - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); ret = writeback_single_inode(inode, wbc); - spin_unlock(&wb_inode_list_lock); - 
spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); return ret; } EXPORT_SYMBOL(sync_inode); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index f5a07d98ec06..84350e1be66d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -253,9 +253,7 @@ out_parent: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); if (!error) { - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); mark_inode_dirty(inode); } diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 557c5335de66..5f4023678251 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -301,9 +301,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, inc_nlink(inode); hfsplus_instantiate(dst_dentry, inode, cnid); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); HFSPLUS_SB(sb).file_count++; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index b22044cb988b..fe703ae46bc7 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -182,7 +182,7 @@ void hpfs_write_inode(struct inode *i) struct hpfs_inode_info *hpfs_inode = hpfs_i(i); struct inode *parent; if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return; - if (hpfs_inode->i_rddir_off && !i->i_count) { + if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) { if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n"); kfree(hpfs_inode->i_rddir_off); hpfs_inode->i_rddir_off = NULL; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d1a78fc2a78e..a0bbd3d1b41a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -377,12 +377,11 @@ static void hugetlbfs_delete_inode(struct inode *inode) clear_inode(inode); } -static void hugetlbfs_forget_inode(struct inode *inode) +static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) { if 
(generic_detach_inode(inode)) { truncate_hugepages(inode, 0); clear_inode(inode); - /* XXX: why no wake_up_inode? */ destroy_inode(inode); } } @@ -665,18 +664,11 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) return &p->vfs_inode; } -static void hugetlbfs_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); -} - static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); - call_rcu(&inode->i_rcu, hugetlbfs_i_callback); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); } static const struct address_space_operations hugetlbfs_aops = { diff --git a/fs/inode.c b/fs/inode.c index 972169668aed..ce10948610e9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -74,13 +74,9 @@ static unsigned int i_hash_shift __read_mostly; * allowing for low-overhead inode sync() operations. */ +LIST_HEAD(inode_in_use); LIST_HEAD(inode_unused); - -struct inode_hash_bucket { - spinlock_t lock; - struct hlist_head head; -}; -static struct inode_hash_bucket *inode_hashtable __read_mostly; +static struct hlist_head *inode_hashtable __read_mostly; /* * A simple spinlock to protect the list manipulations. @@ -88,8 +84,7 @@ static struct inode_hash_bucket *inode_hashtable __read_mostly; * NOTE! You also have to own the lock if you change * the i_state of an inode while it is in use.. */ -static DEFINE_PER_CPU(spinlock_t, inode_cpulock); -DEFINE_SPINLOCK(wb_inode_list_lock); +DEFINE_SPINLOCK(inode_lock); /* * iprune_sem provides exclusion between the kswapd or try_to_free_pages @@ -108,37 +103,10 @@ static DECLARE_RWSEM(iprune_sem); /* * Statistics gathering.. 
*/ -struct inodes_stat_t inodes_stat = { - .nr_inodes = 0, - .nr_unused = 0, -}; -struct percpu_counter nr_inodes; +struct inodes_stat_t inodes_stat; static struct kmem_cache *inode_cachep __read_mostly; -int get_nr_inodes(void) -{ - return percpu_counter_sum_positive(&nr_inodes); -} - -/* - * Handle nr_dentry sysctl - */ -#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -int proc_nr_inodes(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - inodes_stat.nr_inodes = get_nr_inodes(); - return proc_dointvec(table, write, buffer, lenp, ppos); -} -#else -int proc_nr_inodes(ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} -#endif - static void wake_up_inode(struct inode *inode) { /* @@ -166,7 +134,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &empty_fops; inode->i_nlink = 1; @@ -269,20 +237,13 @@ void __destroy_inode(struct inode *inode) } EXPORT_SYMBOL(__destroy_inode); -static void i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(inode_cachep, inode); -} - void destroy_inode(struct inode *inode) { __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else - call_rcu(&inode->i_rcu, i_callback); + kmem_cache_free(inode_cachep, (inode)); } /* @@ -296,7 +257,6 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); - INIT_LIST_HEAD(&inode->i_list); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -322,6 +282,21 @@ static void init_once(void *foo) 
inode_init_once(inode); } +/* + * inode_lock must be held + */ +void __iget(struct inode *inode) +{ + if (atomic_read(&inode->i_count)) { + atomic_inc(&inode->i_count); + return; + } + atomic_inc(&inode->i_count); + if (!(inode->i_state & (I_DIRTY|I_SYNC))) + list_move(&inode->i_list, &inode_in_use); + inodes_stat.nr_unused--; +} + /** * clear_inode - clear an inode * @inode: inode to clear @@ -365,70 +340,65 @@ static void dispose_list(struct list_head *head) struct inode *inode; inode = list_first_entry(head, struct inode, i_list); - list_del_init(&inode->i_list); + list_del(&inode->i_list); if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); - spin_lock(&inode->i_lock); - __remove_inode_hash(inode); - inode_sb_list_del(inode); - spin_unlock(&inode->i_lock); + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + list_del_init(&inode->i_sb_list); + spin_unlock(&inode_lock); wake_up_inode(inode); destroy_inode(inode); nr_disposed++; } + spin_lock(&inode_lock); + inodes_stat.nr_inodes -= nr_disposed; + spin_unlock(&inode_lock); } /* * Invalidate all inodes for a device. 
*/ -static int invalidate_sb_inodes(struct super_block *sb, struct list_head *dispose) +static int invalidate_list(struct list_head *head, struct list_head *dispose) { - int busy = 0; - int i; - - for_each_possible_cpu(i) { - struct list_head *next; - struct list_head *head; -#ifdef CONFIG_SMP - head = per_cpu_ptr(sb->s_inodes, i); -#else - head = &sb->s_inodes; -#endif + struct list_head *next; + int busy = 0, count = 0; - next = head->next; - for (;;) { - struct list_head *tmp = next; - struct inode *inode; - - next = next->next; - if (tmp == head) - break; - inode = list_entry(tmp, struct inode, i_sb_list); - spin_lock(&inode->i_lock); - if (inode->i_state & I_NEW) { - spin_unlock(&inode->i_lock); - continue; - } - invalidate_inode_buffers(inode); - if (!inode->i_count) { - spin_lock(&wb_inode_list_lock); - list_del(&inode->i_list); - inodes_stat.nr_unused--; - spin_unlock(&wb_inode_list_lock); - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); - list_add(&inode->i_list, dispose); - continue; - } - spin_unlock(&inode->i_lock); - busy = 1; + next = head->next; + for (;;) { + struct list_head *tmp = next; + struct inode *inode; + + /* + * We can reschedule here without worrying about the list's + * consistency because the per-sb list of inodes must not + * change during umount anymore, and because iprune_sem keeps + * shrink_icache_memory() away. 
+ */ + cond_resched_lock(&inode_lock); + + next = next->next; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_sb_list); + if (inode->i_state & I_NEW) + continue; + invalidate_inode_buffers(inode); + if (!atomic_read(&inode->i_count)) { + list_move(&inode->i_list, dispose); + WARN_ON(inode->i_state & I_NEW); + inode->i_state |= I_FREEING; + count++; + continue; } + busy = 1; } + /* only unused inodes may be cached with i_count zero */ + inodes_stat.nr_unused -= count; return busy; } @@ -445,17 +415,12 @@ int invalidate_inodes(struct super_block *sb) int busy; LIST_HEAD(throw_away); - /* - * Don't need to worry about the list's consistency because the per-sb - * list of inodes must not change during umount anymore, and because - * iprune_sem keeps shrink_icache_memory() away. - */ down_write(&iprune_sem); -// spin_lock(&sb_inode_list_lock); XXX: is this safe? - inotify_unmount_inodes(sb); - fsnotify_unmount_inodes(sb); - busy = invalidate_sb_inodes(sb, &throw_away); -// spin_unlock(&sb_inode_list_lock); + spin_lock(&inode_lock); + inotify_unmount_inodes(&sb->s_inodes); + fsnotify_unmount_inodes(&sb->s_inodes); + busy = invalidate_list(&sb->s_inodes, &throw_away); + spin_unlock(&inode_lock); dispose_list(&throw_away); up_write(&iprune_sem); @@ -470,7 +435,7 @@ static int can_unuse(struct inode *inode) return 0; if (inode_has_buffers(inode)) return 0; - if (inode->i_count) + if (atomic_read(&inode->i_count)) return 0; if (inode->i_data.nrpages) return 0; @@ -493,12 +458,12 @@ static int can_unuse(struct inode *inode) static void prune_icache(int nr_to_scan) { LIST_HEAD(freeable); + int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; down_read(&iprune_sem); -again: - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -507,56 +472,36 @@ again: inode = list_entry(inode_unused.prev, struct inode, i_list); - if (!spin_trylock(&inode->i_lock)) { - 
spin_unlock(&wb_inode_list_lock); - goto again; - } - if (inode->i_count) { - list_del_init(&inode->i_list); - spin_unlock(&inode->i_lock); - inodes_stat.nr_unused--; - continue; - } - if (inode->i_state) { + if (inode->i_state || atomic_read(&inode->i_count)) { list_move(&inode->i_list, &inode_unused); - spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { - spin_unlock(&wb_inode_list_lock); __iget(inode); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); -again2: - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); - /* XXX: may no longer work well */ if (inode != list_entry(inode_unused.next, struct inode, i_list)) continue; /* wrong inode or list_empty */ - if (!spin_trylock(&inode->i_lock)) { - spin_unlock(&wb_inode_list_lock); - goto again2; - } - if (!can_unuse(inode)) { - spin_unlock(&inode->i_lock); + if (!can_unuse(inode)) continue; - } } list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); - inodes_stat.nr_unused--; + nr_pruned++; } + inodes_stat.nr_unused -= nr_pruned; if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode_lock); dispose_list(&freeable); up_read(&iprune_sem); @@ -583,7 +528,7 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask) return -1; prune_icache(nr); } - return inodes_stat.nr_unused / 100 * sysctl_vfs_cache_pressure; + return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; } static struct shrinker icache_shrinker = { @@ -599,7 +544,7 @@ static void __wait_on_freeing_inode(struct inode *inode); * add any additional branch in the common code. 
*/ static struct inode *find_inode(struct super_block *sb, - struct inode_hash_bucket *b, + struct hlist_head *head, int (*test)(struct inode *, void *), void *data) { @@ -607,27 +552,17 @@ static struct inode *find_inode(struct super_block *sb, struct inode *inode = NULL; repeat: - rcu_read_lock(); - hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) { + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_sb != sb) continue; - spin_lock(&inode->i_lock); - if (hlist_unhashed(&inode->i_hash)) { - spin_unlock(&inode->i_lock); + if (!test(inode, data)) continue; - } - if (!test(inode, data)) { - spin_unlock(&inode->i_lock); - continue; - } if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { - rcu_read_unlock(); __wait_on_freeing_inode(inode); goto repeat; } break; } - rcu_read_unlock(); return node ? inode : NULL; } @@ -636,32 +571,23 @@ repeat: * iget_locked for details. */ static struct inode *find_inode_fast(struct super_block *sb, - struct inode_hash_bucket *b, - unsigned long ino) + struct hlist_head *head, unsigned long ino) { struct hlist_node *node; struct inode *inode = NULL; repeat: - rcu_read_lock(); - hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) { + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; - spin_lock(&inode->i_lock); - if (hlist_unhashed(&inode->i_hash)) { - spin_unlock(&inode->i_lock); - continue; - } if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { - rcu_read_unlock(); __wait_on_freeing_inode(inode); goto repeat; } break; } - rcu_read_unlock(); return node ? 
inode : NULL; } @@ -675,88 +601,16 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) return tmp & I_HASHMASK; } -static void inode_sb_list_add(struct inode *inode, struct super_block *sb) -{ - spinlock_t *lock; - struct list_head *list; -#ifdef CONFIG_SMP - int cpu; -#endif - - lock = &get_cpu_var(inode_cpulock); -#ifdef CONFIG_SMP - cpu = smp_processor_id(); - list = per_cpu_ptr(sb->s_inodes, cpu); - inode->i_sb_list_cpu = cpu; -#else - list = &sb->s_inodes; -#endif - put_cpu_var(inode_cpulock); - spin_lock(lock); - list_add_rcu(&inode->i_sb_list, list); - spin_unlock(lock); -} - -void inode_sb_list_del(struct inode *inode) -{ - spinlock_t *lock; - -#ifdef CONFIG_SMP - lock = &per_cpu(inode_cpulock, inode->i_sb_list_cpu); -#else - lock = &__get_cpu_var(inode_cpulock); -#endif - spin_lock(lock); - list_del_rcu(&inode->i_sb_list); - spin_unlock(lock); -} - static inline void -__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b, +__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - inode_sb_list_add(inode, sb); - percpu_counter_inc(&nr_inodes); - if (b) { - spin_lock(&b->lock); - hlist_add_head(&inode->i_hash, &b->head); - spin_unlock(&b->lock); - } -} - -#ifdef CONFIG_SMP -/* - * Each cpu owns a range of 1024 numbers. - * 'shared_last_ino' is dirtied only once out of 1024 allocations, - * to renew the exhausted range. - * - * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW - * error if st_ino won't fit in target struct field. Use 32bit counter - * here to attempt to avoid that. 
- */ -static DEFINE_PER_CPU(int, last_ino); -static atomic_t shared_last_ino; - -static int last_ino_get(void) -{ - int *p = &get_cpu_var(last_ino); - int res = *p; - - if (unlikely((res & 1023) == 0)) - res = atomic_add_return(1024, &shared_last_ino) - 1024; - - *p = ++res; - put_cpu_var(last_ino); - return res; -} -#else -static int last_ino_get(void) -{ - static int last_ino; - - return ++last_ino; + inodes_stat.nr_inodes++; + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); + if (head) + hlist_add_head(&inode->i_hash, head); } -#endif /** * inode_add_to_lists - add a new inode to relevant lists @@ -772,11 +626,11 @@ static int last_ino_get(void) */ void inode_add_to_lists(struct super_block *sb, struct inode *inode) { - struct inode_hash_bucket *b = inode_hashtable + hash(sb, inode->i_ino); + struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); - spin_lock(&inode->i_lock); - __inode_add_to_lists(sb, b, inode); - spin_unlock(&inode->i_lock); + spin_lock(&inode_lock); + __inode_add_to_lists(sb, head, inode); + spin_unlock(&inode_lock); } EXPORT_SYMBOL_GPL(inode_add_to_lists); @@ -794,15 +648,23 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists); */ struct inode *new_inode(struct super_block *sb) { + /* + * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW + * error if st_ino won't fit in target struct field. Use 32bit counter + * here to attempt to avoid that. 
+ */ + static unsigned int last_ino; struct inode *inode; + spin_lock_prefetch(&inode_lock); + inode = alloc_inode(sb); if (inode) { - spin_lock(&inode->i_lock); - inode->i_ino = last_ino_get(); - inode->i_state = 0; + spin_lock(&inode_lock); __inode_add_to_lists(sb, NULL, inode); - spin_unlock(&inode->i_lock); + inode->i_ino = ++last_ino; + inode->i_state = 0; + spin_unlock(&inode_lock); } return inode; } @@ -850,7 +712,7 @@ EXPORT_SYMBOL(unlock_new_inode); * -- rmk@arm.uk.linux.org */ static struct inode *get_new_inode(struct super_block *sb, - struct inode_hash_bucket *b, + struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) @@ -861,16 +723,16 @@ static struct inode *get_new_inode(struct super_block *sb, if (inode) { struct inode *old; + spin_lock(&inode_lock); /* We released the lock, so.. */ - old = find_inode(sb, b, test, data); + old = find_inode(sb, head, test, data); if (!old) { - spin_lock(&inode->i_lock); if (set(inode, data)) goto set_failed; + __inode_add_to_lists(sb, head, inode); inode->i_state = I_NEW; - __inode_add_to_lists(sb, b, inode); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -884,7 +746,7 @@ static struct inode *get_new_inode(struct super_block *sb, * allocated. */ __iget(old); - spin_unlock(&old->i_lock); + spin_unlock(&inode_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -892,7 +754,7 @@ static struct inode *get_new_inode(struct super_block *sb, return inode; set_failed: - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); destroy_inode(inode); return NULL; } @@ -902,7 +764,7 @@ set_failed: * comment at iget_locked for details. 
*/ static struct inode *get_new_inode_fast(struct super_block *sb, - struct inode_hash_bucket *b, unsigned long ino) + struct hlist_head *head, unsigned long ino) { struct inode *inode; @@ -910,14 +772,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb, if (inode) { struct inode *old; + spin_lock(&inode_lock); /* We released the lock, so.. */ - old = find_inode_fast(sb, b, ino); + old = find_inode_fast(sb, head, ino); if (!old) { - spin_lock(&inode->i_lock); inode->i_ino = ino; + __inode_add_to_lists(sb, head, inode); inode->i_state = I_NEW; - __inode_add_to_lists(sb, b, inode); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -931,7 +793,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb, * allocated. */ __iget(old); - spin_unlock(&old->i_lock); + spin_unlock(&inode_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -939,23 +801,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb, return inode; } -static int test_inode_iunique(struct super_block *sb, - struct inode_hash_bucket *b, unsigned long ino) -{ - struct hlist_node *node; - struct inode *inode = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) { - if (inode->i_ino == ino && inode->i_sb == sb) { - rcu_read_unlock(); - return 0; - } - } - rcu_read_unlock(); - return 1; -} - /** * iunique - get a unique inode number * @sb: superblock @@ -977,19 +822,20 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. 
*/ - static DEFINE_SPINLOCK(unique_lock); static unsigned int counter; - struct inode_hash_bucket *b; + struct inode *inode; + struct hlist_head *head; ino_t res; - spin_lock(&unique_lock); + spin_lock(&inode_lock); do { if (counter <= max_reserved) counter = max_reserved + 1; res = counter++; - b = inode_hashtable + hash(sb, res); - } while (!test_inode_iunique(sb, b, res)); - spin_unlock(&unique_lock); + head = inode_hashtable + hash(sb, res); + inode = find_inode_fast(sb, head, res); + } while (inode != NULL); + spin_unlock(&inode_lock); return res; } @@ -997,9 +843,7 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { - struct inode *ret = inode; - - spin_lock(&inode->i_lock); + spin_lock(&inode_lock); if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))) __iget(inode); else @@ -1008,10 +852,9 @@ struct inode *igrab(struct inode *inode) * called yet, and somebody is calling igrab * while the inode is getting freed. */ - ret = NULL; - spin_unlock(&inode->i_lock); - - return ret; + inode = NULL; + spin_unlock(&inode_lock); + return inode; } EXPORT_SYMBOL(igrab); @@ -1035,20 +878,21 @@ EXPORT_SYMBOL(igrab); * Note, @test is called with the inode_lock held, so can't sleep. */ static struct inode *ifind(struct super_block *sb, - struct inode_hash_bucket *b, - int (*test)(struct inode *, void *), + struct hlist_head *head, int (*test)(struct inode *, void *), void *data, const int wait) { struct inode *inode; - inode = find_inode(sb, b, test, data); + spin_lock(&inode_lock); + inode = find_inode(sb, head, test, data); if (inode) { __iget(inode); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); if (likely(wait)) wait_on_inode(inode); return inode; } + spin_unlock(&inode_lock); return NULL; } @@ -1068,18 +912,19 @@ static struct inode *ifind(struct super_block *sb, * Otherwise NULL is returned. 
*/ static struct inode *ifind_fast(struct super_block *sb, - struct inode_hash_bucket *b, - unsigned long ino) + struct hlist_head *head, unsigned long ino) { struct inode *inode; - inode = find_inode_fast(sb, b, ino); + spin_lock(&inode_lock); + inode = find_inode_fast(sb, head, ino); if (inode) { __iget(inode); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); wait_on_inode(inode); return inode; } + spin_unlock(&inode_lock); return NULL; } @@ -1107,9 +952,9 @@ static struct inode *ifind_fast(struct super_block *sb, struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); + struct hlist_head *head = inode_hashtable + hash(sb, hashval); - return ifind(sb, b, test, data, 0); + return ifind(sb, head, test, data, 0); } EXPORT_SYMBOL(ilookup5_nowait); @@ -1135,9 +980,9 @@ EXPORT_SYMBOL(ilookup5_nowait); struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); + struct hlist_head *head = inode_hashtable + hash(sb, hashval); - return ifind(sb, b, test, data, 1); + return ifind(sb, head, test, data, 1); } EXPORT_SYMBOL(ilookup5); @@ -1157,9 +1002,9 @@ EXPORT_SYMBOL(ilookup5); */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { - struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino); + struct hlist_head *head = inode_hashtable + hash(sb, ino); - return ifind_fast(sb, b, ino); + return ifind_fast(sb, head, ino); } EXPORT_SYMBOL(ilookup); @@ -1187,17 +1032,17 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { - struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); + struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; 
- inode = ifind(sb, b, test, data, 1); + inode = ifind(sb, head, test, data, 1); if (inode) return inode; /* * get_new_inode() will do the right thing, re-trying the search * in case it had to block at any point. */ - return get_new_inode(sb, b, test, set, data); + return get_new_inode(sb, head, test, set, data); } EXPORT_SYMBOL(iget5_locked); @@ -1218,17 +1063,17 @@ EXPORT_SYMBOL(iget5_locked); */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { - struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino); + struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; - inode = ifind_fast(sb, b, ino); + inode = ifind_fast(sb, head, ino); if (inode) return inode; /* * get_new_inode_fast() will do the right thing, re-trying the search * in case it had to block at any point. */ - return get_new_inode_fast(sb, b, ino); + return get_new_inode_fast(sb, head, ino); } EXPORT_SYMBOL(iget_locked); @@ -1236,37 +1081,29 @@ int insert_inode_locked(struct inode *inode) { struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino); + struct hlist_head *head = inode_hashtable + hash(sb, ino); inode->i_state |= I_NEW; while (1) { struct hlist_node *node; struct inode *old = NULL; - -repeat: - spin_lock(&b->lock); - hlist_for_each_entry(old, node, &b->head, i_hash) { + spin_lock(&inode_lock); + hlist_for_each_entry(old, node, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) continue; if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) continue; - if (!spin_trylock(&old->i_lock)) { - spin_unlock(&b->lock); - goto repeat; - } break; } if (likely(!node)) { - /* XXX: initialize inode->i_lock to locked? 
*/ - hlist_add_head(&inode->i_hash, &b->head); - spin_unlock(&b->lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); return 0; } - spin_unlock(&b->lock); __iget(old); - spin_unlock(&old->i_lock); + spin_unlock(&inode_lock); wait_on_inode(old); if (unlikely(!hlist_unhashed(&old->i_hash))) { iput(old); @@ -1281,7 +1118,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { struct super_block *sb = inode->i_sb; - struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); + struct hlist_head *head = inode_hashtable + hash(sb, hashval); inode->i_state |= I_NEW; @@ -1289,30 +1126,23 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct hlist_node *node; struct inode *old = NULL; -repeat: - spin_lock(&b->lock); - hlist_for_each_entry(old, node, &b->head, i_hash) { + spin_lock(&inode_lock); + hlist_for_each_entry(old, node, head, i_hash) { if (old->i_sb != sb) continue; if (!test(old, data)) continue; if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) continue; - if (!spin_trylock(&old->i_lock)) { - spin_unlock(&b->lock); - goto repeat; - } break; } if (likely(!node)) { - /* XXX: initialize inode->i_lock to locked? 
*/ - hlist_add_head(&inode->i_hash, &b->head); - spin_unlock(&b->lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); return 0; } - spin_unlock(&b->lock); __iget(old); - spin_unlock(&old->i_lock); + spin_unlock(&inode_lock); wait_on_inode(old); if (unlikely(!hlist_unhashed(&old->i_hash))) { iput(old); @@ -1333,32 +1163,14 @@ EXPORT_SYMBOL(insert_inode_locked4); */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { - struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, hashval); - - spin_lock(&inode->i_lock); - spin_lock(&b->lock); - hlist_add_head(&inode->i_hash, &b->head); - spin_unlock(&b->lock); - spin_unlock(&inode->i_lock); + struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + spin_lock(&inode_lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); } EXPORT_SYMBOL(__insert_inode_hash); /** - * __remove_inode_hash - remove an inode from the hash - * @inode: inode to unhash - * - * Remove an inode from the superblock. inode->i_lock must be - * held. 
- */ -void __remove_inode_hash(struct inode *inode) -{ - struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, inode->i_ino); - spin_lock(&b->lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&b->lock); -} - -/** * remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * @@ -1366,9 +1178,9 @@ void __remove_inode_hash(struct inode *inode) */ void remove_inode_hash(struct inode *inode) { - spin_lock(&inode->i_lock); - __remove_inode_hash(inode); - spin_unlock(&inode->i_lock); + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); } EXPORT_SYMBOL(remove_inode_hash); @@ -1388,16 +1200,12 @@ void generic_delete_inode(struct inode *inode) { const struct super_operations *op = inode->i_sb->s_op; - if (!list_empty(&inode->i_list)) { - spin_lock(&wb_inode_list_lock); - list_del_init(&inode->i_list); - spin_unlock(&wb_inode_list_lock); - } - inode_sb_list_del(inode); - percpu_counter_dec(&nr_inodes); + list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); security_inode_delete(inode); @@ -1414,15 +1222,9 @@ void generic_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); } - /* - * i_lock not required to delete from hash. If there was a - * concurrency window, then it would be possible for the other - * thread to touch the inode after it has been freed, with - * destroy_inode. - * XXX: yes it is because find_inode_fast checks it. Maybe we - * can avoid it though... 
- */ - remove_inode_hash(inode); + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); wake_up_inode(inode); BUG_ON(inode->i_state != I_CLEAR); destroy_inode(inode); @@ -1443,36 +1245,29 @@ int generic_detach_inode(struct inode *inode) struct super_block *sb = inode->i_sb; if (!hlist_unhashed(&inode->i_hash)) { - if (list_empty(&inode->i_list)) { - spin_lock(&wb_inode_list_lock); - list_add(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; - spin_unlock(&wb_inode_list_lock); - } + if (!(inode->i_state & (I_DIRTY|I_SYNC))) + list_move(&inode->i_list, &inode_unused); + inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); return 0; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); write_inode_now(inode, 1); - spin_lock(&inode->i_lock); + spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - __remove_inode_hash(inode); - } - if (!list_empty(&inode->i_list)) { - spin_lock(&wb_inode_list_lock); - list_del_init(&inode->i_list); inodes_stat.nr_unused--; - spin_unlock(&wb_inode_list_lock); + hlist_del_init(&inode->i_hash); } - inode_sb_list_del(inode); - percpu_counter_dec(&nr_inodes); + list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - spin_unlock(&inode->i_lock); + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); return 1; } EXPORT_SYMBOL_GPL(generic_detach_inode); @@ -1537,12 +1332,8 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state == I_CLEAR); - spin_lock(&inode->i_lock); - inode->i_count--; - if (inode->i_count == 0) + if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) iput_final(inode); - else - spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(iput); @@ -1723,8 +1514,6 @@ EXPORT_SYMBOL(inode_wait); * wake_up_inode() after removing from the hash list 
will DTRT. * * This is called with inode_lock held. - * - * Called with i_lock held and returns with it dropped. */ static void __wait_on_freeing_inode(struct inode *inode) { @@ -1732,9 +1521,10 @@ static void __wait_on_freeing_inode(struct inode *inode) DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); + spin_unlock(&inode_lock); schedule(); finish_wait(wq, &wait.wait); + spin_lock(&inode_lock); } static __initdata unsigned long ihash_entries; @@ -1762,7 +1552,7 @@ void __init inode_init_early(void) inode_hashtable = alloc_large_system_hash("Inode-cache", - sizeof(struct inode_hash_bucket), + sizeof(struct hlist_head), ihash_entries, 14, HASH_EARLY, @@ -1770,17 +1560,14 @@ void __init inode_init_early(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) { - spin_lock_init(&inode_hashtable[loop].lock); - INIT_HLIST_HEAD(&inode_hashtable[loop].head); - } + for (loop = 0; loop < (1 << i_hash_shift); loop++) + INIT_HLIST_HEAD(&inode_hashtable[loop]); } void __init inode_init(void) { int loop; - percpu_counter_init(&nr_inodes, 0); /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), @@ -1790,17 +1577,13 @@ void __init inode_init(void) init_once); register_shrinker(&icache_shrinker); - for_each_possible_cpu(loop) { - spin_lock_init(&per_cpu(inode_cpulock, loop)); - } - /* Hash may have been set up in inode_init_early */ if (!hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", - sizeof(struct inode_hash_bucket), + sizeof(struct hlist_head), ihash_entries, 14, 0, @@ -1808,10 +1591,8 @@ void __init inode_init(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) { - spin_lock_init(&inode_hashtable[loop].lock); - INIT_HLIST_HEAD(&inode_hashtable[loop].head); - } + for (loop = 0; loop < (1 << i_hash_shift); loop++) + 
INIT_HLIST_HEAD(&inode_hashtable[loop]); } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 66e0e4b3ad20..1de640de962c 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1311,6 +1311,7 @@ int jbd2_journal_stop(handle_t *handle) if (handle->h_sync) transaction->t_synchronous_commit = 1; current->journal_info = NULL; + spin_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); transaction->t_outstanding_credits -= handle->h_buffer_credits; transaction->t_updates--; @@ -1339,7 +1340,8 @@ int jbd2_journal_stop(handle_t *handle) jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); /* This is non-blocking */ - jbd2_log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); + spin_unlock(&journal->j_state_lock); /* * Special case: JBD2_SYNC synchronous updates require us @@ -1349,6 +1351,7 @@ int jbd2_journal_stop(handle_t *handle) err = jbd2_log_wait_commit(journal, tid); } else { spin_unlock(&transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); } lock_map_release(&handle->h_lockdep_map); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 40bc1705fa05..7aa4417e085f 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -287,9 +287,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de mutex_unlock(&f->sem); d_instantiate(dentry, old_dentry->d_inode); dir_i->i_mtime = dir_i->i_ctime = ITIME(now); - spin_lock(&old_dentry->d_inode->i_lock); - old_dentry->d_inode->i_count++; - spin_unlock(&old_dentry->d_inode->i_lock); + atomic_inc(&old_dentry->d_inode->i_count); } return ret; } @@ -868,9 +866,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). 
You now have a hard link\n", ret); /* Might as well let the VFS know */ d_instantiate(new_dentry, old_dentry->d_inode); - spin_lock(&old_dentry->d_inode->i_lock); - old_dentry->d_inode->i_count++; - spin_unlock(&old_dentry->d_inode->i_lock); + atomic_inc(&old_dentry->d_inode->i_count); new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); return ret; } diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 820212f3fc88..d945ea76b445 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1279,9 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */ * lazy commit thread finishes processing */ if (tblk->xflag & COMMIT_DELETE) { - spin_lock(&tblk->u.ip->i_lock); - tblk->u.ip->i_count++; - spin_unlock(&tblk->u.ip->i_lock); + atomic_inc(&tblk->u.ip->i_count); /* * Avoid a rare deadlock * diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 8fa6219d0eeb..c79a4270f083 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -831,9 +831,7 @@ static int jfs_link(struct dentry *old_dentry, ip->i_ctime = CURRENT_TIME; dir->i_ctime = dir->i_mtime = CURRENT_TIME; mark_inode_dirty(dir); - spin_lock(&ip->i_lock); - ip->i_count++; - spin_unlock(&ip->i_lock); + atomic_inc(&ip->i_count); iplist[0] = ip; iplist[1] = dir; diff --git a/fs/libfs.c b/fs/libfs.c index 98d2717e9027..6e8d17e1dc4c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -14,11 +14,6 @@ #include <asm/uaccess.h> -static inline int simple_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -84,8 +79,7 @@ int dcache_dir_close(struct inode *inode, struct file *file) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - struct dentry *dentry = file->f_path.dentry; - mutex_lock(&dentry->d_inode->i_mutex); + mutex_lock(&file->f_path.dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -93,7 +87,7 @@ loff_t dcache_dir_lseek(struct file *file, 
loff_t offset, int origin) if (offset >= 0) break; default: - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -103,27 +97,21 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) struct dentry *cursor = file->private_data; loff_t n = file->f_pos - 2; - spin_lock(&dentry->d_lock); - spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&dcache_lock); list_del(&cursor->d_u.d_child); - spin_unlock(&cursor->d_lock); - p = dentry->d_subdirs.next; - while (n && p != &dentry->d_subdirs) { + p = file->f_path.dentry->d_subdirs.next; + while (n && p != &file->f_path.dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (simple_positive(next)) + if (!d_unhashed(next) && next->d_inode) n--; - spin_unlock(&next->d_lock); p = p->next; } - spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED); list_add_tail(&cursor->d_u.d_child, p); - spin_unlock(&cursor->d_lock); - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } } - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); return offset; } @@ -163,38 +151,29 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) i++; /* fallthrough */ default: - spin_lock(&dentry->d_lock); - if (filp->f_pos == 2) { - spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&dcache_lock); + if (filp->f_pos == 2) list_move(q, &dentry->d_subdirs); - spin_unlock(&cursor->d_lock); - } for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (!simple_positive(next)) { - spin_unlock(&next->d_lock); + if (d_unhashed(next) || !next->d_inode) continue; - } - spin_unlock(&next->d_lock); - 
spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) return 0; - spin_lock(&dentry->d_lock); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&dcache_lock); /* next is still alive */ list_move(q, p); - spin_unlock(&next->d_lock); p = q; filp->f_pos++; } - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } return 0; } @@ -265,8 +244,6 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - WARN_ON(mnt->mnt_flags & MNT_MOUNTED); - mnt->mnt_flags |= MNT_MOUNTED; simple_set_mnt(mnt, s); return 0; @@ -281,31 +258,29 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); dget(dentry); d_instantiate(dentry, inode); return 0; } +static inline int simple_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; - spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { - spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); - if (simple_positive(child)) { - spin_unlock(&child->d_lock); + spin_lock(&dcache_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) + if (simple_positive(child)) goto out; - } - spin_unlock(&child->d_lock); - } ret = 1; out: - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); return ret; } diff --git a/fs/locks.c b/fs/locks.c index 9fdd6796e0ec..a8794f233bc9 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1374,7 +1374,8 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if ((arg == F_RDLCK) && 
(atomic_read(&inode->i_writecount) > 0)) goto out; if ((arg == F_WRLCK) - && (atomic_read(&dentry->d_count) > 1 || inode->i_count > 1)) + && ((atomic_read(&dentry->d_count) > 1) + || (atomic_read(&inode->i_count) > 1))) goto out; } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 1b0b9f081ffa..32b131cd6121 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -103,9 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); return add_nondir(dentry, inode); } diff --git a/fs/namei.c b/fs/namei.c index 488a6c07235e..a4855af776a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -198,29 +198,6 @@ static int acl_permission_check(struct inode *inode, int mask, return -EACCES; } -static int acl_permission_check_rcu(struct inode *inode, int mask, - int (*check_acl)(struct inode *inode, int mask)) -{ - umode_t mode = inode->i_mode; - - mask &= MAY_READ | MAY_WRITE | MAY_EXEC; - - if (current_fsuid() == inode->i_uid) - mode >>= 6; - else { - if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) - return -EAGAIN; - if (in_group_p(inode->i_gid)) - mode >>= 3; - } - - /* - * If the DACs are ok we don't need any capability check. 
- */ - if ((mask & ~mode) == 0) - return 0; - return -EACCES; -} /** * generic_permission - check for access rights on a Posix-like filesystem * @inode: inode to check access rights for @@ -506,26 +483,6 @@ ok: return security_inode_permission(inode, MAY_EXEC); } -static int exec_permission_lite_rcu(struct inode *inode) -{ - int ret; - - if (inode->i_op->permission) - return -EAGAIN; - ret = acl_permission_check_rcu(inode, MAY_EXEC, inode->i_op->check_acl); - if (ret == -EAGAIN) - return ret; - if (!ret) - goto ok; - - if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) - goto ok; - - return ret; -ok: - return security_inode_permission(inode, MAY_EXEC); -} - static __always_inline void set_root(struct nameidata *nd) { if (!nd->root.mnt) { @@ -538,15 +495,6 @@ static __always_inline void set_root(struct nameidata *nd) } static int link_path_walk(const char *, struct nameidata *); -static __always_inline void set_root_rcu(struct nameidata *nd) -{ - if (!nd->root.mnt) { - struct fs_struct *fs = current->fs; - read_lock(&fs->lock); - nd->root = fs->root; - read_unlock(&fs->lock); - } -} static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { @@ -590,12 +538,6 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) mntput(path->mnt); } -static inline void path_to_nameidata_rcu(struct path *path, struct nameidata *nd) -{ - nd->path.mnt = path->mnt; - nd->path.dentry = path->dentry; -} - static inline void path_to_nameidata(struct path *path, struct nameidata *nd) { dput(nd->path.dentry); @@ -675,18 +617,15 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - int cpu = get_cpu(); - put_cpu(); - - vfsmount_read_lock(cpu); + spin_lock(&vfsmount_lock); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - vfsmount_read_unlock(cpu); + 
spin_unlock(&vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -697,21 +636,6 @@ int follow_up(struct path *path) /* no need for dcache_lock, as serialization is taken care in * namespace.c */ -static int __follow_mount_rcu(struct path *path) -{ - int res = 0; - while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted; - mounted = __lookup_mnt(path->mnt, path->dentry, 1); - if (!mounted) - break; - path->mnt = mounted; - path->dentry = mounted->mnt_root; - res = 1; - } - return res; -} - static int __follow_mount(struct path *path) { int res = 0; @@ -762,8 +686,6 @@ int follow_down(struct path *path) static __always_inline void follow_dotdot(struct nameidata *nd) { - int cpu = get_cpu(); - put_cpu(); set_root(nd); while(1) { @@ -774,20 +696,23 @@ static __always_inline void follow_dotdot(struct nameidata *nd) nd->path.mnt == nd->root.mnt) { break; } + spin_lock(&dcache_lock); if (nd->path.dentry != nd->path.mnt->mnt_root) { nd->path.dentry = dget(nd->path.dentry->d_parent); + spin_unlock(&dcache_lock); dput(old); break; } - vfsmount_read_lock(cpu); + spin_unlock(&dcache_lock); + spin_lock(&vfsmount_lock); parent = nd->path.mnt->mnt_parent; if (parent == nd->path.mnt) { - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); break; } mntget(parent); nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); dput(old); mntput(nd->path.mnt); nd->path.mnt = parent; @@ -800,24 +725,6 @@ static __always_inline void follow_dotdot(struct nameidata *nd) * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. 
*/ -static int do_lookup_rcu(struct nameidata *nd, struct qstr *name, - struct path *path) -{ - struct vfsmount *mnt = nd->path.mnt; - struct dentry *dentry; - - dentry = __d_lookup_rcu(nd->path.dentry, name); - - if (!dentry) - return -EAGAIN; - if (dentry->d_op && dentry->d_op->d_revalidate) - return -EAGAIN; - path->mnt = mnt; - path->dentry = dentry; - __follow_mount_rcu(path); - return 0; -} - static int do_lookup(struct nameidata *nd, struct qstr *name, struct path *path) { @@ -915,134 +822,6 @@ fail: return PTR_ERR(dentry); } -static noinline int link_path_walk_rcu(const char *name, struct nameidata *nd, struct path *next) -{ - struct inode *inode; - unsigned int lookup_flags = nd->flags; - - while (*name=='/') - name++; - if (!*name) - goto return_reval; - - inode = nd->path.dentry->d_inode; - if (nd->depth) - lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); - - /* At this point we know we have a real path component. */ - for(;;) { - unsigned long hash; - struct qstr this; - unsigned int c; - - nd->flags |= LOOKUP_CONTINUE; - if (exec_permission_lite_rcu(inode)) - return -EAGAIN; - - this.name = name; - c = *(const unsigned char *)name; - - hash = init_name_hash(); - do { - name++; - hash = partial_name_hash(c, hash); - c = *(const unsigned char *)name; - } while (c && (c != '/')); - this.len = name - (const char *) this.name; - this.hash = end_name_hash(hash); - - /* remove trailing slashes? */ - if (!c) - goto last_component; - while (*++name == '/'); - if (!*name) - goto last_with_slashes; - - if (this.name[0] == '.') switch (this.len) { - default: - break; - case 2: - if (this.name[1] != '.') - break; - return -EAGAIN; - case 1: - continue; - } - if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) - return -EAGAIN; - /* This does the actual lookups.. 
*/ - if (do_lookup_rcu(nd, &this, next)) - return -EAGAIN; - - inode = next->dentry->d_inode; - if (!inode) - return -ENOENT; - if (inode->i_op->follow_link) - return -EAGAIN; - path_to_nameidata_rcu(next, nd); - if (!inode->i_op->lookup) - return -ENOTDIR; - continue; - /* here ends the main loop */ - -last_with_slashes: - lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; -last_component: - /* Clear LOOKUP_CONTINUE iff it was previously unset */ - nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; - if (lookup_flags & LOOKUP_PARENT) - return -EAGAIN; - if (this.name[0] == '.') switch (this.len) { - default: - break; - case 2: - if (this.name[1] != '.') - break; - return -EAGAIN; - case 1: - goto return_reval; - } - if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) - return -EAGAIN; - if (do_lookup_rcu(nd, &this, next)) - return -EAGAIN; - inode = next->dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op->follow_link) - return -EAGAIN; - - path_to_nameidata_rcu(next, nd); - if (!inode) - return -ENOENT; - if (lookup_flags & LOOKUP_DIRECTORY) { - if (!inode->i_op->lookup) - return -ENOTDIR; - } - goto return_base; - } -return_reval: - /* - * We bypassed the ordinary revalidation routines. - * We may need to check the cached dentry for staleness. 
- */ - if (nd->path.dentry && nd->path.dentry->d_sb && - (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) - return -EAGAIN; -return_base: - spin_lock(&nd->path.dentry->d_lock); - if (d_unhashed(nd->path.dentry)) { - spin_unlock(&nd->path.dentry->d_lock); - return -EAGAIN; - } - if (!nd->path.dentry->d_inode) { - spin_unlock(&nd->path.dentry->d_lock); - return -EAGAIN; - } - atomic_inc(&nd->path.dentry->d_count); - spin_unlock(&nd->path.dentry->d_lock); - return 0; -} - /* * This is a temporary kludge to deal with "automount" symlinks; proper * solution is to trigger them on follow_mount(), so that do_lookup() @@ -1116,7 +895,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; follow_dotdot(nd); @@ -1161,7 +940,7 @@ last_component: if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; follow_dotdot(nd); @@ -1225,19 +1004,6 @@ return_err: return err; } -static int path_walk_rcu(const char *name, struct nameidata *nd) -{ - struct path save = nd->path; - struct path path = {.mnt = NULL}; - int err; - - current->total_link_count = 0; - err = link_path_walk_rcu(name, nd, &path); - if (unlikely(err == -EAGAIN)) - nd->path = save; - return err; -} - static int path_walk(const char *name, struct nameidata *nd) { struct path save = nd->path; @@ -1263,55 +1029,6 @@ static int path_walk(const char *name, struct nameidata *nd) return result; } -static noinline int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) -{ - int retval = 0; - int fput_needed; - struct file *file; - - nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ - nd->flags = flags; - nd->depth = 0; - nd->root.mnt = NULL; - - if (*name=='/') { - set_root_rcu(nd); - nd->path = nd->root; - } else if (dfd == AT_FDCWD) { - struct fs_struct *fs = current->fs; - read_lock(&fs->lock); - nd->path = fs->pwd; - read_unlock(&fs->lock); - } else { - struct dentry *dentry; - - file = fget_light(dfd, &fput_needed); - retval = -EBADF; - if (!file) - goto out_fail; - - dentry = file->f_path.dentry; - - retval = -ENOTDIR; - if (!S_ISDIR(dentry->d_inode->i_mode)) - goto fput_fail; - - retval = file_permission(file, MAY_EXEC); - if (retval) - goto fput_fail; - - nd->path = file->f_path; - - fput_light(file, fput_needed); - } - return 0; - -fput_fail: - fput_light(file, fput_needed); -out_fail: - return retval; -} - static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1368,51 +1085,16 @@ out_fail: static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { - int retval; - int cpu = get_cpu(); - put_cpu(); - - vfsmount_read_lock(cpu); - rcu_read_lock(); - retval = path_init_rcu(dfd, name, flags, nd); - if (unlikely(retval)) { - rcu_read_unlock(); - vfsmount_read_unlock(cpu); - return retval; - } - retval = path_walk_rcu(name, nd); - rcu_read_unlock(); - if (likely(!retval)) - mntget(nd->path.mnt); - vfsmount_read_unlock(cpu); - if (likely(!retval)) { - if (unlikely(!audit_dummy_context())) { - if (nd->path.dentry && nd->path.dentry->d_inode) - audit_inode(name, nd->path.dentry); - } - } - if (nd->root.mnt) - nd->root.mnt = NULL; - - if (unlikely(retval == -EAGAIN)) { - /* slower, locked walk */ - retval = path_init(dfd, name, flags, nd); - if (unlikely(retval)) - return retval; + int retval = path_init(dfd, name, flags, nd); + if (!retval) retval = path_walk(name, nd); - if (likely(!retval)) { - if (unlikely(!audit_dummy_context())) { - if (nd->path.dentry && nd->path.dentry->d_inode) - audit_inode(name, nd->path.dentry); - } - } - - if 
(nd->root.mnt) { - path_put(&nd->root); - nd->root.mnt = NULL; - } + if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && + nd->path.dentry->d_inode)) + audit_inode(name, nd->path.dentry); + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; } - return retval; } @@ -2481,10 +2163,12 @@ void dentry_unhash(struct dentry *dentry) { dget(dentry); shrink_dcache_parent(dentry); + spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } int vfs_rmdir(struct inode *dir, struct dentry *dentry) @@ -2637,11 +2321,8 @@ static long do_unlinkat(int dfd, const char __user *pathname) if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (inode) { - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); - } + if (inode) + atomic_inc(&inode->i_count); error = mnt_want_write(nd.path.mnt); if (error) goto exit2; diff --git a/fs/namespace.c b/fs/namespace.c index ad39c7f2d292..962fd96dbe4c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -11,8 +11,6 @@ #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/percpu.h> #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/kernel.h> @@ -39,16 +37,12 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* - * vfsmount "brlock" style spinlock for vfsmount related operations, use - * vfsmount_read_lock/vfsmount_write_lock functions. 
- */ -static DEFINE_PER_CPU(spinlock_t, vfsmount_lock); +/* spinlock for vfsmount related operations, inplace of dcache_lock */ +__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); -static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -60,48 +54,6 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); -void vfsmount_read_lock(int cpu) -{ - spinlock_t *lock; - - lock = &per_cpu(vfsmount_lock, cpu); - spin_lock(lock); -} - -void vfsmount_read_unlock(int cpu) -{ - spinlock_t *lock; - - lock = &per_cpu(vfsmount_lock, cpu); - spin_unlock(lock); -} - -void vfsmount_write_lock(void) -{ - int i; - int nr = 0; - - for_each_possible_cpu(i) { - spinlock_t *lock; - - lock = &per_cpu(vfsmount_lock, i); - spin_lock_nested(lock, nr); - nr++; - } -} - -void vfsmount_write_unlock(void) -{ - int i; - - for_each_possible_cpu(i) { - spinlock_t *lock; - - lock = &per_cpu(vfsmount_lock, i); - spin_unlock(lock); - } -} - static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -112,21 +64,18 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* - * allocation is serialized by namespace_sem, but we need the spinlock to - * serialise with freeing. 
- */ +/* allocation is serialized by namespace_sem */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&mnt_id_lock); + spin_lock(&vfsmount_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&mnt_id_lock); + spin_unlock(&vfsmount_lock); if (res == -EAGAIN) goto retry; @@ -136,11 +85,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&mnt_id_lock); + spin_lock(&vfsmount_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&mnt_id_lock); + spin_unlock(&vfsmount_lock); } /* @@ -176,49 +125,6 @@ void mnt_release_group_id(struct vfsmount *mnt) mnt->mnt_group_id = 0; } -static inline void add_mnt_count(struct vfsmount *mnt, int n) -{ -#ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_count, smp_processor_id())) += n; -#else - mnt->mnt_count += n; -#endif -} - -static inline void inc_mnt_count(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))++; -#else - mnt->mnt_count++; -#endif -} - -static inline void dec_mnt_count(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - (*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))--; -#else - mnt->mnt_count--; -#endif -} - -unsigned int count_mnt_count(struct vfsmount *mnt) -{ -#ifdef CONFIG_SMP - unsigned int count = 0; - int cpu; - - for_each_possible_cpu(cpu) { - count += *per_cpu_ptr(mnt->mnt_count, cpu); - } - - return count; -#else - return mnt->mnt_count; -#endif -} - struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -235,13 +141,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) goto out_free_id; } -#ifdef CONFIG_SMP - mnt->mnt_count = alloc_percpu(int); - if (!mnt->mnt_count) - goto out_free_devname; -#else - mnt->mnt_count = 0; -#endif + atomic_set(&mnt->mnt_count, 1); INIT_LIST_HEAD(&mnt->mnt_hash); 
INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -253,19 +153,14 @@ struct vfsmount *alloc_vfsmnt(const char *name) #ifdef CONFIG_SMP mnt->mnt_writers = alloc_percpu(int); if (!mnt->mnt_writers) - goto out_free_mntcount; + goto out_free_devname; #else mnt->mnt_writers = 0; #endif - preempt_disable(); - inc_mnt_count(mnt); - preempt_enable(); } return mnt; #ifdef CONFIG_SMP -out_free_mntcount: - free_percpu(mnt->mnt_count); out_free_devname: kfree(mnt->mnt_devname); #endif @@ -376,8 +271,8 @@ int mnt_want_write(struct vfsmount *mnt) * held by mnt_make_readonly(). Works on !RT as well. */ while (mnt->mnt_flags & MNT_WRITE_HOLD) { - vfsmount_write_lock(); - vfsmount_write_unlock(); + spin_lock(&vfsmount_lock); + spin_unlock(&vfsmount_lock); } /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will @@ -456,7 +351,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store @@ -490,15 +385,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); mnt->mnt_flags &= ~MNT_READONLY; - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -551,13 +446,10 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - int cpu = get_cpu(); - put_cpu(); - - vfsmount_read_lock(cpu); + spin_lock(&vfsmount_lock); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); return child_mnt; } @@ -582,16 +474,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } -static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) -{ - if (!__lookup_mnt(mnt, dentry, 0)) { - spin_lock(&dentry->d_lock); - WARN_ON(dentry->d_mounted == 0); - dentry->d_mounted--; - spin_unlock(&dentry->d_lock); - } -} - static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { old_path->dentry = mnt->mnt_mountpoint; @@ -600,19 +482,15 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt_root; list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); - dentry_reset_mounted(old_path->mnt, old_path->dentry); - WARN_ON(!(mnt->mnt_flags & MNT_MOUNTED)); - mnt->mnt_flags &= ~MNT_MOUNTED; + old_path->dentry->d_mounted--; } void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *child_mnt) { child_mnt->mnt_parent = mntget(mnt); - spin_lock(&dentry->d_lock); - child_mnt->mnt_mountpoint = dget_dlock(dentry); + child_mnt->mnt_mountpoint = dget(dentry); dentry->d_mounted++; - 
spin_unlock(&dentry->d_lock); } static void attach_mnt(struct vfsmount *mnt, struct path *path) @@ -621,8 +499,6 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(path->mnt, path->dentry)); list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); - WARN_ON(mnt->mnt_flags & MNT_MOUNTED); - mnt->mnt_flags |= MNT_MOUNTED; } /* @@ -645,8 +521,6 @@ static void commit_tree(struct vfsmount *mnt) list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(parent, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); - WARN_ON(mnt->mnt_flags & MNT_MOUNTED); - mnt->mnt_flags |= MNT_MOUNTED; touch_mnt_namespace(n); } @@ -694,7 +568,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, goto out_free; } - mnt->mnt_flags = (old->mnt_flags & ~MNT_MOUNTED); + mnt->mnt_flags = old->mnt_flags; atomic_inc(&sb->s_active); mnt->mnt_sb = sb; mnt->mnt_root = dget(root); @@ -750,92 +624,43 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { - int cpu = get_cpu(); - put_cpu(); repeat: - if (likely(mnt->mnt_flags & MNT_MOUNTED)) { - vfsmount_read_lock(cpu); - if (unlikely(!(mnt->mnt_flags & MNT_MOUNTED))) { - vfsmount_read_unlock(cpu); - goto repeat; + if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { + if (likely(!mnt->mnt_pinned)) { + spin_unlock(&vfsmount_lock); + __mntput(mnt); + return; } - preempt_disable(); - dec_mnt_count(mnt); - preempt_enable(); - vfsmount_read_unlock(cpu); - - return; - } - - vfsmount_write_lock(); - if (unlikely((mnt->mnt_flags & MNT_MOUNTED))) { - vfsmount_write_unlock(); + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + spin_unlock(&vfsmount_lock); + acct_auto_close_mnt(mnt); + security_sb_umount_close(mnt); goto repeat; } - preempt_disable(); - dec_mnt_count(mnt); - preempt_enable(); - if (count_mnt_count(mnt)) { - vfsmount_write_unlock(); - return; - } - if 
(likely(!mnt->mnt_pinned)) { - vfsmount_write_unlock(); - __mntput(mnt); - return; - } - preempt_disable(); - add_mnt_count(mnt, mnt->mnt_pinned + 1); - preempt_enable(); - mnt->mnt_pinned = 0; - vfsmount_write_unlock(); - acct_auto_close_mnt(mnt); - security_sb_umount_close(mnt); - goto repeat; -} -EXPORT_SYMBOL(mntput_no_expire); - -void mntput(struct vfsmount *mnt) -{ - if (mnt) { - /* avoid cacheline pingpong */ - if (unlikely(mnt->mnt_expiry_mark)) - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } } -EXPORT_SYMBOL(mntput); -struct vfsmount *mntget(struct vfsmount *mnt) -{ - if (mnt) { - preempt_disable(); - inc_mnt_count(mnt); - preempt_enable(); - } - return mnt; -} -EXPORT_SYMBOL(mntget); +EXPORT_SYMBOL(mntput_no_expire); void mnt_pin(struct vfsmount *mnt) { - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); mnt->mnt_pinned++; - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); } + EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); if (mnt->mnt_pinned) { - preempt_disable(); - inc_mnt_count(mnt); - preempt_enable(); + atomic_inc(&mnt->mnt_count); mnt->mnt_pinned--; } - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); } + EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -1116,13 +941,12 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - /* write lock needed for count_mnt_count */ - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += count_mnt_count(p); + actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1149,12 +973,11 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - vfsmount_write_unlock(); 
+ spin_unlock(&vfsmount_lock); up_read(&namespace_sem); - return ret; } @@ -1169,14 +992,13 @@ void release_mounts(struct list_head *head) if (mnt->mnt_parent != mnt) { struct dentry *dentry; struct vfsmount *m; - - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); dput(dentry); mntput(m); } @@ -1200,11 +1022,9 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; list_del_init(&p->mnt_child); - WARN_ON(!(p->mnt_flags & MNT_MOUNTED)); - p->mnt_flags &= ~MNT_MOUNTED; if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; - dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); + p->mnt_mountpoint->d_mounted--; } change_mnt_propagation(p, MS_PRIVATE); } @@ -1233,16 +1053,8 @@ static int do_umount(struct vfsmount *mnt, int flags) flags & (MNT_FORCE | MNT_DETACH)) return -EINVAL; - /* - * probably don't strictly need the lock here if we examined - * all race cases, but it's a slowpath. 
- */ - vfsmount_write_lock(); - if (count_mnt_count(mnt) != 2) { - vfsmount_write_unlock(); + if (atomic_read(&mnt->mnt_count) != 2) return -EBUSY; - } - vfsmount_write_unlock(); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1284,7 +1096,7 @@ static int do_umount(struct vfsmount *mnt, int flags) } down_write(&namespace_sem); - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1296,7 +1108,7 @@ static int do_umount(struct vfsmount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); if (retval) security_sb_umount_busy(mnt); up_write(&namespace_sem); @@ -1383,13 +1195,6 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, goto Enomem; q->mnt_mountpoint = mnt->mnt_mountpoint; - /* - * We don't call attach_mnt on a cloned rootfs, so set it as - * mounted here. - */ - WARN_ON(q->mnt_flags & MNT_MOUNTED); - q->mnt_flags |= MNT_MOUNTED; - p = mnt; list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { if (!is_subdir(r->mnt_mountpoint, dentry)) @@ -1410,19 +1215,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1441,9 +1246,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); up_write(&namespace_sem); 
release_mounts(&umount_list); } @@ -1556,13 +1361,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (err) goto out_cleanup_ids; - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); } - if (parent_path) { detach_mnt(source_mnt, parent_path); attach_mnt(source_mnt, path); @@ -1576,8 +1380,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - vfsmount_write_unlock(); - + spin_unlock(&vfsmount_lock); return 0; out_cleanup_ids: @@ -1639,10 +1442,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1675,13 +1478,9 @@ static int do_loopback(struct path *path, char *old_name, goto out; err = -ENOMEM; - if (recurse) { + if (recurse) mnt = copy_tree(old_path.mnt, old_path.dentry, 0); - /* Annoying. Since we graft the rootfs, we need to unmark - * it as mounted. 
*/ - WARN_ON(!(mnt->mnt_flags & MNT_MOUNTED)); - mnt->mnt_flags &= ~MNT_MOUNTED; - } else + else mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); if (!mnt) @@ -1690,10 +1489,9 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); release_mounts(&umount_list); } @@ -1746,19 +1544,18 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK; - mnt_flags |= path->mnt->mnt_flags & MNT_MOUNTED; path->mnt->mnt_flags = mnt_flags; - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { security_sb_post_remount(path->mnt, flags, data); - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); touch_mnt_namespace(path->mnt->mnt_ns); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); } return err; } @@ -1935,7 +1732,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1751,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2229,9 +2026,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, kfree(new_ns); return ERR_PTR(-ENOMEM); } - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements 
and mark new vfsmounts @@ -2428,7 +2225,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2448,7 +2245,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); security_sb_post_pivotroot(&root, &new); error = 0; @@ -2464,7 +2261,7 @@ out1: out0: return error; out3: - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); goto out2; } @@ -2494,7 +2291,6 @@ static void __init init_mount_tree(void) void __init mnt_init(void) { unsigned u; - int i; int err; init_rwsem(&namespace_sem); @@ -2512,9 +2308,6 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(vfsmount_lock, i)); - err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2531,30 +2324,15 @@ void put_mnt_ns(struct mnt_namespace *ns) struct vfsmount *root; LIST_HEAD(umount_list); - /* - * We open code this to avoid vfsmount_write_lock() in case of - * ns->count > 1 - */ - if (atomic_add_unless(&ns->count, -1, 1)) - return; - - /* - * Do the full locking here as it's likely that ns->count will - * drop to zero and we have to take namespace_sem and all vfs - * mount locks anyway for umount_tree(). 
- */ - down_write(&namespace_sem); - vfsmount_write_lock(); - if (!atomic_dec_and_test(&ns->count)) { - vfsmount_write_unlock(); - up_write(&namespace_sem); + if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) return; - } root = ns->root; ns->root = NULL; - + spin_unlock(&vfsmount_lock); + down_write(&namespace_sem); + spin_lock(&vfsmount_lock); umount_tree(root, 0, &umount_list); - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 0a5210802874..b8b5b30d53f0 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -364,21 +364,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&parent->d_lock); + spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget(dent); + dget_locked(dent); else dent = NULL; - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); goto out; } next = next->next; } - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); return NULL; out: diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 0a041b0d432e..2441d1ab57dc 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -192,7 +192,7 @@ ncp_renew_dentries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&parent->d_lock); + spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -204,7 +204,7 @@ ncp_renew_dentries(struct dentry *parent) next = next->next; } - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); } static inline void @@ -214,7 +214,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry 
*dentry; - spin_lock(&parent->d_lock); + spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -222,7 +222,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) ncp_age_dentry(server, dentry); next = next->next; } - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); } struct ncp_cache_head { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 01a0b9acb1f8..b5d55d39fb79 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1434,9 +1434,11 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1) { spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); @@ -1447,6 +1449,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) need_rehash = 1; } spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1540,9 +1543,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_add(dentry, inode); } return error; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 5b79ff1dd3a9..b35d2a616066 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -55,9 +55,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i return -ENOMEM; } /* Circumvent igrab(): we know the inode is not being freed */ - spin_lock(&inode->i_lock); - inode->i_count++; - 
spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); /* * Ensure that this dentry is invisible to d_find_alias(). * Otherwise, it may be spliced into the tree by @@ -66,11 +64,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. */ - spin_lock(&sb->s_root->d_inode->i_lock); - spin_lock(&sb->s_root->d_lock); + spin_lock(&dcache_lock); list_del_init(&sb->s_root->d_alias); - spin_unlock(&sb->s_root->d_lock); - spin_unlock(&sb->s_root->d_inode->i_lock); + spin_unlock(&dcache_lock); } return 0; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 86eefbafb7b4..5f59a2df21aa 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -396,7 +396,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), - inode->i_count); + atomic_read(&inode->i_count)); out: return inode; @@ -1153,7 +1153,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, - inode->i_count, fattr->valid); + atomic_read(&inode->i_count), fattr->valid); if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) goto out_fileid; @@ -1395,16 +1395,9 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -static void nfs_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); -} - void nfs_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, nfs_i_callback); + kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } static inline void nfs4_init_once(struct nfs_inode *nfsi) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 961895616e8d..40c766782891 
100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -48,17 +48,12 @@ char *nfs_path(const char *base, const struct dentry *dentry, char *buffer, ssize_t buflen) { - char *end; + char *end = buffer+buflen; int namelen; - unsigned seq; -rename_retry: - end = buffer+buflen; *--end = '\0'; buflen--; - - seq = read_seqbegin(&rename_lock); - rcu_read_lock(); + spin_lock(&dcache_lock); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; @@ -69,9 +64,7 @@ rename_retry: *--end = '/'; dentry = dentry->d_parent; } - rcu_read_unlock(); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); if (*end != '/') { if (--buflen < 0) goto Elong; @@ -88,9 +81,7 @@ rename_retry: memcpy(end, base, namelen); return end; Elong_unlock: - rcu_read_unlock(); - if (read_seqretry(&rename_lock, seq)) - goto rename_retry; + spin_unlock(&dcache_lock); Elong: return ERR_PTR(-ENAMETOOLONG); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7ae43df98d04..c1e2733f4fa4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -501,8 +501,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) state->owner = owner; atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); - __iget(inode); - state->inode = inode; + state->inode = igrab(inode); spin_unlock(&inode->i_lock); /* Note: The reclaim code dictates that we add stateless * and read-only stateids to the end of the list */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4f853b5ba3d7..d63d964a0392 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -377,7 +377,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); if (!nfsi->npages) { - __iget(inode); + igrab(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 
8de004cf26d5..8715d194561a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1759,7 +1759,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; if (svc_msnfs(ffhp) && - ((atomic_read(&odentry->d_count) > 1) || (atomic_read(&ndentry->d_count) > 1))) { + ((atomic_read(&odentry->d_count) > 1) + || (atomic_read(&ndentry->d_count) > 1))) { host_err = -EPERM; goto out_dput_new; } diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c index fa6dd36e2ee1..dd5f7e0a95f6 100644 --- a/fs/nilfs2/gcdat.c +++ b/fs/nilfs2/gcdat.c @@ -27,7 +27,6 @@ #include "page.h" #include "mdt.h" -/* XXX: what protects i_state? */ int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) { struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index a7dccc002a85..e7ed4cdd0ecf 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -478,7 +478,7 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, inode->i_sb = sb; /* sb may be NULL for some meta data files */ inode->i_blkbits = nilfs->ns_blocksize_bits; inode->i_flags = 0; - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_nlink = 1; inode->i_ino = ino; inode->i_mode = S_IFREG; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index bc9308f1d616..07ba838ef089 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -222,9 +222,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); err = nilfs_add_nondir(dentry, inode); if (!err) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index bdaaa5813d2c..037e878e03fc 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,7 +52,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* determine if the children should tell inode about their events */ watched = 
fsnotify_inode_watches_children(inode); - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ list_for_each_entry(alias, &inode->i_dentry, d_alias) { @@ -61,21 +61,19 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ - spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&child->d_lock); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } - spin_unlock(&alias->d_lock); } - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); } /* Notify this dentry's parent about a child's events. */ @@ -89,18 +87,13 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask) if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) return; -again: spin_lock(&dentry->d_lock); parent = dentry->d_parent; - if (parent != dentry && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } p_inode = parent->d_inode; if (fsnotify_inode_watches_children(p_inode)) { if (p_inode->i_fsnotify_mask & mask) { - dget_dlock(parent); + dget(parent); send = true; } } else { @@ -110,13 +103,11 @@ again: * children and update their d_flags to let them know p_inode * doesn't care about them any more. 
*/ - dget_dlock(parent); + dget(parent); should_update_children = true; } spin_unlock(&dentry->d_lock); - if (parent != dentry) - spin_unlock(&parent->d_lock); if (send) { /* we are notifying a parent so come up with the new mask which diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index be1af807e67b..3165d85aada2 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -362,75 +362,65 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry, * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. * We temporarily drop inode_lock, however, and CAN block. */ -void fsnotify_unmount_inodes(struct super_block *sb) +void fsnotify_unmount_inodes(struct list_head *list) { - int i; - - for_each_possible_cpu(i) { - struct inode *inode, *next_i, *need_iput = NULL; - struct list_head *list; -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_inodes, i); -#else - list = &sb->s_inodes; -#endif - - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { - struct inode *need_iput_tmp; - - spin_lock(&inode->i_lock); - /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!inode->i_count) { - spin_unlock(&inode->i_lock); - continue; - } - - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) { - __iget(inode); - } else - need_iput_tmp = NULL; - spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. 
*/ - if (&next_i->i_sb_list != list) { - spin_lock(&next_i->i_lock); - if (next_i->i_count && - !(next_i->i_state & - (I_CLEAR | I_FREEING | I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; - } - spin_unlock(&next_i->i_lock); - } - - if (need_iput_tmp) - iput(need_iput_tmp); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - - fsnotify_inode_delete(inode); - - iput(inode); + struct inode *inode, *next_i, *need_iput = NULL; + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inode *need_iput_tmp; + + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) + continue; + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) + continue; + + need_iput_tmp = need_iput; + need_iput = NULL; + + /* In case fsnotify_inode_delete() drops a reference. */ + if (inode != need_iput_tmp) + __iget(inode); + else + need_iput_tmp = NULL; + + /* In case the dropping of a reference would nuke next_i. */ + if ((&next_i->i_sb_list != list) && + atomic_read(&next_i->i_count) && + !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; } + + /* + * We can safely drop inode_lock here because we hold + * references on both inode and next_i. Also no new inodes + * will be added since the umount has begun. Finally, + * iprune_mutex keeps shrink_icache_memory() away. 
+ */ + spin_unlock(&inode_lock); + + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput(inode); + + spin_lock(&inode_lock); } } diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index e0a92bf683d6..40b1cf914ccb 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c @@ -185,25 +185,23 @@ static void set_dentry_child_flags(struct inode *inode, int watched) { struct dentry *alias; - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct dentry *child; - spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&child->d_lock); if (watched) child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; else child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } - spin_unlock(&alias->d_lock); } - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); } /* @@ -271,7 +269,6 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode) if (!inode) return; - /* XXX: need parent lock in place of dcache_lock? */ spin_lock(&entry->d_lock); parent = entry->d_parent; if (parent->d_inode && inotify_inode_watched(parent->d_inode)) @@ -286,7 +283,6 @@ void inotify_d_move(struct dentry *entry) { struct dentry *parent; - /* XXX: need parent lock in place of dcache_lock? 
*/ parent = entry->d_parent; if (inotify_inode_watched(parent->d_inode)) entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; @@ -343,28 +339,18 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED)) return; -again: spin_lock(&dentry->d_lock); parent = dentry->d_parent; - if (parent != dentry && !spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } inode = parent->d_inode; if (inotify_inode_watched(inode)) { - dget_dlock(parent); + dget(parent); spin_unlock(&dentry->d_lock); - if (parent != dentry) - spin_unlock(&parent->d_lock); inotify_inode_queue_event(inode, mask, cookie, name, dentry->d_inode); dput(parent); - } else { + } else spin_unlock(&dentry->d_lock); - if (parent != dentry) - spin_unlock(&parent->d_lock); - } } EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event); @@ -385,86 +371,76 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie); * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. * We temporarily drop inode_lock, however, and CAN block. */ -void inotify_unmount_inodes(struct super_block *sb) -{ - int i; - - for_each_possible_cpu(i) { - struct inode *inode, *next_i, *need_iput = NULL; - struct list_head *list; -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_inodes, i); -#else - list = &sb->s_inodes; -#endif - - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { - struct inotify_watch *watch, *next_w; - struct inode *need_iput_tmp; - struct list_head *watches; - - spin_lock(&inode->i_lock); - /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. 
- */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!inode->i_count) { - spin_unlock(&inode->i_lock); - continue; - } - - need_iput_tmp = need_iput; - need_iput = NULL; - /* In case inotify_remove_watch_locked() drops a reference. */ - if (inode != need_iput_tmp) { - __iget(inode); - } else - need_iput_tmp = NULL; - - spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - if (&next_i->i_sb_list != list) { - spin_lock(&next_i->i_lock); - if (next_i->i_count && - !(next_i->i_state & - (I_CLEAR|I_FREEING|I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; - } - spin_unlock(&next_i->i_lock); - } - - if (need_iput_tmp) - iput(need_iput_tmp); - - /* for each watch, send IN_UNMOUNT and then remove it */ - mutex_lock(&inode->inotify_mutex); - watches = &inode->inotify_watches; - list_for_each_entry_safe(watch, next_w, watches, i_list) { - struct inotify_handle *ih = watch->ih; - get_inotify_watch(watch); - mutex_lock(&ih->mutex); - ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); - inotify_remove_watch_locked(ih, watch); - mutex_unlock(&ih->mutex); - put_inotify_watch(watch); - } - mutex_unlock(&inode->inotify_mutex); - iput(inode); +void inotify_unmount_inodes(struct list_head *list) +{ + struct inode *inode, *next_i, *need_iput = NULL; + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inotify_watch *watch, *next_w; + struct inode *need_iput_tmp; + struct list_head *watches; + + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. 
+ */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) + continue; + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) + continue; + + need_iput_tmp = need_iput; + need_iput = NULL; + /* In case inotify_remove_watch_locked() drops a reference. */ + if (inode != need_iput_tmp) + __iget(inode); + else + need_iput_tmp = NULL; + /* In case the dropping of a reference would nuke next_i. */ + if ((&next_i->i_sb_list != list) && + atomic_read(&next_i->i_count) && + !(next_i->i_state & (I_CLEAR | I_FREEING | + I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + + /* + * We can safely drop inode_lock here because we hold + * references on both inode and next_i. Also no new inodes + * will be added since the umount has begun. Finally, + * iprune_mutex keeps shrink_icache_memory() away. 
+ */ + spin_unlock(&inode_lock); + + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send IN_UNMOUNT and then remove it */ + mutex_lock(&inode->inotify_mutex); + watches = &inode->inotify_watches; + list_for_each_entry_safe(watch, next_w, watches, i_list) { + struct inotify_handle *ih= watch->ih; + get_inotify_watch(watch); + mutex_lock(&ih->mutex); + ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, + NULL, NULL); + inotify_remove_watch_locked(ih, watch); + mutex_unlock(&ih->mutex); + put_inotify_watch(watch); } + mutex_unlock(&inode->inotify_mutex); + iput(inode); + + spin_lock(&inode_lock); } } EXPORT_SYMBOL_GPL(inotify_unmount_inodes); diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3475c2e98c80..80b04770e8e9 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2921,9 +2921,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) } if ((sb->s_root = d_alloc_root(vol->root_ino))) { /* We increment i_count simulating an ntfs_iget(). */ - spin_lock(&vol->root_ino->i_lock); - vol->root_ino->i_count++; - spin_unlock(&vol->root_ino->i_lock); + atomic_inc(&vol->root_ino->i_count); ntfs_debug("Exiting, status successful."); /* Release the default upcase if it has no users. 
*/ mutex_lock(&ntfs_lock); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index d825f00eda71..b4957c7d9fe2 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -151,25 +151,23 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct list_head *p; struct dentry *dentry = NULL; - spin_lock(&inode->i_lock); + spin_lock(&dcache_lock); + list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); - spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_dlock(dentry); - spin_unlock(&dentry->d_lock); + dget_locked(dentry); break; } - spin_unlock(&dentry->d_lock); dentry = NULL; } - spin_unlock(&inode->i_lock); + spin_unlock(&dcache_lock); return dentry; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3d4b7e210bb9..50fb26a6a5f5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -719,9 +719,7 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_commit; } - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); diff --git a/fs/open.c b/fs/open.c index 9f57c39c921e..040cef72bc00 100644 --- a/fs/open.c +++ b/fs/open.c @@ -844,7 +844,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_sb_list_add(f, inode->i_sb); + file_move(f, &inode->i_sb->s_files); error = security_dentry_open(f, cred); if (error) @@ -890,7 +890,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_sb_list_del(f); + file_kill(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/fs/pnode.c b/fs/pnode.c index 5a48677f1562..8d5f392ec3d3 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -264,12 +264,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = 
child; } out: - vfsmount_write_lock(); + spin_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } - vfsmount_write_unlock(); + spin_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } @@ -279,7 +279,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = count_mnt_count(mnt) - mnt->mnt_ghosts; + int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; return (mycount > count); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 6ab0bd692968..8dce96c331f8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -650,17 +650,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) struct proc_mounts *p = file->private_data; struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; - int cpu = get_cpu(); - put_cpu(); poll_wait(file, &ns->poll, wait); - vfsmount_read_lock(cpu); + spin_lock(&vfsmount_lock); if (p->event != ns->event) { p->event = ns->event; res |= POLLERR | POLLPRI; } - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); return res; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index ee0a77b56810..445a02bcaab3 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -65,16 +65,9 @@ static struct inode *proc_alloc_inode(struct super_block *sb) return inode; } -static void proc_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(proc_inode_cachep, PROC_I(inode)); -} - static void proc_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, proc_i_callback); + kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } static void init_once(void *foo) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 4c6a62baa109..6c9da00ddda2 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -841,55 +841,35 @@ static int dqinit_needed(struct inode *inode, int 
type) /* This routine is guarded by dqonoff_mutex mutex */ static void add_dquot_ref(struct super_block *sb, int type) { + struct inode *inode, *old_inode = NULL; int reserved = 0; - int i; - for_each_possible_cpu(i) { - struct inode *inode, *old_inode = NULL; - struct list_head *list; -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_inodes, i); -#else - list = &sb->s_inodes; -#endif - - rcu_read_lock(); - list_for_each_entry_rcu(inode, list, i_sb_list) { - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - if (unlikely(inode_get_rsv_space(inode) > 0)) - reserved = 1; + spin_lock(&inode_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) + continue; + if (unlikely(inode_get_rsv_space(inode) > 0)) + reserved = 1; + if (!atomic_read(&inode->i_writecount)) + continue; + if (!dqinit_needed(inode, type)) + continue; - if (!atomic_read(&inode->i_writecount)) { - spin_unlock(&inode->i_lock); - continue; - } + __iget(inode); + spin_unlock(&inode_lock); - if (!dqinit_needed(inode, type)) { - spin_unlock(&inode->i_lock); - continue; - } - - __iget(inode); - spin_unlock(&inode->i_lock); - rcu_read_unlock(); - - iput(old_inode); - sb->dq_op->initialize(inode, type); - /* We hold a reference to 'inode' so it couldn't have been - * removed from s_inodes list while we dropped the inode_lock. - * We cannot iput the inode now as we can be holding the last - * reference and we cannot iput it under inode_lock. So we - * keep the reference and iput it later. */ - old_inode = inode; - rcu_read_lock(); - } - rcu_read_unlock(); iput(old_inode); - } + sb->dq_op->initialize(inode, type); + /* We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the inode_lock. + * We cannot iput the inode now as we can be holding the last + * reference and we cannot iput it under inode_lock. 
So we + * keep the reference and iput it later. */ + old_inode = inode; + spin_lock(&inode_lock); + } + spin_unlock(&inode_lock); + iput(old_inode); if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened before quota" @@ -962,29 +942,20 @@ static void put_dquot_list(struct list_head *tofree_head) static void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { - int i; - for_each_possible_cpu(i) { - struct inode *inode; - struct list_head *list; -#ifdef CONFIG_SMP - list = per_cpu_ptr(sb->s_inodes, i); -#else - list = &sb->s_inodes; -#endif + struct inode *inode; - rcu_read_lock(); - list_for_each_entry_rcu(inode, list, i_sb_list) { - /* - * We have to scan also I_NEW inodes because they can already - * have quota pointer initialized. Luckily, we need to touch - * only quota pointers and these have separate locking - * (dqptr_sem). - */ - if (!IS_NOQUOTA(inode)) - remove_inode_dquot_ref(inode, type, tofree_head); - } - rcu_read_unlock(); + spin_lock(&inode_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + /* + * We have to scan also I_NEW inodes because they can already + * have quota pointer initialized. Luckily, we need to touch + * only quota pointers and these have separate locking + * (dqptr_sem). 
+ */ + if (!IS_NOQUOTA(inode)) + remove_inode_dquot_ref(inode, type, tofree_head); } + spin_unlock(&inode_lock); } /* Gather all references from inodes and drop them */ diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index d3f9b7d05307..da2dba082e2d 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -39,7 +39,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) BUG_ON(!S_ISREG(inode->i_mode)); /* fast out for when nothing needs to be done */ - if ((inode->i_count > 1 || + if ((atomic_read(&inode->i_count) > 1 || !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || !tail_has_to_be_packed(inode)) && REISERFS_I(inode)->i_prealloc_count <= 0) { @@ -94,7 +94,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) if (!err) err = jbegin_failure; - if (!err && inode->i_count <= 1 && + if (!err && atomic_read(&inode->i_count) <= 1 && (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && tail_has_to_be_packed(inode)) { /* if regular file is released by last holder and it has been diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 05b3240ed9ab..9d4dcf0b07cb 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1151,9 +1151,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME_SEC; reiserfs_update_sd(&th, inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); reiserfs_write_unlock(dir->i_sb); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 17f52e8bb739..5fa7118f04e1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1477,7 +1477,7 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, ** reading in the last block. 
The user will hit problems trying to ** read the file, but for now we just skip the indirect2direct */ - if (inode->i_count > 1 || + if (atomic_read(&inode->i_count) > 1 || !tail_has_to_be_packed(inode) || !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { /* leave tail in an unformatted node */ diff --git a/fs/seq_file.c b/fs/seq_file.c index 1326fc0d20b2..eae7d9dbf3ff 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -6,7 +6,6 @@ */ #include <linux/fs.h> -#include <linux/mount.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -459,16 +458,13 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, char *buf; size_t size = seq_get_buf(m, &buf); int res = -ENAMETOOLONG; - int cpu = get_cpu(); - put_cpu(); if (size) { char *p; - vfsmount_read_lock(cpu); + spin_lock(&dcache_lock); p = __d_path(path, root, buf, size); - vfsmount_read_unlock(cpu); - + spin_unlock(&dcache_lock); res = PTR_ERR(p); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index 61e482123488..8c177eb7e344 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -62,7 +62,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&parent->d_lock); + spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -70,7 +70,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) smb_age_dentry(server, dentry); next = next->next; } - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); } /* @@ -96,13 +96,13 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. 
*/ - spin_lock(&parent->d_lock); + spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget(dent); + dget_locked(dent); else dent = NULL; goto out_unlock; @@ -111,7 +111,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } dent = NULL; out_unlock: - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); return dent; } diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index dafa6316da0b..3e4803b4427e 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -405,7 +405,6 @@ void smb_renew_times(struct dentry * dentry) { dget(dentry); -again: spin_lock(&dentry->d_lock); for (;;) { struct dentry *parent; @@ -414,13 +413,8 @@ again: if (IS_ROOT(dentry)) break; parent = dentry->d_parent; - if (!spin_trylock(&parent->d_lock)) { - spin_unlock(&dentry->d_lock); - goto again; - } - dget_dlock(parent); + dget(parent); spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); dput(dentry); dentry = parent; spin_lock(&dentry->d_lock); diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 7b0c74971ad4..71c29b6670b4 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c @@ -332,7 +332,6 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf, * and store it in reversed order [see reverse_string()] */ dget(entry); -again: spin_lock(&entry->d_lock); while (!IS_ROOT(entry)) { struct dentry *parent; @@ -351,7 +350,6 @@ again: dput(entry); return len; } - reverse_string(path, len); path += len; if (unicode) { @@ -363,11 +361,7 @@ again: maxlen -= len+1; parent = entry->d_parent; - if (!spin_trylock(&parent->d_lock)) { - spin_unlock(&entry->d_lock); - goto again; - } - dget_dlock(parent); + dget(parent); spin_unlock(&entry->d_lock); dput(entry); entry = parent; diff --git a/fs/super.c b/fs/super.c index 84c4aafd6fae..aff046b0fe78 100644 --- a/fs/super.c +++ b/fs/super.c @@ -62,41 
+62,10 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } -#ifdef CONFIG_SMP - s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) { - security_sb_free(s); - kfree(s); - s = NULL; - goto out; - } else { - int i; - - for_each_possible_cpu(i) - INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); - } -#else INIT_LIST_HEAD(&s->s_files); -#endif -#ifdef CONFIG_SMP - s->s_inodes = alloc_percpu(struct list_head); - if (!s->s_inodes) { - free_percpu(s->s_files); - security_sb_free(s); - kfree(s); - s = NULL; - goto out; - } else { - int i; - - for_each_possible_cpu(i) - INIT_LIST_HEAD(per_cpu_ptr(s->s_inodes, i)); - } -#else - INIT_LIST_HEAD(&s->s_inodes); -#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); @@ -148,10 +117,6 @@ out: */ static inline void destroy_super(struct super_block *s) { -#ifdef CONFIG_SMP - free_percpu(s->s_inodes); - free_percpu(s->s_files); -#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); @@ -603,7 +568,7 @@ out: int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; - int remount_rw, remount_ro; + int remount_rw; if (sb->s_frozen != SB_UNFROZEN) return -EBUSY; @@ -618,12 +583,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) shrink_dcache_sb(sb); sync_filesystem(sb); - remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); - remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); - /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ - if (remount_ro) { + if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); else if (!fs_may_remount_ro(sb)) @@ -632,6 +594,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (retval < 0 && retval != -ENOSYS) return 
-EBUSY; } + remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); @@ -641,14 +604,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) vfs_dq_quota_on_remount(sb); - /* Some filesystems modify their metadata via some other path - than the bdev buffer cache (eg. use a private mapping, or - directories in pagecache, etc). Also file data modifications - go via their own mappings. So If we try to mount readonly - then copy the filesystem from bdev, we could get stale data, - so invalidate it to give a best effort at coherency. */ - if (remount_ro && sb->s_bdev) - invalidate_bdev(sb->s_bdev); return 0; } diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index d63da9be14cc..33e047b59b8d 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -126,9 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); return add_nondir(dentry, inode); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 1eed16ffdd68..552fb0111fff 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -557,9 +557,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); inc_nlink(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); inode->i_ctime = ubifs_current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index d020118df827..43f9d19a6f33 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -342,7 +342,7 @@ static void ubifs_delete_inode(struct inode *inode) goto out; dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); - 
ubifs_assert(!inode->i_count); + ubifs_assert(!atomic_read(&inode->i_count)); ubifs_assert(inode->i_nlink == 0); truncate_inode_pages(&inode->i_data, 0); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 9e85540e9e01..cd2115060fdc 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1108,9 +1108,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); inode->i_ctime = current_fs_time(inode->i_sb); mark_inode_dirty(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); unlock_kernel(); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index ece37bbb3ea4..4c26d9e8bc94 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -178,9 +178,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); error = ufs_add_nondir(dentry, inode); unlock_kernel(); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f78e1df5353c..225946012d0b 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -349,9 +349,7 @@ xfs_vn_link( if (unlikely(error)) return -error; - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index a1dc3a153ef9..c22a608321a3 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -539,7 +539,7 @@ DECLARE_EVENT_CLASS(xfs_inode_class, TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; - __entry->count = VFS_I(ip)->i_count; + __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", diff --git a/fs/xfs/xfs_inode.h 
b/fs/xfs/xfs_inode.h index 53a14575bf3b..ec1f28c4fc4f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -493,10 +493,8 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); #define IHOLD(ip) \ do { \ - spin_lock(&VFS_I(ip)->i_lock); \ - ASSERT(&VFS_I(ip)->i_count > 0); \ - VFS_I(ip)->i_count++; \ - spin_unlock(&VFS_I(ip)->i_lock); \ + ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ + atomic_inc(&(VFS_I(ip)->i_count)); \ trace_xfs_ihold(ip, _THIS_IP_); \ } while (0) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 63b98bec6630..30b93b2a01a4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -5,7 +5,6 @@ #include <linux/list.h> #include <linux/rculist.h> #include <linux/spinlock.h> -#include <linux/seqlock.h> #include <linux/cache.h> #include <linux/rcupdate.h> @@ -38,8 +37,8 @@ struct qstr { }; struct dentry_stat_t { - int nr_dentry; /* unused */ - int nr_unused; /* protected by dcache_lru_lock */ + int nr_dentry; + int nr_unused; int age_limit; /* age in seconds */ int want_pages; /* pages requested by system */ int dummy[2]; @@ -88,30 +87,20 @@ full_name_hash(const unsigned char *name, unsigned int len) #endif struct dentry { - /* - * The following 64 bytes of fields (on 64-bit) fit into a 64 byte - * cacheline. They are critical for path lookups. We can do most - * path lookups in 2 cachelines (these + name string) if we have - * correct sizing and alignment here. - * - * XXX: d_sb for revalidate needs to be duplicated into a d_flag. - */ atomic_t d_count; unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ int d_mounted; - seqcount_t d_seq; /* per dentry seqlock */ struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ + /* + * The next three fields are touched by __d_lookup. Place them here + * so they all fit in a cache line. 
+ */ struct hlist_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; - const struct dentry_operations *d_op; - /* - * The following 64 bytes of lists tend to be required for tree - * manipulation, not required for lookups. - */ struct list_head d_lru; /* LRU list */ /* * d_child and d_rcu can share memory @@ -122,14 +111,10 @@ struct dentry { } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ - - /* - * These following fields may be needed by some types of lookups, and - * d_iname is likely to be required too, so keep them together. - */ + unsigned long d_time; /* used by d_revalidate */ + const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ - unsigned long d_time; /* used by d_revalidate */ unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; @@ -165,44 +150,43 @@ struct dentry_operations { /* locking rules: - big lock d_lock may block -d_revalidate: no no yes -d_hash no no yes -d_compare: no yes no -d_delete: no no no -d_release: no no yes -d_iput: no no yes + big lock dcache_lock d_lock may block +d_revalidate: no no no yes +d_hash no no no yes +d_compare: no yes yes no +d_delete: no yes no no +d_release: no no no yes +d_iput: no no no yes */ /* d_flags entries */ #define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ -#define DCACHE_NFSFS_RENAMED 0x0002 - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ - -#define DCACHE_DISCONNECTED 0x0004 - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. 
Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ +#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly + * renamed" and has to be + * deleted on the last dput() + */ +#define DCACHE_DISCONNECTED 0x0004 + /* This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it finds + * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move + * that dentry into place and return that dentry rather than the passed one, + * typically using d_splice_alias. + */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 - /* Parent inode is watched by inotify */ + +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 - /* Parent inode is watched by some fsnotify listener */ -#define DCACHE_MOUNTED 0x0100 /* is a mountpoint */ -#define DCACHE_GENOCIDE 0x0200 /* being genocided */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +extern spinlock_t dcache_lock; extern seqlock_t rename_lock; /** @@ -220,8 +204,23 @@ extern seqlock_t rename_lock; * * __d_drop requires dentry->d_lock. 
*/ -void d_drop(struct dentry *dentry); -void __d_drop(struct dentry *dentry); + +static inline void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + dentry->d_flags |= DCACHE_UNHASHED; + hlist_del_rcu(&dentry->d_hash); + } +} + +static inline void d_drop(struct dentry *dentry) +{ + spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); +} static inline int dname_external(struct dentry *dentry) { @@ -300,11 +299,9 @@ extern void d_move(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ -extern struct dentry *d_lookup(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup(struct dentry *, struct qstr *); -extern struct dentry *d_lookup_rcu(struct dentry *, struct qstr *); -extern struct dentry *__d_lookup_rcu(struct dentry *, struct qstr *); -extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); +extern struct dentry * d_lookup(struct dentry *, struct qstr *); +extern struct dentry * __d_lookup(struct dentry *, struct qstr *); +extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); @@ -321,29 +318,28 @@ extern char *dentry_path(struct dentry *, char *, int); /* Allocation counts.. */ /** - * dget, dget_dlock - get a reference to a dentry + * dget, dget_locked - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. + * destroyed when it has references. dget() should never be + * called for dentries with zero reference counter. 
For these cases + * (preferably none, functions in dcache.c are sufficient for normal + * needs and they take necessary precautions) you should hold dcache_lock + * and call dget_locked() instead of dget(). */ -static inline struct dentry *dget_dlock(struct dentry *dentry) -{ - if (dentry) - atomic_inc(&dentry->d_count); - return dentry; -} - + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - dget_dlock(dentry); + BUG_ON(!atomic_read(&dentry->d_count)); + atomic_inc(&dentry->d_count); } return dentry; } -extern struct dentry *dget_parent(struct dentry *dentry); +extern struct dentry * dget_locked(struct dentry *); /** * d_unhashed - is dentry hashed @@ -362,6 +358,16 @@ static inline int d_unlinked(struct dentry *dentry) return d_unhashed(dentry) && !IS_ROOT(dentry); } +static inline struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + + spin_lock(&dentry->d_lock); + ret = dget(dentry->d_parent); + spin_unlock(&dentry->d_lock); + return ret; +} + extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4981e6ee3ba5..5191f49c2fec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -406,8 +406,6 @@ extern struct files_stat_struct files_stat; extern int get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; -extern struct percpu_counter nr_inodes; -extern int get_nr_inodes(void); extern int leases_enable, lease_break_time; #ifdef CONFIG_DNOTIFY extern int dir_notify_enable; @@ -727,15 +725,9 @@ struct inode { struct hlist_node i_hash; struct list_head i_list; /* backing dev IO list */ struct list_head i_sb_list; - union { - struct list_head i_dentry; - struct rcu_head i_rcu; - }; + struct list_head i_dentry; unsigned long i_ino; -#ifdef CONFIG_SMP - int i_sb_list_cpu; -#endif - unsigned int i_count; + atomic_t i_count; unsigned int i_nlink; uid_t i_uid; gid_t i_gid; @@ -932,9 +924,6 @@ 
struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ -#ifdef CONFIG_SMP - int f_sb_list_cpu; -#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -959,6 +948,9 @@ struct file { unsigned long f_mnt_write_state; #endif }; +extern spinlock_t files_lock; +#define file_list_lock() spin_lock(&files_lock); +#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define file_count(x) atomic_long_read(&(x)->f_count) @@ -1347,17 +1339,9 @@ struct super_block { #endif struct xattr_handler **s_xattr; - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ -#ifdef CONFIG_SMP - struct list_head *s_inodes; -#else struct list_head s_inodes; /* all inodes */ -#endif -#ifdef CONFIG_SMP - struct list_head *s_files; -#else + struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ struct list_head s_files; -#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -2053,7 +2037,6 @@ extern const struct file_operations read_pipefifo_fops; extern const struct file_operations write_pipefifo_fops; extern const struct file_operations rdwr_pipefifo_fops; -extern void mark_files_ro(struct super_block *sb); extern int fs_may_remount_ro(struct super_block *); #ifdef CONFIG_BLOCK @@ -2185,6 +2168,7 @@ extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struc extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); +extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); @@ -2193,17 +2177,14 @@ extern struct inode *new_inode(struct super_block *); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct 
file *); -extern void inode_sb_list_del(struct inode *inode); extern void __insert_inode_hash(struct inode *, unsigned long hashval); -extern void __remove_inode_hash(struct inode *); extern void remove_inode_hash(struct inode *); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern struct file * get_empty_filp(void); -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); +extern void file_move(struct file *f, struct list_head *list); +extern void file_kill(struct file *f); #ifdef CONFIG_BLOCK struct bio; extern void submit_bio(int, struct bio *); @@ -2404,20 +2385,10 @@ extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); extern void save_mount_options(struct super_block *sb, char *options); extern void replace_mount_options(struct super_block *sb, char *options); -static inline void __iget(struct inode *inode) -{ - assert_spin_locked(&inode->i_lock); - inode->i_count++; -} - static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; - /* - * Don't strictly need d_lock here? If the parent ino could change - * then surely we'd have a deeper race in the caller? 
- */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); @@ -2493,8 +2464,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -int proc_nr_inodes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + int __init get_filesystem_list(char *buf); #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e776fb5ed01a..4d6f47b51189 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -276,10 +276,10 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) { struct dentry *parent; + assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); parent = dentry->d_parent; - /* XXX: after dcache_lock removal, there is a race with parent->d_inode and fsnotify_inode_watches_children. must fix */ if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else @@ -288,12 +288,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) /* * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. 
*/ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; + assert_spin_locked(&dcache_lock); + spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); @@ -344,7 +347,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); -extern void fsnotify_unmount_inodes(struct super_block *sb); +extern void fsnotify_unmount_inodes(struct list_head *list); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -374,7 +377,7 @@ static inline u32 fsnotify_get_cookie(void) return 0; } -static inline void fsnotify_unmount_inodes(struct super_block *sb) +static inline void fsnotify_unmount_inodes(struct list_head *list) {} #endif /* CONFIG_FSNOTIFY */ diff --git a/include/linux/inotify.h b/include/linux/inotify.h index e8bcd7c6c0cc..37ea2894b3c0 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -111,7 +111,7 @@ extern void inotify_inode_queue_event(struct inode *, __u32, __u32, const char *, struct inode *); extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, const char *); -extern void inotify_unmount_inodes(struct super_block *); +extern void inotify_unmount_inodes(struct list_head *); extern void inotify_inode_is_dead(struct inode *); extern u32 inotify_get_cookie(void); @@ -161,7 +161,7 @@ static inline void inotify_dentry_parent_queue_event(struct dentry *dentry, { } -static inline void inotify_unmount_inodes(struct super_block *sb) +static inline void inotify_unmount_inodes(struct list_head *list) { } diff --git a/include/linux/mount.h b/include/linux/mount.h index 849e70535047..5d5275364867 100644 
--- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -31,13 +31,11 @@ struct mnt_namespace; #define MNT_SHRINKABLE 0x100 #define MNT_WRITE_HOLD 0x200 -#define MNT_MOUNTED 0x400 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ #define MNT_PNODE_MASK 0x3000 /* propagation flag mask */ - struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ @@ -58,6 +56,12 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ + /* + * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount + * to let these frequently modified fields in a separate cache line + * (so that reads of mnt_flags wont ping-pong on SMP machines) + */ + atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; @@ -66,11 +70,6 @@ struct vfsmount { #else int mnt_writers; #endif -#ifdef CONFIG_SMP - int *mnt_count; -#else - int mnt_count; -#endif }; static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) @@ -82,28 +81,32 @@ static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) #endif } -struct file; /* forward dec */ - -extern void vfsmount_read_lock(int cpu); -extern void vfsmount_read_unlock(int cpu); -extern void vfsmount_write_lock(void); -extern void vfsmount_write_unlock(void); +static inline struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) + atomic_inc(&mnt->mnt_count); + return mnt; +} -extern unsigned int count_mnt_count(struct vfsmount *mnt); +struct file; /* forward dec */ extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); - extern void mntput_no_expire(struct vfsmount *mnt); -extern struct vfsmount *mntget(struct vfsmount *mnt); 
-extern void mntput(struct vfsmount *mnt); - extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); +static inline void mntput(struct vfsmount *mnt) +{ + if (mnt) { + mnt->mnt_expiry_mark = 0; + mntput_no_expire(mnt); + } +} + extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); @@ -120,6 +123,7 @@ extern int do_add_mount(struct vfsmount *newmnt, struct path *path, extern void mark_mounts_for_expiry(struct list_head *mounts); +extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); #endif /* _LINUX_MOUNT_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index e5c5ba2327f1..42f207676016 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -465,7 +465,6 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; -extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 15e8bcd90cd1..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -9,8 +9,8 @@ struct backing_dev_info; -extern spinlock_t sb_inode_list_lock; -extern spinlock_t wb_inode_list_lock; +extern spinlock_t inode_lock; +extern struct list_head inode_in_use; extern struct list_head inode_unused; /* diff --git a/ipc/mqueue.c b/ipc/mqueue.c index f3a43f7747ca..01582e5fe998 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -237,16 +237,9 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void mqueue_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); -} - 
static void mqueue_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, mqueue_i_callback); + kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } static void mqueue_delete_inode(struct inode *inode) @@ -769,11 +762,8 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) } inode = dentry->d_inode; - if (inode) { - spin_lock(&inode->i_lock); - inode->i_count++; - spin_unlock(&inode->i_lock); - } + if (inode) + atomic_inc(&inode->i_count); err = mnt_want_write(ipc_ns->mq_mnt); if (err) goto out_err; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index a4bdd2a18f1a..4b05bd9479db 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -720,8 +720,6 @@ int audit_tag_tree(char *old, char *new) struct vfsmount *mnt; struct dentry *dentry; int err; - int cpu = get_cpu(); - put_cpu(); err = kern_path(new, 0, &path); if (err) @@ -763,15 +761,15 @@ int audit_tag_tree(char *old, char *new) continue; } - vfsmount_read_lock(cpu); + spin_lock(&vfsmount_lock); if (!is_under(mnt, dentry, &path)) { - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); path_put(&path); put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } - vfsmount_read_unlock(cpu); + spin_unlock(&vfsmount_lock); path_put(&path); list_for_each_entry(p, &list, mnt_list) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 84ad27652d40..11a8b34cfae0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -808,29 +808,25 @@ static void cgroup_clear_directory(struct dentry *dentry) struct list_head *node; BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); - - spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - dget_dlock(d); - 
spin_unlock(&d->d_lock); - spin_unlock(&dentry->d_lock); + d = dget_locked(d); + spin_unlock(&dcache_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); - spin_lock(&dentry->d_lock); - } else - spin_unlock(&d->d_lock); + spin_lock(&dcache_lock); + } node = dentry->d_subdirs.next; } - spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); } /* @@ -838,16 +834,11 @@ static void cgroup_clear_directory(struct dentry *dentry) */ static void cgroup_d_remove_dir(struct dentry *dentry) { - struct dentry *parent; - cgroup_clear_directory(dentry); - parent = dentry->d_parent; - spin_lock(&parent->d_lock); - spin_lock(&dentry->d_lock); + spin_lock(&dcache_lock); list_del_init(&dentry->d_u.d_child); - spin_unlock(&dentry->d_lock); - spin_unlock(&parent->d_lock); + spin_unlock(&dcache_lock); remove_dir(dentry); } @@ -3173,7 +3164,9 @@ again: list_del(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); + spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); + spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); dput(d); diff --git a/kernel/futex.c b/kernel/futex.c index a6cec3270c78..e7a35f1039e7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -168,9 +168,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - spin_lock(&key->shared.inode->i_lock); - key->shared.inode->i_count++; - spin_unlock(&key->shared.inode->i_lock); + atomic_inc(&key->shared.inode->i_count); break; case FUT_OFF_MMSHARED: atomic_inc(&key->private.mm->mm_count); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e4baabf7893e..8a68b2448468 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1301,14 +1301,14 @@ static struct ctl_table fs_table[] = { .data = &inodes_stat, .maxlen = 2*sizeof(int), .mode = 0444, - .proc_handler = &proc_nr_inodes, + .proc_handler = proc_dointvec, }, { .procname = "inode-state", .data = &inodes_stat, .maxlen = 7*sizeof(int), .mode = 0444, - .proc_handler = 
&proc_nr_inodes, + .proc_handler = proc_dointvec, }, { .procname = "file-nr", @@ -1334,12 +1334,6 @@ static struct ctl_table fs_table[] = { .extra2 = &sysctl_nr_open_max, }, { - /* - * dentry_stat has an atomic_t member, so this is a bit of - * a hack, but it works for the moment, and I won't bother - * changing it now because we'll probably want to change to - * a more scalable counter anyway. - */ .procname = "dentry-state", .data = &dentry_stat, .maxlen = 6*sizeof(int), diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8a0d9aa7b207..0e8ca0347707 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -71,7 +71,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) * RCU on the reader side */ nr_wb = nr_dirty = nr_io = nr_more_io = 0; - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); list_for_each_entry(wb, &bdi->wb_list, list) { nr_wb++; list_for_each_entry(inode, &wb->b_dirty, i_list) @@ -81,7 +81,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) list_for_each_entry(inode, &wb->b_more_io, i_list) nr_more_io++; } - spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode_lock); get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); @@ -696,11 +696,11 @@ void bdi_destroy(struct backing_dev_info *bdi) if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; - spin_lock(&wb_inode_list_lock); + spin_lock(&inode_lock); list_splice(&bdi->wb.b_dirty, &dst->b_dirty); list_splice(&bdi->wb.b_io, &dst->b_io); list_splice(&bdi->wb.b_more_io, &dst->b_more_io); - spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode_lock); } bdi_unregister(bdi); diff --git a/mm/shmem.c b/mm/shmem.c index d7905296c2d3..eef4ebea5158 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1882,9 +1882,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); - 
spin_lock(&inode->i_lock); - inode->i_count++; /* New dentry reference */ - spin_unlock(&inode->i_lock); + atomic_inc(&inode->i_count); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); out: @@ -2397,20 +2395,13 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) return &p->vfs_inode; } -static void shmem_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); -} - static void shmem_destroy_inode(struct inode *inode) { if ((inode->i_mode & S_IFMT) == S_IFREG) { /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } - call_rcu(&inode->i_rcu, shmem_i_callback); + kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } static void init_once(void *foo) diff --git a/net/socket.c b/net/socket.c index 371eaf092a31..769c386bd428 100644 --- a/net/socket.c +++ b/net/socket.c @@ -263,19 +263,12 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void sock_i_callback(struct rcu_head *head) +static void sock_destroy_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(sock_inode_cachep, container_of(inode, struct socket_alloc, vfs_inode)); } -static void sock_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, sock_i_callback); -} - static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -375,9 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) &socket_file_ops); if (unlikely(!file)) { /* drop dentry, keep inode */ - spin_lock(&path.dentry->d_inode->i_lock); - path.dentry->d_inode->i_count++; - spin_unlock(&path.dentry->d_inode->i_lock); + atomic_inc(&path.dentry->d_inode->i_count); 
path_put(&path); put_unused_fd(fd); return -ENFILE; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 6792d5634fa0..27a23785a50d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -162,17 +162,9 @@ rpc_alloc_inode(struct super_block *sb) } static void -rpc_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); - kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); -} - -static void rpc_destroy_inode(struct inode *inode) { - call_rcu(&inode->i_rcu, rpc_i_callback); + kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } static int diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f013de205ea8..9a2ee845e9d4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2246,7 +2246,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - spin_lock(&tty_files_lock); + file_list_lock(); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2262,7 +2262,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, drop_tty = 1; } } - spin_unlock(&tty_files_lock); + file_list_unlock(); tty_kref_put(tty); } /* Reset controlling tty. 
*/ diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index a016c04e5d52..fab36fdf2769 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -943,28 +943,24 @@ static void sel_remove_entries(struct dentry *de) { struct list_head *node; - spin_lock(&de->d_lock); + spin_lock(&dcache_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); - - spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { - dget_dlock(d); - spin_unlock(&de->d_lock); - spin_unlock(&d->d_lock); + d = dget_locked(d); + spin_unlock(&dcache_lock); d_delete(d); simple_unlink(de->d_inode, d); dput(d); - spin_lock(&de->d_lock); - } else - spin_unlock(&d->d_lock); + spin_lock(&dcache_lock); + } node = de->d_subdirs.next; } - spin_unlock(&de->d_lock); + spin_unlock(&dcache_lock); } #define BOOL_DIR_NAME "booleans" diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 4346c48964c3..18369d497eb8 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -93,21 +93,21 @@ int tomoyo_realpath_from_path2(struct path *path, char *newname, struct path root; struct path ns_root = { }; struct path tmp; - int cpu = get_cpu(); - put_cpu(); read_lock(&current->fs->lock); root = current->fs->root; path_get(&root); read_unlock(&current->fs->lock); - vfsmount_read_lock(cpu); + spin_lock(&vfsmount_lock); if (root.mnt && root.mnt->mnt_ns) ns_root.mnt = mntget(root.mnt->mnt_ns->root); if (ns_root.mnt) ns_root.dentry = dget(ns_root.mnt->mnt_root); + spin_unlock(&vfsmount_lock); + spin_lock(&dcache_lock); tmp = ns_root; sp = __d_path(path, &tmp, newname, newname_len); - vfsmount_read_unlock(cpu); + spin_unlock(&dcache_lock); path_put(&root); path_put(&ns_root); /* Prepend "/proc" prefix if using internal proc vfs mount. */ |