author    Thomas Gleixner <tglx@linutronix.de>  2010-07-13 15:57:14 +0200
committer Thomas Gleixner <tglx@linutronix.de>  2010-07-13 15:57:14 +0200
commit    ec646ea8db21abc1db436aac580a0464e460bd9d (patch)
tree      e1c137718bd94548589f0e2f2a89342c75e052da
parent    596fc8ee275b6e4b441b6aa1e2c1a89aeeccb877 (diff)
download  lwn-ec646ea8db21abc1db436aac580a0464e460bd9d.tar.gz
          lwn-ec646ea8db21abc1db436aac580a0464e460bd9d.zip
vfs: Revert the scalability patches
We still have sporadic and hard to debug problems. Revert it for now
and revisit with Nick's new version.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
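The thread running through the whole diff below is a single substitution: the scalability series had split the global dcache_lock into per-dentry, per-inode and per-hash-bucket locks, and had turned inode->i_count into a plain int guarded by i_lock; the revert restores the global dcache_lock and the atomic_t reference count. A minimal sketch of the i_count side, condensed from the hunks that follow:

	/* Scalability-branch form being reverted: plain int under i_lock */
	spin_lock(&inode->i_lock);
	inode->i_count++;
	spin_unlock(&inode->i_lock);

	/* Mainline form being restored: lock-free atomic counter */
	atomic_inc(&inode->i_count);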
-rw-r--r--  Documentation/filesystems/Locking  2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/file.c  2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c  8
-rw-r--r--  drivers/char/pty.c  6
-rw-r--r--  drivers/char/tty_io.c  27
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c  8
-rw-r--r--  drivers/staging/pohmelfs/path_entry.c  17
-rw-r--r--  drivers/usb/core/inode.c  7
-rw-r--r--  fs/affs/amigaffs.c  4
-rw-r--r--  fs/affs/inode.c  4
-rw-r--r--  fs/afs/dir.c  4
-rw-r--r--  fs/anon_inodes.c  4
-rw-r--r--  fs/autofs4/autofs_i.h  3
-rw-r--r--  fs/autofs4/expire.c  102
-rw-r--r--  fs/autofs4/inode.c  19
-rw-r--r--  fs/autofs4/root.c  79
-rw-r--r--  fs/autofs4/waitq.c  23
-rw-r--r--  fs/bfs/dir.c  4
-rw-r--r--  fs/block_dev.c  24
-rw-r--r--  fs/btrfs/inode.c  20
-rw-r--r--  fs/buffer.c  2
-rw-r--r--  fs/cifs/inode.c  2
-rw-r--r--  fs/coda/cache.c  4
-rw-r--r--  fs/coda/dir.c  4
-rw-r--r--  fs/configfs/configfs_internal.h  4
-rw-r--r--  fs/configfs/dir.c  2
-rw-r--r--  fs/configfs/inode.c  8
-rw-r--r--  fs/dcache.c  921
-rw-r--r--  fs/drop_caches.c  40
-rw-r--r--  fs/exofs/inode.c  12
-rw-r--r--  fs/exofs/namei.c  4
-rw-r--r--  fs/exportfs/expfs.c  23
-rw-r--r--  fs/ext2/namei.c  4
-rw-r--r--  fs/ext2/super.c  9
-rw-r--r--  fs/ext3/ialloc.c  4
-rw-r--r--  fs/ext3/namei.c  4
-rw-r--r--  fs/ext3/super.c  9
-rw-r--r--  fs/ext4/ialloc.c  4
-rw-r--r--  fs/ext4/namei.c  4
-rw-r--r--  fs/fat/inode.c  9
-rw-r--r--  fs/file_table.c  171
-rw-r--r--  fs/filesystems.c  1
-rw-r--r--  fs/fs-writeback.c  157
-rw-r--r--  fs/gfs2/ops_inode.c  4
-rw-r--r--  fs/hfsplus/dir.c  4
-rw-r--r--  fs/hpfs/inode.c  2
-rw-r--r--  fs/hugetlbfs/inode.c  12
-rw-r--r--  fs/inode.c  611
-rw-r--r--  fs/jbd2/transaction.c  5
-rw-r--r--  fs/jffs2/dir.c  8
-rw-r--r--  fs/jfs/jfs_txnmgr.c  4
-rw-r--r--  fs/jfs/namei.c  4
-rw-r--r--  fs/libfs.c  73
-rw-r--r--  fs/locks.c  3
-rw-r--r--  fs/minix/namei.c  4
-rw-r--r--  fs/namei.c  365
-rw-r--r--  fs/namespace.c  380
-rw-r--r--  fs/ncpfs/dir.c  8
-rw-r--r--  fs/ncpfs/ncplib_kernel.h  8
-rw-r--r--  fs/nfs/dir.c  7
-rw-r--r--  fs/nfs/getroot.c  10
-rw-r--r--  fs/nfs/inode.c  13
-rw-r--r--  fs/nfs/namespace.c  17
-rw-r--r--  fs/nfs/nfs4state.c  3
-rw-r--r--  fs/nfs/write.c  2
-rw-r--r--  fs/nfsd/vfs.c  3
-rw-r--r--  fs/nilfs2/gcdat.c  1
-rw-r--r--  fs/nilfs2/mdt.c  2
-rw-r--r--  fs/nilfs2/namei.c  4
-rw-r--r--  fs/notify/fsnotify.c  19
-rw-r--r--  fs/notify/inode_mark.c  126
-rw-r--r--  fs/notify/inotify/inotify.c  172
-rw-r--r--  fs/ntfs/super.c  4
-rw-r--r--  fs/ocfs2/dcache.c  10
-rw-r--r--  fs/ocfs2/namei.c  4
-rw-r--r--  fs/open.c  4
-rw-r--r--  fs/pnode.c  6
-rw-r--r--  fs/proc/base.c  6
-rw-r--r--  fs/proc/inode.c  9
-rw-r--r--  fs/quota/dquot.c  101
-rw-r--r--  fs/reiserfs/file.c  4
-rw-r--r--  fs/reiserfs/namei.c  4
-rw-r--r--  fs/reiserfs/stree.c  2
-rw-r--r--  fs/seq_file.c  8
-rw-r--r--  fs/smbfs/cache.c  10
-rw-r--r--  fs/smbfs/dir.c  8
-rw-r--r--  fs/smbfs/proc.c  8
-rw-r--r--  fs/super.c  53
-rw-r--r--  fs/sysv/namei.c  4
-rw-r--r--  fs/ubifs/dir.c  4
-rw-r--r--  fs/ubifs/super.c  2
-rw-r--r--  fs/udf/namei.c  4
-rw-r--r--  fs/ufs/namei.c  4
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c  4
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h  2
-rw-r--r--  fs/xfs/xfs_inode.h  6
-rw-r--r--  include/linux/dcache.h  142
-rw-r--r--  include/linux/fs.h  50
-rw-r--r--  include/linux/fsnotify_backend.h  9
-rw-r--r--  include/linux/inotify.h  4
-rw-r--r--  include/linux/mount.h  40
-rw-r--r--  include/linux/tty.h  1
-rw-r--r--  include/linux/writeback.h  4
-rw-r--r--  ipc/mqueue.c  16
-rw-r--r--  kernel/audit_tree.c  8
-rw-r--r--  kernel/cgroup.c  27
-rw-r--r--  kernel/futex.c  4
-rw-r--r--  kernel/sysctl.c  10
-rw-r--r--  mm/backing-dev.c  8
-rw-r--r--  mm/shmem.c  13
-rw-r--r--  net/socket.c  13
-rw-r--r--  net/sunrpc/rpc_pipe.c  10
-rw-r--r--  security/selinux/hooks.c  4
-rw-r--r--  security/selinux/selinuxfs.c  16
-rw-r--r--  security/tomoyo/realpath.c  8
115 files changed, 1268 insertions, 3033 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index a4fd4040d4ac..18b9d0ca0630 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -17,7 +17,7 @@ prototypes:
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
-locking rules: XXX: update these!!
+locking rules:
none have BKL
dcache_lock rename_lock ->d_lock may block
d_revalidate: no no no yes
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index f078fe70d3cd..64a4c2d85f7c 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1548,7 +1548,7 @@ static int spufs_mfc_open(struct inode *inode, struct file *file)
if (ctx->owner != current->mm)
return -EINVAL;
- if (inode->i_count != 1)
+ if (atomic_read(&inode->i_count) != 1)
return -EBUSY;
mutex_lock(&ctx->mapping_lock);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 7f0737214788..fc1b1c42b1dc 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -158,18 +158,18 @@ static void spufs_prune_dir(struct dentry *dir)
mutex_lock(&dir->d_inode->i_mutex);
list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) {
+ spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry)) && dentry->d_inode) {
- dget_dlock(dentry);
+ dget_locked(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
simple_unlink(dir->d_inode, dentry);
- /* XXX: what was dcache_lock protecting here? Other
- * filesystems (IB, configfs) release dcache_lock
- * before unlink */
+ spin_unlock(&dcache_lock);
dput(dentry);
} else {
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
}
}
shrink_dcache_parent(dir);
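The spufs hunk above, and the ipath and configfs hunks further down, are the same mechanical transformation. The restored unhash-and-unlink idiom, condensed from those hunks:

	/* dcache_lock nests outside d_lock; dget_locked() is the
	 * take-a-reference variant that is legal under dcache_lock. */
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	if (!d_unhashed(dentry) && dentry->d_inode) {
		dget_locked(dentry);
		__d_drop(dentry);		/* unhash while locked */
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
		simple_unlink(dir->d_inode, dentry);
		dput(dentry);			/* drop the pin */
	} else {
		spin_unlock(&dentry->d_lock);
		spin_unlock(&dcache_lock);
	}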
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 8fa273e76bb3..385c44b3034f 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -649,11 +649,7 @@ static int __ptmx_open(struct inode *inode, struct file *filp)
set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
filp->private_data = tty;
-
- file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
- spin_lock(&tty_files_lock);
- list_add(&filp->f_u.fu_list, &tty->tty_files);
- spin_unlock(&tty_files_lock);
+ file_move(filp, &tty->tty_files);
retval = devpts_pty_new(inode, tty->link);
if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 44a7ce0c5f4d..9af676611759 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -136,9 +136,6 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
DEFINE_MUTEX(tty_mutex);
EXPORT_SYMBOL(tty_mutex);
-/* Spinlock to protect the tty->tty_files list */
-DEFINE_SPINLOCK(tty_files_lock);
-
static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -237,11 +234,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
struct list_head *p;
int count = 0;
- spin_lock(&tty_files_lock);
+ file_list_lock();
list_for_each(p, &tty->tty_files) {
count++;
}
- spin_unlock(&tty_files_lock);
+ file_list_unlock();
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_SLAVE &&
tty->link && tty->link->count)
@@ -519,7 +516,8 @@ static void do_tty_hangup(struct work_struct *work)
/* inuse_filps is protected by the single kernel lock */
lock_kernel();
check_tty_count(tty, "do_tty_hangup");
- spin_lock(&tty_files_lock);
+
+ file_list_lock();
/* This breaks for file handles being sent over AF_UNIX sockets ? */
list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
if (filp->f_op->write == redirected_tty_write)
@@ -530,7 +528,7 @@ static void do_tty_hangup(struct work_struct *work)
tty_fasync(-1, filp, 0); /* can't block */
filp->f_op = &hung_up_tty_fops;
}
- spin_unlock(&tty_files_lock);
+ file_list_unlock();
tty_ldisc_hangup(tty);
@@ -1421,9 +1419,9 @@ static void release_one_tty(struct work_struct *work)
tty_driver_kref_put(driver);
module_put(driver->owner);
- spin_lock(&tty_files_lock);
+ file_list_lock();
list_del_init(&tty->tty_files);
- spin_unlock(&tty_files_lock);
+ file_list_unlock();
put_pid(tty->pgrp);
put_pid(tty->session);
@@ -1668,10 +1666,7 @@ int tty_release(struct inode *inode, struct file *filp)
* - do_tty_hangup no longer sees this file descriptor as
* something that needs to be handled for hangups.
*/
- spin_lock(&tty_files_lock);
- BUG_ON(list_empty(&filp->f_u.fu_list));
- list_del_init(&filp->f_u.fu_list);
- spin_unlock(&tty_files_lock);
+ file_kill(filp);
filp->private_data = NULL;
/*
@@ -1840,11 +1835,7 @@ got_driver:
}
filp->private_data = tty;
- BUG_ON(list_empty(&filp->f_u.fu_list));
- file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
- spin_lock(&tty_files_lock);
- list_add(&filp->f_u.fu_list, &tty->tty_files);
- spin_unlock(&tty_files_lock);
+ file_move(filp, &tty->tty_files);
check_tty_count(tty, "tty_open");
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_MASTER)
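file_move() and file_kill() are the old fs/file_table.c helpers that the per-superblock file-list work had replaced with open-coded tty_files_lock manipulation. From memory of the 2.6 tree they were roughly the following (a sketch, not the verbatim source), with file_list_lock()/file_list_unlock() wrapping a single global files_lock spinlock:

	void file_move(struct file *file, struct list_head *list)
	{
		if (!list)
			return;
		file_list_lock();
		list_move(&file->f_u.fu_list, list);
		file_list_unlock();
	}

	void file_kill(struct file *file)
	{
		if (!list_empty(&file->f_u.fu_list)) {
			file_list_lock();
			list_del_init(&file->f_u.fu_list);
			file_list_unlock();
		}
	}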
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index ba08b0e6ea5a..100da8542bba 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -272,14 +272,18 @@ static int remove_file(struct dentry *parent, char *name)
goto bail;
}
+ spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_dlock(tmp);
+ dget_locked(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
+ spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, tmp);
- } else
+ } else {
spin_unlock(&tmp->d_lock);
+ spin_unlock(&dcache_lock);
+ }
ret = 0;
bail:
diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index 9fd07a138dad..3bad888ced13 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -84,11 +84,10 @@ out:
int pohmelfs_path_length(struct pohmelfs_inode *pi)
{
struct dentry *d, *root, *first;
- int len;
- unsigned seq;
+ int len = 1; /* Root slash */
- first = d_find_alias(&pi->vfs_inode);
- if (!first) {
+ first = d = d_find_alias(&pi->vfs_inode);
+ if (!d) {
dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode);
return -ENOENT;
}
@@ -97,11 +96,7 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
root = dget(current->fs->root.dentry);
read_unlock(&current->fs->lock);
-rename_retry:
- len = 1; /* Root slash */
- d = first;
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
+ spin_lock(&dcache_lock);
if (!IS_ROOT(d) && d_unhashed(d))
len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */
@@ -110,9 +105,7 @@ rename_retry:
len += d->d_name.len + 1; /* Plus slash */
d = d->d_parent;
}
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
dput(root);
dput(first);
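Note what is traded away here: the seqlock loop being removed could walk d_parent without any global lock, retrying if a concurrent rename invalidated the walk; the restored code instead pins the whole walk under dcache_lock. Condensed (the loop head is assumed from context, since the hunk cuts it off):

	spin_lock(&dcache_lock);
	/* d_parent cannot change while dcache_lock is held */
	while (d != root && !IS_ROOT(d)) {
		len += d->d_name.len + 1;	/* component plus '/' */
		d = d->d_parent;
	}
	spin_unlock(&dcache_lock);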
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 120500e1c96d..4a6366a42129 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -347,16 +347,17 @@ static int usbfs_empty (struct dentry *dentry)
{
struct list_head *list;
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
+
list_for_each(list, &dentry->d_subdirs) {
struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
if (usbfs_positive(de)) {
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return 0;
}
}
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return 1;
}
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 3a4557e8325c..7d0f0a30f7a3 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
void *data = dentry->d_fsdata;
struct list_head *head, *next;
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
head = &inode->i_dentry;
next = head->next;
while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
}
next = next->next;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
}
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 9af06f0b355c..3c4ec7d864c4 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -379,9 +379,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
inode->i_nlink = 2;
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 88106a018440..88067f36e5e7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1007,9 +1007,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
if (ret < 0)
goto link_error;
- spin_lock(&vnode->vfs_inode.i_lock);
- vnode->vfs_inode.i_count++;
- spin_unlock(&vnode->vfs_inode.i_lock);
+ atomic_inc(&vnode->vfs_inode.i_count);
d_instantiate(dentry, &vnode->vfs_inode);
key_put(key);
_leave(" = 0");
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 81f82e7e7290..9f0bf13291e5 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -115,9 +115,7 @@ struct file *anon_inode_getfile(const char *name,
* so we can avoid doing an igrab() and we can use an open-coded
* atomic_inc().
*/
- spin_lock(&anon_inode_inode->i_lock);
- anon_inode_inode->i_count++;
- spin_unlock(&anon_inode_inode->i_lock);
+ atomic_inc(&anon_inode_inode->i_count);
path.dentry->d_op = &anon_inodefs_dentry_operations;
d_instantiate(path.dentry, anon_inode_inode);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 4ea26380a16b..0118d67221b2 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,7 +16,6 @@
#include <linux/auto_fs4.h>
#include <linux/auto_dev-ioctl.h>
#include <linux/mutex.h>
-#include <linux/spinlock.h>
#include <linux/list.h>
/* This is the range of ioctl() numbers we claim as ours */
@@ -66,8 +65,6 @@ struct rehash_entry {
struct list_head list;
};
-extern spinlock_t autofs4_lock;
-
/* Unified info structure. This is pointed to by both the dentry and
inode structures. Each file in the filesystem has an instance of this
structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 60a7c6ce0a0d..74bc9aa6df31 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -93,59 +93,22 @@ done:
/*
* Calculate next entry in top down tree traversal.
* From next_mnt in namespace.c - elegant.
- *
- * How is this supposed to work if we drop autofs4_lock between calls anyway?
- * How does it cope with renames?
- * And also callers dput the returned dentry before taking autofs4_lock again
- * so what prevents it from being freed??
*/
-static struct dentry *get_next_positive_dentry(struct dentry *p,
- struct dentry *root)
+static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
{
- struct list_head *next;
- struct dentry *ret;
+ struct list_head *next = p->d_subdirs.next;
- spin_lock(&autofs4_lock);
-again:
- spin_lock(&p->d_lock);
- next = p->d_subdirs.next;
if (next == &p->d_subdirs) {
while (1) {
- struct dentry *parent;
-
- if (p == root) {
- spin_unlock(&p->d_lock);
- spin_unlock(&autofs4_lock);
+ if (p == root)
return NULL;
- }
-
- parent = p->d_parent;
- if (!spin_trylock(&parent->d_lock)) {
- spin_unlock(&p->d_lock);
- goto again;
- }
- spin_unlock(&p->d_lock);
next = p->d_u.d_child.next;
- p = parent;
- if (next != &parent->d_subdirs)
+ if (next != &p->d_parent->d_subdirs)
break;
+ p = p->d_parent;
}
}
- ret = list_entry(next, struct dentry, d_u.d_child);
-
- spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
- /* Negative dentry - try next */
- if (!simple_positive(ret)) {
- spin_unlock(&ret->d_lock);
- p = ret;
- goto again;
- }
- dget_dlock(ret);
- spin_unlock(&ret->d_lock);
- spin_unlock(&p->d_lock);
- spin_unlock(&autofs4_lock);
-
- return ret;
+ return list_entry(next, struct dentry, d_u.d_child);
}
/*
@@ -195,11 +158,18 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
if (!simple_positive(top))
return 1;
- for (p = dget(top); p; p = get_next_positive_dentry(p, top)) {
+ spin_lock(&dcache_lock);
+ for (p = top; p; p = next_dentry(p, top)) {
+ /* Negative dentry - give up */
+ if (!simple_positive(p))
+ continue;
DPRINTK("dentry %p %.*s",
p, (int) p->d_name.len, p->d_name.name);
+ p = dget(p);
+ spin_unlock(&dcache_lock);
+
/*
* Is someone visiting anywhere in the subtree ?
* If there's no mount we need to check the usage
@@ -235,7 +205,9 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
}
}
dput(p);
+ spin_lock(&dcache_lock);
}
+ spin_unlock(&dcache_lock);
/* Timeout of a tree mount is ultimately determined by its top dentry */
if (!autofs4_can_expire(top, timeout, do_now))
@@ -254,11 +226,18 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
DPRINTK("parent %p %.*s",
parent, (int)parent->d_name.len, parent->d_name.name);
- for (p = dget(parent); p; p = get_next_positive_dentry(p, parent)) {
+ spin_lock(&dcache_lock);
+ for (p = parent; p; p = next_dentry(p, parent)) {
+ /* Negative dentry - give up */
+ if (!simple_positive(p))
+ continue;
DPRINTK("dentry %p %.*s",
p, (int) p->d_name.len, p->d_name.name);
+ p = dget(p);
+ spin_unlock(&dcache_lock);
+
if (d_mountpoint(p)) {
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, p))
@@ -270,7 +249,9 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
}
cont:
dput(p);
+ spin_lock(&dcache_lock);
}
+ spin_unlock(&dcache_lock);
return NULL;
}
@@ -295,10 +276,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
struct autofs_info *ino = autofs4_dentry_ino(root);
if (d_mountpoint(root)) {
ino->flags |= AUTOFS_INF_MOUNTPOINT;
- spin_lock(&root->d_lock);
- WARN_ON(root->d_mounted == 0);
root->d_mounted--;
- spin_unlock(&root->d_lock);
}
ino->flags |= AUTOFS_INF_EXPIRING;
autofs4_add_expiring(root);
@@ -317,8 +295,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
* A tree is eligible if :-
* - it is unused by any user process
* - it has been unused for exp_timeout time
- * This seems to be racy dropping autofs4_lock and asking for next->next after
- * the lock has been dropped.
*/
struct dentry *autofs4_expire_indirect(struct super_block *sb,
struct vfsmount *mnt,
@@ -340,8 +316,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
now = jiffies;
timeout = sbi->exp_timeout;
- spin_lock(&autofs4_lock);
- spin_lock(&root->d_lock);
+ spin_lock(&dcache_lock);
next = root->d_subdirs.next;
/* On exit from the loop expire is set to a dgot dentry
@@ -355,11 +330,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
continue;
}
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- dentry = dget_dlock(dentry);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&root->d_lock);
- spin_unlock(&autofs4_lock);
+ dentry = dget(dentry);
+ spin_unlock(&dcache_lock);
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
@@ -424,12 +396,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
next:
spin_unlock(&sbi->fs_lock);
dput(dentry);
- spin_lock(&autofs4_lock);
- spin_lock(&root->d_lock);
+ spin_lock(&dcache_lock);
next = next->next;
}
- spin_unlock(&root->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return NULL;
found:
@@ -440,13 +410,9 @@ found:
autofs4_add_expiring(expired);
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
- spin_lock(&autofs4_lock);
- spin_lock(&expired->d_parent->d_lock);
- spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
+ spin_lock(&dcache_lock);
list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
- spin_unlock(&expired->d_lock);
- spin_unlock(&expired->d_parent->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return expired;
}
@@ -536,9 +502,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
- spin_lock(&sb->s_root->d_lock);
sb->s_root->d_mounted++;
- spin_unlock(&sb->s_root->d_lock);
ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
}
ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 0b9c391ddeb6..d0a3de247458 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -111,9 +111,8 @@ static void autofs4_force_release(struct autofs_sb_info *sbi)
if (!sbi->sb->s_root)
return;
- spin_lock(&autofs4_lock);
+ spin_lock(&dcache_lock);
repeat:
- spin_lock(&this_parent->d_lock);
next = this_parent->d_subdirs.next;
resume:
while (next != &this_parent->d_subdirs) {
@@ -126,39 +125,33 @@ resume:
}
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&this_parent->d_lock);
this_parent = dentry;
goto repeat;
}
next = next->next;
- spin_unlock(&this_parent->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
DPRINTK("dentry %p %.*s",
dentry, (int)dentry->d_name.len, dentry->d_name.name);
dput(dentry);
- spin_lock(&autofs4_lock);
- spin_lock(&this_parent->d_lock);
+ spin_lock(&dcache_lock);
}
if (this_parent != sbi->sb->s_root) {
struct dentry *dentry = this_parent;
next = this_parent->d_u.d_child.next;
- spin_unlock(&this_parent->d_lock);
this_parent = this_parent->d_parent;
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
DPRINTK("parent dentry %p %.*s",
dentry, (int)dentry->d_name.len, dentry->d_name.name);
dput(dentry);
- spin_lock(&autofs4_lock);
- spin_lock(&this_parent->d_lock);
+ spin_lock(&dcache_lock);
goto resume;
}
- spin_unlock(&this_parent->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
}
void autofs4_kill_sb(struct super_block *sb)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 40ca9360caef..30cc9ddf4b70 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -17,11 +17,8 @@
#include <linux/stat.h>
#include <linux/param.h>
#include <linux/time.h>
-#include <linux/spinlock.h>
#include "autofs_i.h"
-DEFINE_SPINLOCK(autofs4_lock);
-
static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
static int autofs4_dir_unlink(struct inode *,struct dentry *);
static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -228,15 +225,12 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
* autofs file system so just let the libfs routines handle
* it.
*/
- spin_lock(&autofs4_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return -ENOENT;
}
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
out:
return dcache_dir_open(inode, file);
@@ -305,9 +299,9 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
/* We trigger a mount for almost all flags */
lookup_type = autofs4_need_mount(nd->flags);
spin_lock(&sbi->fs_lock);
- spin_lock(&autofs4_lock);
+ spin_lock(&dcache_lock);
if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
goto follow;
}
@@ -317,11 +311,10 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
* multi-mount with no root mount offset. So don't try to
* mount it again.
*/
- spin_lock(&dentry->d_lock);
if (ino->flags & AUTOFS_INF_PENDING ||
(!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
ino->flags |= AUTOFS_INF_PENDING;
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
status = try_to_fill_dentry(dentry);
@@ -329,16 +322,14 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_PENDING;
spin_unlock(&sbi->fs_lock);
- spin_unlock(&autofs4_lock);
if (status)
goto out_error;
goto follow;
}
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
- spin_unlock(&autofs4_lock);
follow:
/*
* If there is no root mount it must be an autofs
@@ -389,7 +380,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
mutex_aquired = mutex_trylock(&dir->i_mutex);
spin_lock(&sbi->fs_lock);
- spin_lock(&autofs4_lock);
+ spin_lock(&dcache_lock);
/* Pending dentry */
if (autofs4_ispending(dentry)) {
int status;
@@ -403,11 +394,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
ino->flags |= AUTOFS_INF_PENDING;
if (!mutex_aquired) {
autofs4_revalidate_drop(dentry, entry);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
return 0;
}
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
mutex_unlock(&dir->i_mutex);
kfree(entry);
@@ -454,11 +445,11 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
ino->flags |= AUTOFS_INF_PENDING;
if (!mutex_aquired) {
autofs4_revalidate_drop(dentry, entry);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
return 0;
}
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
mutex_unlock(&dir->i_mutex);
kfree(entry);
@@ -479,7 +470,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
return status;
}
}
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
if (mutex_aquired)
@@ -553,7 +544,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
struct list_head *p, *head;
restart:
- spin_lock(&autofs4_lock);
+ spin_lock(&dcache_lock);
spin_lock(&sbi->lookup_lock);
head = &sbi->active_list;
list_for_each(p, head) {
@@ -567,15 +558,15 @@ restart:
spin_lock(&active->d_lock);
/* Already gone? */
- if (atomic_read(&dentry->d_count) == 0)
+ if (atomic_read(&active->d_count) == 0)
goto next;
if (active->d_inode && IS_DEADDIR(active->d_inode)) {
if (!list_empty(&ino->rehash_list)) {
- dget_dlock(active);
+ dget(active);
spin_unlock(&active->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
autofs4_remove_rehash_entrys(ino);
dput(active);
goto restart;
@@ -595,16 +586,16 @@ restart:
if (memcmp(qstr->name, str, len))
goto next;
- dget_dlock(active);
+ dget(active);
spin_unlock(&active->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return active;
next:
spin_unlock(&active->d_lock);
}
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return NULL;
}
@@ -619,7 +610,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&autofs4_lock);
+ spin_lock(&dcache_lock);
spin_lock(&sbi->lookup_lock);
head = &sbi->expiring_list;
list_for_each(p, head) {
@@ -648,16 +639,16 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
if (memcmp(qstr->name, str, len))
goto next;
- dget_dlock(expiring);
+ dget(expiring);
spin_unlock(&expiring->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return expiring;
next:
spin_unlock(&expiring->d_lock);
}
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return NULL;
}
@@ -918,15 +909,11 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
dir->i_mtime = CURRENT_TIME;
- spin_lock(&autofs4_lock);
- spin_lock(&sbi->lookup_lock);
- if (list_empty(&ino->expiring))
- list_add(&ino->expiring, &sbi->expiring_list);
- spin_unlock(&sbi->lookup_lock);
+ spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return 0;
}
@@ -943,21 +930,15 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
if (!autofs4_oz_mode(sbi))
return -EACCES;
- spin_lock(&autofs4_lock);
- spin_lock(&sbi->lookup_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&sbi->lookup_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
return -ENOTEMPTY;
}
- if (list_empty(&ino->expiring))
- list_add(&ino->expiring, &sbi->expiring_list);
- spin_unlock(&sbi->lookup_lock);
+ spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
+ spin_unlock(&dcache_lock);
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index c5f8459c905e..2341375386f8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,26 +186,16 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
{
struct dentry *root = sbi->sb->s_root;
struct dentry *tmp;
- char *buf;
+ char *buf = *name;
char *p;
- int len;
- unsigned seq;
+ int len = 0;
-rename_retry:
- buf = *name;
- len = 0;
-
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- spin_lock(&autofs4_lock);
+ spin_lock(&dcache_lock);
for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
len += tmp->d_name.len + 1;
if (!len || --len > NAME_MAX) {
- spin_unlock(&autofs4_lock);
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
return 0;
}
@@ -218,10 +208,7 @@ rename_retry:
p -= tmp->d_name.len;
strncpy(p, tmp->d_name.name, tmp->d_name.len);
}
- spin_unlock(&autofs4_lock);
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
return len;
}
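autofs4_getpath() gets the same seqlock-to-dcache_lock conversion as pohmelfs above. The function itself is a two-pass construction: pass one sums component lengths under the lock, pass two walks the same ancestors and copies each name in front of the previous one, roughly as in the hunk's tail:

	/* Sketch of the backward fill; buf and len as computed above */
	p = buf + len;
	for (tmp = dentry; tmp != root; tmp = tmp->d_parent) {
		*(--p) = '/';
		p -= tmp->d_name.len;
		strncpy(p, tmp->d_name.name, tmp->d_name.len);
	}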
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a822829df2f2..1e41aadb1068 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -178,9 +178,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
inc_nlink(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_instantiate(new, inode);
mutex_unlock(&info->bfs_lock);
return 0;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 37a7a1f12329..8db62b2b6df8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -435,20 +435,13 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void bdev_i_callback(struct rcu_head *head)
+static void bdev_destroy_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
-static void bdev_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, bdev_i_callback);
-}
-
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
@@ -594,12 +587,7 @@ EXPORT_SYMBOL(bdget);
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- struct inode *inode = bdev->bd_inode;
-
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
-
+ atomic_inc(&bdev->bd_inode->i_count);
return bdev;
}
@@ -629,9 +617,7 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
- spin_lock(&inode->i_lock);
- bdev->bd_inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&bdev->bd_inode->i_count);
spin_unlock(&bdev_lock);
return bdev;
}
@@ -647,9 +633,7 @@ static struct block_device *bd_acquire(struct inode *inode)
* So, we can access it via ->i_mapping always
* without igrab().
*/
- spin_lock(&inode->i_lock);
- bdev->bd_inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&bdev->bd_inode->i_count);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3288f3a2899e..4deb280f8969 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1991,14 +1991,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct delayed_iput *delayed;
- spin_lock(&inode->i_lock);
- if (inode->i_count == 1) {
- spin_unlock(&inode->i_lock);
+ if (atomic_add_unless(&inode->i_count, -1, 1))
return;
- }
- inode->i_count--;
- spin_unlock(&inode->i_lock);
-
delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
delayed->inode = inode;
@@ -3606,14 +3600,8 @@ again:
objectid = entry->vfs_inode.i_ino + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
- int count;
spin_unlock(&root->inode_lock);
-
- spin_lock(&inode->i_lock);
- count = inode->i_count;
- spin_unlock(&inode->i_lock);
-
- if (count > 1)
+ if (atomic_read(&inode->i_count) > 1)
d_prune_aliases(inode);
/*
* btrfs_drop_inode will remove it from
@@ -4458,9 +4446,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, dir);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
err = btrfs_add_nondir(trans, dentry, inode, 1, index);
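The btrfs delayed-iput hunk collapses a locked compare-and-decrement into one primitive: atomic_add_unless(&inode->i_count, -1, 1) decrements and returns nonzero only when the count was not already 1, so the restored code reads:

	if (atomic_add_unless(&inode->i_count, -1, 1))
		return;		/* dropped a non-final reference */
	/* count was 1: keep the last reference and queue a delayed iput */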
diff --git a/fs/buffer.c b/fs/buffer.c
index 416a2686ec66..b34323cfe2da 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1145,7 +1145,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
* inode list.
*
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
- * and mapping->tree_lock.
+ * mapping->tree_lock and the global inode_lock.
*/
void mark_buffer_dirty(struct buffer_head *bh)
{
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3d5ccbd5cef3..7ec8555cf164 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1442,7 +1442,7 @@ int cifs_revalidate(struct dentry *direntry)
}
cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
"jiffies %ld", full_path, direntry->d_inode,
- direntry->d_inode->i_count, direntry,
+ direntry->d_inode->i_count.counter, direntry,
direntry->d_time, jiffies));
if (cifsInode->time == 0) {
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index e0ff265e49c5..a5bf5771a22a 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -86,7 +86,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
struct list_head *child;
struct dentry *de;
- spin_lock(&parent->d_lock);
+ spin_lock(&dcache_lock);
list_for_each(child, &parent->d_subdirs)
{
de = list_entry(child, struct dentry, d_u.d_child);
@@ -95,7 +95,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
continue;
coda_flag_inode(de->d_inode, flag);
}
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
return;
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 4d3bbd8514ac..4bb9d0a5decc 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -302,9 +302,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
}
coda_dir_update_mtime(dir_inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_instantiate(de, inode);
inc_nlink(inode);
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 026cf68553a4..da6061a6df40 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
{
struct config_item * item = NULL;
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
if (!d_unhashed(dentry)) {
struct configfs_dirent * sd = dentry->d_fsdata;
if (sd->s_type & CONFIGFS_ITEM_LINK) {
@@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
} else
item = config_item_get(sd->s_element);
}
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return item;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c3638b63b8ce..8e48b52205aa 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -400,7 +400,7 @@ static void remove_dir(struct dentry * d)
simple_rmdir(parent->d_inode,d);
pr_debug(" o %s removing done (%d)\n",d->d_name.name,
- atomic_read(&d->d_count));
+ atomic_read(&d->d_count));
dput(parent);
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index aa8e83ed0f0b..a2f746066c5d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -254,14 +254,18 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
struct dentry * dentry = sd->s_dentry;
if (dentry) {
+ spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry) && dentry->d_inode)) {
- dget_dlock(dentry);
+ dget_locked(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, dentry);
- } else
+ } else {
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ }
}
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 18a3b762297c..116fd33f564b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,34 +35,13 @@
#include <linux/hardirq.h>
#include "internal.h"
-/*
- * Usage:
- * dcache->d_inode->i_lock protects:
- * - the inode alias lists, d_inode
- * dcache_hash_bucket->lock protects:
- * - the dcache hash table
- * dcache_lru_lock protects:
- * - the dcache lru lists and counters
- * d_lock protects:
- * - d_flags
- * - d_name
- * - d_lru
- * - d_unhashed
- * - d_subdirs and children's d_child
- *
- * Ordering:
- * dcache->d_inode->i_lock
- * dentry->d_lock
- * dcache_lru_lock
- * dcache_hash_bucket->lock
- */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
-EXPORT_SYMBOL(rename_lock);
+EXPORT_SYMBOL(dcache_lock);
static struct kmem_cache *dentry_cache __read_mostly;
@@ -81,27 +60,13 @@ static struct kmem_cache *dentry_cache __read_mostly;
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
-
-struct dcache_hash_bucket {
- spinlock_t lock;
- struct hlist_head head;
-};
-static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
+static struct hlist_head *dentry_hashtable __read_mostly;
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
- .nr_dentry = 0,
.age_limit = 45,
};
-static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
- unsigned long hash)
-{
- hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
- hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
-}
-
static void __d_free(struct dentry *dentry)
{
WARN_ON(!list_empty(&dentry->d_alias));
@@ -117,11 +82,11 @@ static void d_callback(struct rcu_head *head)
}
/*
- * no locks, please.
+ * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
+ * inside dcache_lock.
*/
static void d_free(struct dentry *dentry)
{
- BUG_ON(atomic_read(&dentry->d_count));
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
/* if dentry was never inserted into hash, immediate free is OK */
@@ -137,13 +102,14 @@ static void d_free(struct dentry *dentry)
*/
static void dentry_iput(struct dentry * dentry)
__releases(dentry->d_lock)
+ __releases(dcache_lock)
{
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
if (!inode->i_nlink)
fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
@@ -152,60 +118,42 @@ static void dentry_iput(struct dentry * dentry)
iput(inode);
} else {
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
}
}
/*
- * dentry_lru_(add|add_tail|del|del_init) must be called with d_lock held
- * to protect list_empty(d_lru) condition.
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
{
- spin_lock(&dcache_lru_lock);
list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
dentry_stat.nr_unused++;
- spin_unlock(&dcache_lru_lock);
}
static void dentry_lru_add_tail(struct dentry *dentry)
{
- spin_lock(&dcache_lru_lock);
list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
dentry_stat.nr_unused++;
- spin_unlock(&dcache_lru_lock);
-}
-
-static void __dentry_lru_del(struct dentry *dentry)
-{
- list_del(&dentry->d_lru);
- dentry->d_sb->s_nr_dentry_unused--;
- dentry_stat.nr_unused--;
-}
-
-static void __dentry_lru_del_init(struct dentry *dentry)
-{
- list_del_init(&dentry->d_lru);
- dentry->d_sb->s_nr_dentry_unused--;
- dentry_stat.nr_unused--;
}
static void dentry_lru_del(struct dentry *dentry)
{
if (!list_empty(&dentry->d_lru)) {
- spin_lock(&dcache_lru_lock);
- __dentry_lru_del(dentry);
- spin_unlock(&dcache_lru_lock);
+ list_del(&dentry->d_lru);
+ dentry->d_sb->s_nr_dentry_unused--;
+ dentry_stat.nr_unused--;
}
}
static void dentry_lru_del_init(struct dentry *dentry)
{
if (likely(!list_empty(&dentry->d_lru))) {
- spin_lock(&dcache_lru_lock);
- __dentry_lru_del_init(dentry);
- spin_unlock(&dcache_lru_lock);
+ list_del_init(&dentry->d_lru);
+ dentry->d_sb->s_nr_dentry_unused--;
+ dentry_stat.nr_unused--;
}
}
@@ -216,87 +164,25 @@ static void dentry_lru_del_init(struct dentry *dentry)
* The dentry must already be unhashed and removed from the LRU.
*
* If this is the root of the dentry tree, return NULL.
- *
- * d_lock and d_parent->d_lock must be held by caller, and
- * are dropped by d_kill.
*/
static struct dentry *d_kill(struct dentry *dentry)
__releases(dentry->d_lock)
+ __releases(dcache_lock)
{
struct dentry *parent;
list_del(&dentry->d_u.d_child);
- if (dentry->d_parent && dentry != dentry->d_parent)
- spin_unlock(&dentry->d_parent->d_lock);
+ dentry_stat.nr_dentry--; /* For d_free, below */
+ /*drops the locks, at that point nobody can reach this dentry */
+ dentry_iput(dentry);
if (IS_ROOT(dentry))
parent = NULL;
else
parent = dentry->d_parent;
- /*drops the locks, at that point nobody can reach this dentry */
- dentry_iput(dentry);
d_free(dentry);
return parent;
}
-void __d_drop(struct dentry *dentry)
-{
- if (!(dentry->d_flags & DCACHE_UNHASHED)) {
- struct dcache_hash_bucket *b;
- b = d_hash(dentry->d_parent, dentry->d_name.hash);
- dentry->d_flags |= DCACHE_UNHASHED;
- spin_lock(&b->lock);
- hlist_del_rcu(&dentry->d_hash);
- spin_unlock(&b->lock);
- }
-}
-EXPORT_SYMBOL(__d_drop);
-
-void d_drop(struct dentry *dentry)
-{
- spin_lock(&dentry->d_lock);
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(d_drop);
-
-static inline struct dentry *__dget_dlock(struct dentry *dentry)
-{
- atomic_inc(&dentry->d_count);
- return dentry;
-}
-
-static inline struct dentry *__dget(struct dentry *dentry)
-{
- __dget_dlock(dentry);
- return dentry;
-}
-
-struct dentry *dget_parent(struct dentry *dentry)
-{
- struct dentry *ret;
-
-repeat:
- spin_lock(&dentry->d_lock);
- ret = dentry->d_parent;
- if (!ret)
- goto out;
- if (dentry == ret) {
- atomic_inc(&ret->d_count);
- goto out;
- }
- if (!spin_trylock(&ret->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto repeat;
- }
- BUG_ON(!atomic_read(&ret->d_count));
- atomic_inc(&ret->d_count);
- spin_unlock(&ret->d_lock);
-out:
- spin_unlock(&dentry->d_lock);
- return ret;
-}
-EXPORT_SYMBOL(dget_parent);
-
/*
* This is dput
*
@@ -328,68 +214,48 @@ EXPORT_SYMBOL(dget_parent);
void dput(struct dentry *dentry)
{
- struct dentry *parent;
- struct inode *inode;
-
if (!dentry)
return;
repeat:
if (atomic_read(&dentry->d_count) == 1)
might_sleep();
-
- if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+ if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+ }
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
if (dentry->d_op && dentry->d_op->d_delete) {
- if (dentry->d_op->d_delete(dentry)) {
- __d_drop(dentry);
- goto kill_it;
- }
+ if (dentry->d_op->d_delete(dentry))
+ goto unhash_it;
}
/* Unreachable? Get rid of it */
- if (d_unhashed(dentry))
+ if (d_unhashed(dentry))
goto kill_it;
- if (list_empty(&dentry->d_lru)) {
- dentry->d_flags |= DCACHE_REFERENCED;
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_flags |= DCACHE_REFERENCED;
dentry_lru_add(dentry);
- }
- spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return;
+unhash_it:
+ __d_drop(dentry);
kill_it:
- inode = dentry->d_inode;
- if (inode && !spin_trylock(&inode->i_lock))
- goto retry;
-
- parent = dentry->d_parent;
- if (parent && parent != dentry && !spin_trylock(&parent->d_lock)) {
- if (inode)
- spin_unlock(&inode->i_lock);
- goto retry;
- }
-
/* if dentry was on the d_lru list delete it from there */
dentry_lru_del(dentry);
dentry = d_kill(dentry);
if (dentry)
goto repeat;
- return;
-
-retry:
- /*
- * We are about to drop dentry->d_lock. dentry->d_count is 0
- * so it could be freed by someone else and leave us with a
- * stale pointer. Prevent this by increasing d_count before
- * dropping d_lock.
- */
- atomic_inc(&dentry->d_count);
- spin_unlock(&dentry->d_lock);
- goto repeat;
}
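The restored dput() leans on atomic_dec_and_lock(), which takes dcache_lock only when the reference count may be going to zero. Its contract is equivalent to the following sketch (essentially what lib/dec_and_lock.c does):

	int atomic_dec_and_lock(atomic_t *cnt, spinlock_t *lock)
	{
		if (atomic_add_unless(cnt, -1, 1))
			return 0;	/* count was > 1: fast path, no lock */
		spin_lock(lock);
		if (atomic_dec_and_test(cnt))
			return 1;	/* hit zero: return with lock held */
		spin_unlock(lock);
		return 0;
	}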
/**
@@ -409,9 +275,9 @@ int d_invalidate(struct dentry * dentry)
/*
* If it's already been dropped, return OK.
*/
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
if (d_unhashed(dentry)) {
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return 0;
}
/*
@@ -419,9 +285,9 @@ int d_invalidate(struct dentry * dentry)
* to get rid of unused child entries.
*/
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
shrink_dcache_parent(dentry);
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
}
/*
@@ -434,18 +300,35 @@ int d_invalidate(struct dentry * dentry)
* we might still populate it if it was a
* working directory or similar).
*/
+ spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return -EBUSY;
}
}
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return 0;
}
+/* This should be called _only_ with dcache_lock held */
+
+static inline struct dentry * __dget_locked(struct dentry *dentry)
+{
+ atomic_inc(&dentry->d_count);
+ dentry_lru_del_init(dentry);
+ return dentry;
+}
+
+struct dentry * dget_locked(struct dentry *dentry)
+{
+ return __dget_locked(dentry);
+}
+
/**
* d_find_alias - grab a hashed alias of inode
* @inode: inode in question
@@ -475,21 +358,18 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
next = tmp->next;
prefetch(next);
alias = list_entry(tmp, struct dentry, d_alias);
- spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
(alias->d_flags & DCACHE_DISCONNECTED))
discon_alias = alias;
else if (!want_discon) {
- __dget_dlock(alias);
- spin_unlock(&alias->d_lock);
+ __dget_locked(alias);
return alias;
}
}
- spin_unlock(&alias->d_lock);
}
if (discon_alias)
- __dget(discon_alias);
+ __dget_locked(discon_alias);
return discon_alias;
}
@@ -498,9 +378,9 @@ struct dentry * d_find_alias(struct inode *inode)
struct dentry *de = NULL;
if (!list_empty(&inode->i_dentry)) {
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
de = __d_find_alias(inode, 0);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
}
return de;
}
@@ -513,20 +393,20 @@ void d_prune_aliases(struct inode *inode)
{
struct dentry *dentry;
restart:
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- __dget_dlock(dentry);
+ __dget_locked(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
dput(dentry);
goto restart;
}
spin_unlock(&dentry->d_lock);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
}
/*
@@ -539,43 +419,27 @@ restart:
*/
static void prune_one_dentry(struct dentry * dentry)
__releases(dentry->d_lock)
+ __releases(dcache_lock)
+ __acquires(dcache_lock)
{
__d_drop(dentry);
dentry = d_kill(dentry);
/*
- * Prune ancestors.
+ * Prune ancestors. Locking is simpler than in dput(),
+ * because dcache_lock needs to be taken anyway.
*/
+ spin_lock(&dcache_lock);
while (dentry) {
- struct dentry *parent = NULL;
- struct inode *inode = dentry->d_inode;
-
- if (inode)
- spin_lock(&inode->i_lock);
-again:
- spin_lock(&dentry->d_lock);
- if (dentry->d_parent && dentry != dentry->d_parent) {
- if (!spin_trylock(&dentry->d_parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
- parent = dentry->d_parent;
- }
- atomic_dec(&dentry->d_count);
- if (atomic_read(&dentry->d_count)) {
- if (parent)
- spin_unlock(&parent->d_lock);
- spin_unlock(&dentry->d_lock);
- if (inode)
- spin_unlock(&inode->i_lock);
+ if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
return;
- }
if (dentry->d_op && dentry->d_op->d_delete)
dentry->d_op->d_delete(dentry);
dentry_lru_del_init(dentry);
__d_drop(dentry);
dentry = d_kill(dentry);
+ spin_lock(&dcache_lock);
}
}
@@ -596,11 +460,10 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
BUG_ON(!sb);
BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
+ spin_lock(&dcache_lock);
if (count != NULL)
/* called from prune_dcache() and shrink_dcache_parent() */
cnt = *count;
-relock:
- spin_lock(&dcache_lru_lock);
restart:
if (count == NULL)
list_splice_init(&sb->s_dentry_lru, &tmp);
@@ -610,10 +473,7 @@ restart:
struct dentry, d_lru);
BUG_ON(dentry->d_sb != sb);
- if (!spin_trylock(&dentry->d_lock)) {
- spin_unlock(&dcache_lru_lock);
- goto relock;
- }
+ spin_lock(&dentry->d_lock);
/*
* If we are honouring the DCACHE_REFERENCED flag and
* the dentry has this flag set, don't free it. Clear
@@ -631,61 +491,33 @@ restart:
if (!cnt)
break;
}
- cond_resched_lock(&dcache_lru_lock);
+ cond_resched_lock(&dcache_lock);
}
}
- spin_unlock(&dcache_lru_lock);
-
-again:
- spin_lock(&dcache_lru_lock); /* lru_lock also protects tmp list */
while (!list_empty(&tmp)) {
- struct inode *inode;
-
dentry = list_entry(tmp.prev, struct dentry, d_lru);
-
- if (!spin_trylock(&dentry->d_lock)) {
-again1:
- spin_unlock(&dcache_lru_lock);
- goto again;
- }
+ dentry_lru_del_init(dentry);
+ spin_lock(&dentry->d_lock);
/*
* We found an inuse dentry which was not removed from
* the LRU because of laziness during lookup. Do not free
* it - just keep it off the LRU list.
*/
if (atomic_read(&dentry->d_count)) {
- __dentry_lru_del_init(dentry);
spin_unlock(&dentry->d_lock);
continue;
}
- inode = dentry->d_inode;
- if (inode && !spin_trylock(&inode->i_lock)) {
-again2:
- spin_unlock(&dentry->d_lock);
- goto again1;
- }
- if (dentry->d_parent && dentry->d_parent != dentry) {
- if (!spin_trylock(&dentry->d_parent->d_lock)) {
- if (inode)
- spin_unlock(&inode->i_lock);
- goto again2;
- }
- }
- __dentry_lru_del_init(dentry);
- spin_unlock(&dcache_lru_lock);
-
prune_one_dentry(dentry);
- /* dentry->d_lock dropped */
- spin_lock(&dcache_lru_lock);
+ /* dentry->d_lock was dropped in prune_one_dentry() */
+ cond_resched_lock(&dcache_lock);
}
-
if (count == NULL && !list_empty(&sb->s_dentry_lru))
goto restart;
if (count != NULL)
*count = cnt;
if (!list_empty(&referenced))
list_splice(&referenced, &sb->s_dentry_lru);
- spin_unlock(&dcache_lru_lock);
+ spin_unlock(&dcache_lock);
}
/**
@@ -707,6 +539,7 @@ static void prune_dcache(int count)
if (unused == 0 || count == 0)
return;
+ spin_lock(&dcache_lock);
restart:
if (count >= unused)
prune_ratio = 1;
@@ -742,9 +575,11 @@ restart:
if (down_read_trylock(&sb->s_umount)) {
if ((sb->s_root != NULL) &&
(!list_empty(&sb->s_dentry_lru))) {
+ spin_unlock(&dcache_lock);
__shrink_dcache_sb(sb, &w_count,
DCACHE_REFERENCED);
pruned -= w_count;
+ spin_lock(&dcache_lock);
}
up_read(&sb->s_umount);
}
@@ -760,6 +595,7 @@ restart:
}
}
spin_unlock(&sb_lock);
+ spin_unlock(&dcache_lock);
}
/**
@@ -788,10 +624,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
BUG_ON(!IS_ROOT(dentry));
/* detach this root from the system */
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
dentry_lru_del_init(dentry);
__d_drop(dentry);
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
for (;;) {
/* descend to the first leaf in the current subtree */
@@ -800,15 +636,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* this is a branch with children - detach all of them
* from the system in one go */
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
list_for_each_entry(loop, &dentry->d_subdirs,
d_u.d_child) {
- spin_lock_nested(&loop->d_lock, DENTRY_D_LOCK_NESTED);
dentry_lru_del_init(loop);
__d_drop(loop);
- spin_unlock(&loop->d_lock);
+ cond_resched_lock(&dcache_lock);
}
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
/* move to the first child */
dentry = list_entry(dentry->d_subdirs.next,
@@ -835,17 +670,14 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
BUG();
}
- if (IS_ROOT(dentry)) {
+ if (IS_ROOT(dentry))
parent = NULL;
- list_del(&dentry->d_u.d_child);
- } else {
+ else {
parent = dentry->d_parent;
- spin_lock(&parent->d_lock);
atomic_dec(&parent->d_count);
- list_del(&dentry->d_u.d_child);
- spin_unlock(&parent->d_lock);
}
+ list_del(&dentry->d_u.d_child);
detached++;
inode = dentry->d_inode;
@@ -874,12 +706,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
struct dentry, d_u.d_child);
}
out:
- return;
+ /* several dentries were freed, need to correct nr_dentry */
+ spin_lock(&dcache_lock);
+ dentry_stat.nr_dentry -= detached;
+ spin_unlock(&dcache_lock);
}
/*
* destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock because:
+ * - we don't need to use dentry->d_lock, and only need dcache_lock when
+ * removing the dentry from the system lists and hashes because:
* - the superblock is detached from all mountings and open files, so the
* dentry trees will not be rearranged by the VFS
* - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -897,9 +733,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
dentry = sb->s_root;
sb->s_root = NULL;
- spin_lock(&dentry->d_lock);
atomic_dec(&dentry->d_count);
- spin_unlock(&dentry->d_lock);
shrink_dcache_for_umount_subtree(dentry);
while (!hlist_empty(&sb->s_anon)) {
@@ -921,19 +755,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
* Return true if the parent or its subdirectories contain
* a mount point
*/
+
int have_submounts(struct dentry *parent)
{
- struct dentry *this_parent;
+ struct dentry *this_parent = parent;
struct list_head *next;
- unsigned seq;
-
-rename_retry:
- this_parent = parent;
- seq = read_seqbegin(&rename_lock);
+ spin_lock(&dcache_lock);
if (d_mountpoint(parent))
goto positive;
- spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -941,56 +771,26 @@ resume:
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
-
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
/* Have we found a mount point ? */
- if (d_mountpoint(dentry)) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&this_parent->d_lock);
+ if (d_mountpoint(dentry))
goto positive;
- }
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&this_parent->d_lock);
- spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
- spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
- spin_unlock(&dentry->d_lock);
}
/*
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- // d_unlinked(this_parent) || XXX
- read_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
- goto rename_retry;
- }
- rcu_read_unlock();
- next = child->d_u.d_child.next;
+ next = this_parent->d_u.d_child.next;
+ this_parent = this_parent->d_parent;
goto resume;
}
- spin_unlock(&this_parent->d_lock);
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
return 0; /* No mount points found in tree */
positive:
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
return 1;
}
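/*
 * For reference: have_submounts() above uses the classic dcache walk --
 * descend whenever d_subdirs is non-empty, and when a level is exhausted,
 * ascend via d_parent and resume at the sibling after the child just
 * finished.  A minimal, self-contained userspace sketch of the same
 * ascend/resume pattern (node, first_child, next_sibling and flagged are
 * illustrative names, not kernel API):
 */
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *parent;
	struct node *first_child;
	struct node *next_sibling;
	bool flagged;			/* stands in for d_mountpoint() */
};

static bool tree_has_flagged(const struct node *root)
{
	const struct node *this = root;
	const struct node *child = root->first_child;

	if (root->flagged)
		return true;
	while (child) {
		if (child->flagged)
			return true;
		if (child->first_child) {	/* descend a level */
			this = child;
			child = this->first_child;
			continue;
		}
		/* this level done: ascend until a sibling exists */
		while (!child->next_sibling && this != root) {
			child = this;
			this = this->parent;
		}
		child = child->next_sibling;
	}
	return false;
}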
@@ -1010,17 +810,11 @@ positive:
*/
static int select_parent(struct dentry * parent)
{
- struct dentry *this_parent;
+ struct dentry *this_parent = parent;
struct list_head *next;
- unsigned seq;
- int found;
-
-rename_retry:
- found = 0;
- this_parent = parent;
- seq = read_seqbegin(&rename_lock);
+ int found = 0;
- spin_lock(&this_parent->d_lock);
+ spin_lock(&dcache_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -1029,7 +823,6 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
dentry_lru_del_init(dentry);
/*
* move only zero ref count dentries to the end
@@ -1045,49 +838,27 @@ resume:
* ensures forward progress). We'll be coming back to find
* the rest.
*/
- if (found && need_resched()) {
- spin_unlock(&dentry->d_lock);
+ if (found && need_resched())
goto out;
- }
/*
* Descend a level if the d_subdirs list is non-empty.
- * Note that we keep a hold on the parent lock while
- * we descend, so we don't have to reacquire it on
- * ascend.
*/
if (!list_empty(&dentry->d_subdirs)) {
this_parent = dentry;
goto repeat;
}
-
- spin_unlock(&dentry->d_lock);
}
/*
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- child = this_parent;
- next = child->d_u.d_child.next;
- spin_unlock(&this_parent->d_lock);
- this_parent = tmp;
+ next = this_parent->d_u.d_child.next;
+ this_parent = this_parent->d_parent;
goto resume;
}
-
out:
- /* Make sure we unlock all the way back up the tree */
- while (this_parent != parent) {
- struct dentry *tmp = this_parent->d_parent;
- spin_unlock(&this_parent->d_lock);
- this_parent = tmp;
- }
- spin_unlock(&this_parent->d_lock);
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
return found;
}
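/*
 * The "found && need_resched()" bail-out above is the forward-progress
 * guarantee: once at least one zero-count dentry has been moved to the LRU
 * tail, the scan may stop early and let the shrinker eat what was found.
 * The caller loops until nothing more turns up -- roughly (a sketch of the
 * era's shrink_dcache_parent(), names as used in this file):
 *
 *	void shrink_dcache_parent(struct dentry *parent)
 *	{
 *		struct super_block *sb = parent->d_sb;
 *		int found;
 *
 *		while ((found = select_parent(parent)) != 0)
 *			__shrink_dcache_sb(sb, &found, 0);
 *	}
 */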
@@ -1172,7 +943,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
atomic_set(&dentry->d_count, 1);
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
- seqcount_init(&dentry->d_seq);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
@@ -1183,18 +953,20 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
- INIT_LIST_HEAD(&dentry->d_u.d_child);
if (parent) {
- spin_lock(&parent->d_lock);
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- dentry->d_parent = dget_dlock(parent);
+ dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&parent->d_lock);
+ } else {
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
}
+ spin_lock(&dcache_lock);
+ if (parent)
+ list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+ dentry_stat.nr_dentry++;
+ spin_unlock(&dcache_lock);
+
return dentry;
}
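/*
 * dget() vs dget_locked(), which this revert swaps back in several hunks
 * below: with d_count restored to an atomic_t, dget() is a bare
 * atomic_inc() and must only be used on a dentry whose count is already
 * non-zero, while dget_locked() is for callers holding dcache_lock who may
 * be resurrecting a zero-count dentry found via an inode alias list or the
 * hash.  Roughly, in the pre-patch scheme:
 *
 *	static inline struct dentry *dget(struct dentry *dentry)
 *	{
 *		if (dentry) {
 *			BUG_ON(!atomic_read(&dentry->d_count));
 *			atomic_inc(&dentry->d_count);
 *		}
 *		return dentry;
 *	}
 *
 * dget_locked() additionally pulls the dentry off the unused LRU
 * (dentry_lru_del_init()) after bumping the count.
 */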
@@ -1209,6 +981,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
}
EXPORT_SYMBOL(d_alloc_name);
+/* the caller must hold dcache_lock */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
if (inode)
@@ -1235,11 +1008,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!list_empty(&entry->d_alias));
- if (inode)
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
__d_instantiate(entry, inode);
- if (inode)
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
}
@@ -1283,7 +1054,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
continue;
if (memcmp(qstr->name, name, len))
continue;
- dget(alias);
+ dget_locked(alias);
return alias;
}
@@ -1297,11 +1068,9 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
BUG_ON(!list_empty(&entry->d_alias));
- if (inode)
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
result = __d_instantiate_unique(entry, inode);
- if (inode)
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
if (!result) {
security_d_instantiate(entry, inode);
@@ -1341,6 +1110,14 @@ struct dentry * d_alloc_root(struct inode * root_inode)
return res;
}
+static inline struct hlist_head *d_hash(struct dentry *parent,
+ unsigned long hash)
+{
+ hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
+ hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+ return dentry_hashtable + (hash & D_HASHMASK);
+}
+
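/*
 * The re-added d_hash() folds the parent dentry's address into the name
 * hash (so the same name under two directories lands in different buckets)
 * and then mixes the high bits back down before masking.  A self-contained
 * userspace sketch of the same folding scheme, with illustrative constants
 * (GOLDEN_RATIO_PRIME here is the 32-bit value from linux/hash.h; HASHBITS
 * and HASHMASK stand in for D_HASHBITS/D_HASHMASK):
 */
#include <stdio.h>

#define GOLDEN_RATIO_PRIME 0x9e370001UL
#define HASHBITS 10
#define HASHMASK ((1UL << HASHBITS) - 1)
#define L1_CACHE 64

static unsigned long bucket_of(const void *parent, unsigned long hash)
{
	hash += ((unsigned long)parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE;
	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> HASHBITS);
	return hash & HASHMASK;
}

int main(void)
{
	int a, b;	/* two distinct "parent" addresses */

	printf("same name hash, parent 1 -> bucket %lu\n", bucket_of(&a, 12345));
	printf("same name hash, parent 2 -> bucket %lu\n", bucket_of(&b, 12345));
	return 0;
}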
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1381,10 +1158,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
}
tmp->d_parent = tmp; /* make sure dput doesn't croak */
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
res = __d_find_alias(inode, 0);
if (res) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
dput(tmp);
goto out_iput;
}
@@ -1398,8 +1175,8 @@ struct dentry *d_obtain_alias(struct inode *inode)
list_add(&tmp->d_alias, &inode->i_dentry);
hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
spin_unlock(&tmp->d_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
return tmp;
out_iput:
@@ -1429,19 +1206,19 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
struct dentry *new = NULL;
if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
security_d_instantiate(new, inode);
d_rehash(dentry);
d_move(new, dentry);
iput(inode);
} else {
- /* already taken inode->i_lock, d_add() by hand */
+ /* already taking dcache_lock, so d_add() by hand */
__d_instantiate(dentry, inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
}
@@ -1513,10 +1290,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* Negative dentry: instantiate it unless the inode is a directory and
* already has a dentry.
*/
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
__d_instantiate(found, inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
security_d_instantiate(found, inode);
return found;
}
@@ -1526,8 +1303,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* reference to it, move it in place and use it.
*/
new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- dget(new);
- spin_unlock(&inode->i_lock);
+ dget_locked(new);
+ spin_unlock(&dcache_lock);
security_d_instantiate(found, inode);
d_move(new, found);
iput(inode);
@@ -1549,7 +1326,7 @@ err_out:
* is returned. The caller must use dput to free the entry when it has
* finished using it. %NULL is returned on failure.
*
- * __d_lookup is global lock free. The hash list is protected using RCU.
+ * __d_lookup is dcache_lock free. The hash list is protected using RCU.
* Memory barriers are used while updating and doing lockless traversal.
* To avoid races with d_move while rename is happening, d_lock is used.
*
@@ -1561,18 +1338,33 @@ err_out:
*
* The dentry unused LRU is not updated even if lookup finds the required dentry
* in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
- * select_parent. This laziness saves lookup from LRU lock acquisition.
+ * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
+ * acquisition.
*
 * d_lookup() is protected against concurrent renames in some unrelated
 * directory using the seqlock_t rename_lock.
*/
+
+struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+{
+ struct dentry * dentry = NULL;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&rename_lock);
+ dentry = __d_lookup(parent, name);
+ if (dentry)
+ break;
+ } while (read_seqretry(&rename_lock, seq));
+ return dentry;
+}
+
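/*
 * The d_lookup() wrapper re-added above is a textbook seqlock reader: it
 * snapshots rename_lock, runs the lockless hash walk, and retries if a
 * rename overlapped.  The matching writer is d_move_locked() further down,
 * which brackets the move in write_seqlock(&rename_lock) /
 * write_sequnlock(&rename_lock).  Generic shape of the reader side:
 *
 *	do {
 *		seq = read_seqbegin(&lock);
 *		... read shared data without taking locks ...
 *	} while (read_seqretry(&lock, seq));
 */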
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
const unsigned char *str = name->name;
- struct dcache_hash_bucket *b = d_hash(parent, hash);
- struct hlist_head *head = &b->head;
+ struct hlist_head *head = d_hash(parent, hash);
struct dentry *found = NULL;
struct hlist_node *node;
struct dentry *dentry;
@@ -1628,78 +1420,6 @@ next:
return found;
}
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
-{
- struct dentry *dentry = NULL;
- unsigned seq;
-
- do {
- seq = read_seqbegin(&rename_lock);
- dentry = __d_lookup(parent, name);
- if (dentry)
- break;
- } while (read_seqretry(&rename_lock, seq));
- return dentry;
-}
-
-struct dentry * __d_lookup_rcu(struct dentry * parent, struct qstr * name)
-{
- unsigned int len = name->len;
- unsigned int hash = name->hash;
- const unsigned char *str = name->name;
- struct dcache_hash_bucket *b = d_hash(parent, hash);
- struct hlist_head *head = &b->head;
- struct hlist_node *node;
- struct dentry *dentry;
-
- hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
- unsigned seq;
- struct dentry *tparent;
- const char *tname;
- int tlen;
-
- if (unlikely(dentry->d_name.hash != hash))
- continue;
-
-seqretry:
- seq = read_seqcount_begin(&dentry->d_seq);
- tparent = dentry->d_parent;
- if (unlikely(tparent != parent))
- continue;
- tlen = dentry->d_name.len;
- if (unlikely(tlen != len))
- continue;
- tname = dentry->d_name.name;
- if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
- goto seqretry;
- if (unlikely(memcmp(tname, str, tlen)))
- continue;
- if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
- goto seqretry;
-
- return dentry;
- }
- return NULL;
-}
-
-struct dentry *d_lookup_rcu(struct dentry *parent, struct qstr * name)
-{
- struct dentry *dentry = NULL;
- unsigned seq;
-
- if (parent->d_op && parent->d_op->d_compare)
- goto out;
-
- do {
- seq = read_seqbegin(&rename_lock);
- dentry = __d_lookup_rcu(parent, name);
- if (dentry)
- break;
- } while (read_seqretry(&rename_lock, seq));
-out:
- return dentry;
-}
-
/**
* d_hash_and_lookup - hash the qstr then search for a dentry
* @dir: Directory to search in
@@ -1738,7 +1458,6 @@ out:
int d_validate(struct dentry *dentry, struct dentry *dparent)
{
- struct dcache_hash_bucket *b;
struct hlist_head *base;
struct hlist_node *lhp;
@@ -1749,23 +1468,19 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
if (dentry->d_parent != dparent)
goto out;
- spin_lock(&dentry->d_lock);
- b = d_hash(dparent, dentry->d_name.hash);
- base = &b->head;
- spin_lock(&b->lock);
- hlist_for_each(lhp, base) {
+ spin_lock(&dcache_lock);
+ base = d_hash(dparent, dentry->d_name.hash);
+ hlist_for_each(lhp, base) {
/* hlist_for_each_entry_rcu() not required for d_hash list
- * as it is parsed under dcache_hash_bucket->lock
+ * as it is walked under dcache_lock
*/
if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
- spin_unlock(&b->lock);
- __dget_dlock(dentry);
- spin_unlock(&dentry->d_lock);
+ __dget_locked(dentry);
+ spin_unlock(&dcache_lock);
return 1;
}
}
- spin_unlock(&b->lock);
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
out:
return 0;
}
@@ -1793,20 +1508,14 @@ out:
void d_delete(struct dentry * dentry)
{
- struct inode *inode;
int isdir = 0;
/*
* Are we the only user?
*/
-again:
+ spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- inode = dentry->d_inode;
- isdir = S_ISDIR(inode->i_mode);
+ isdir = S_ISDIR(dentry->d_inode->i_mode);
if (atomic_read(&dentry->d_count) == 1) {
- if (inode && !spin_trylock(&inode->i_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
dentry_iput(dentry);
fsnotify_nameremove(dentry, isdir);
return;
@@ -1816,16 +1525,16 @@ again:
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
fsnotify_nameremove(dentry, isdir);
}
-static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
+static void __d_rehash(struct dentry * entry, struct hlist_head *list)
{
+
entry->d_flags &= ~DCACHE_UNHASHED;
- spin_lock(&b->lock);
- hlist_add_head_rcu(&entry->d_hash, &b->head);
- spin_unlock(&b->lock);
+ hlist_add_head_rcu(&entry->d_hash, list);
}
static void _d_rehash(struct dentry * entry)
@@ -1842,9 +1551,11 @@ static void _d_rehash(struct dentry * entry)
void d_rehash(struct dentry * entry)
{
+ spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
_d_rehash(entry);
spin_unlock(&entry->d_lock);
+ spin_unlock(&dcache_lock);
}
/*
@@ -1921,46 +1632,32 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
*/
static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
- struct dcache_hash_bucket *b;
+ struct hlist_head *list;
+
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
write_seqlock(&rename_lock);
-
- if (target->d_parent != dentry->d_parent) {
- if (target->d_parent < dentry->d_parent) {
- spin_lock(&target->d_parent->d_lock);
- spin_lock_nested(&dentry->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- } else {
- spin_lock(&dentry->d_parent->d_lock);
- spin_lock_nested(&target->d_parent->d_lock,
- DENTRY_D_LOCK_NESTED);
- }
- } else {
- spin_lock(&target->d_parent->d_lock);
- }
-
- if (dentry != dentry->d_parent) {
- if (target < dentry) {
- spin_lock_nested(&target->d_lock, 2);
- spin_lock_nested(&dentry->d_lock, 3);
- } else {
- spin_lock_nested(&dentry->d_lock, 2);
- spin_lock_nested(&target->d_lock, 3);
- }
+ /*
+ * XXXX: do we really need to take target->d_lock?
+ */
+ if (target < dentry) {
+ spin_lock(&target->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
} else {
- spin_lock_nested(&target->d_lock, 2);
+ spin_lock(&dentry->d_lock);
+ spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
}
/* Move the dentry to the target hash queue, if on different bucket */
- if (!d_unhashed(dentry)) {
- b = d_hash(dentry->d_parent, dentry->d_name.hash);
- spin_lock(&b->lock);
- hlist_del_rcu(&dentry->d_hash);
- spin_unlock(&b->lock);
- }
- __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
+ if (d_unhashed(dentry))
+ goto already_unhashed;
+
+ hlist_del_rcu(&dentry->d_hash);
+
+already_unhashed:
+ list = d_hash(target->d_parent, target->d_name.hash);
+ __d_rehash(dentry, list);
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
@@ -1969,8 +1666,6 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
list_del(&target->d_u.d_child);
/* Switch the names.. */
- write_seqcount_begin(&dentry->d_seq);
- write_seqcount_begin(&target->d_seq);
switch_names(dentry, target);
swap(dentry->d_name.hash, target->d_name.hash);
@@ -1985,14 +1680,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
/* And add them back to the (new) parent lists */
list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
}
- write_seqcount_end(&target->d_seq);
- write_seqcount_end(&dentry->d_seq);
list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
- if (target->d_parent != dentry->d_parent)
- spin_unlock(&dentry->d_parent->d_lock);
- if (target->d_parent != target)
- spin_unlock(&target->d_parent->d_lock);
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
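/*
 * The restored "if (target < dentry)" dance above orders the two d_locks
 * by object address, so two concurrent moves over the same pair of
 * dentries always lock in the same order and cannot ABBA-deadlock.  A
 * self-contained userspace analogue with pthreads (obj, lock_pair and
 * unlock_pair are illustrative names):
 */
#include <pthread.h>

struct obj {
	pthread_mutex_t lock;
};

/* always take the lower-addressed lock first */
static void lock_pair(struct obj *a, struct obj *b)
{
	if (a == b) {
		pthread_mutex_lock(&a->lock);
	} else if (a < b) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

static void unlock_pair(struct obj *a, struct obj *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a != b)
		pthread_mutex_unlock(&b->lock);
}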
@@ -2010,7 +1699,9 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target)
void d_move(struct dentry * dentry, struct dentry * target)
{
+ spin_lock(&dcache_lock);
d_move_locked(dentry, target);
+ spin_unlock(&dcache_lock);
}
/**
@@ -2036,16 +1727,16 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
*/
static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+ __releases(dcache_lock)
{
struct mutex *m1 = NULL, *m2 = NULL;
struct dentry *ret;
- struct inode *inode = NULL;
/* If alias and dentry share a parent, then no extra locks required */
if (alias->d_parent == dentry->d_parent)
@@ -2061,15 +1752,14 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
goto out_err;
m1 = &dentry->d_sb->s_vfs_rename_mutex;
- inode = alias->d_parent->d_inode;
- if (!mutex_trylock(&inode->i_mutex))
+ if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
goto out_err;
- m2 = &inode->i_mutex;
+ m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
d_move_locked(alias, dentry);
ret = alias;
out_err:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
if (m2)
mutex_unlock(m2);
if (m1)
@@ -2091,12 +1781,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
dparent = dentry->d_parent;
aparent = anon->d_parent;
- /* XXX: hack */
- spin_lock(&aparent->d_lock);
- spin_lock(&dparent->d_lock);
- spin_lock(&dentry->d_lock);
- spin_lock(&anon->d_lock);
-
dentry->d_parent = (aparent == anon) ? dentry : aparent;
list_del(&dentry->d_u.d_child);
if (!IS_ROOT(dentry))
@@ -2111,11 +1795,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
else
INIT_LIST_HEAD(&anon->d_u.d_child);
- spin_unlock(&anon->d_lock);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dparent->d_lock);
- spin_unlock(&aparent->d_lock);
-
anon->d_flags &= ~DCACHE_DISCONNECTED;
}
@@ -2133,15 +1812,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
BUG_ON(!d_unhashed(dentry));
+ spin_lock(&dcache_lock);
+
if (!inode) {
actual = dentry;
__d_instantiate(dentry, NULL);
- d_rehash(actual);
- goto out_nolock;
+ goto found_lock;
}
- spin_lock(&inode->i_lock);
-
if (S_ISDIR(inode->i_mode)) {
struct dentry *alias;
@@ -2169,14 +1847,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
actual = __d_instantiate_unique(dentry, inode);
if (!actual)
actual = dentry;
- else
- BUG_ON(!d_unhashed(actual));
+ else if (unlikely(!d_unhashed(actual)))
+ goto shouldnt_be_hashed;
+found_lock:
spin_lock(&actual->d_lock);
found:
_d_rehash(actual);
spin_unlock(&actual->d_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
out_nolock:
if (actual == dentry) {
security_d_instantiate(dentry, inode);
@@ -2185,6 +1864,10 @@ out_nolock:
iput(inode);
return actual;
+
+shouldnt_be_hashed:
+ spin_unlock(&dcache_lock);
+ BUG();
}
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
@@ -2215,7 +1898,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
* Returns a pointer into the buffer or an error code if the
* path was too long.
*
- * "buflen" should be positive. Caller holds the path->dentry->d_lock.
+ * "buflen" should be positive. Caller holds the dcache_lock.
*
* If path is not reachable from the supplied root, then the value of
* root is changed (without modifying refcounts).
@@ -2223,22 +1906,13 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
char *__d_path(const struct path *path, struct path *root,
char *buffer, int buflen)
{
- struct dentry *dentry;
- struct vfsmount *vfsmnt;
- char *end;
+ struct dentry *dentry = path->dentry;
+ struct vfsmount *vfsmnt = path->mnt;
+ char *end = buffer + buflen;
char *retval;
- unsigned seq;
-rename_retry:
- dentry = path->dentry;
- vfsmnt = path->mnt;
- end = buffer + buflen;
+ spin_lock(&vfsmount_lock);
prepend(&end, &buflen, "\0", 1);
-
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- spin_lock(&dentry->d_lock);
-unlinked:
if (d_unlinked(dentry) &&
(prepend(&end, &buflen, " (deleted)", 10) != 0))
goto Elong;
@@ -2250,7 +1924,7 @@ unlinked:
*retval = '/';
for (;;) {
- struct dentry *parent;
+ struct dentry * parent;
if (dentry == root->dentry && vfsmnt == root->mnt)
break;
@@ -2259,10 +1933,8 @@ unlinked:
if (vfsmnt->mnt_parent == vfsmnt) {
goto global_root;
}
- spin_unlock(&dentry->d_lock);
dentry = vfsmnt->mnt_mountpoint;
vfsmnt = vfsmnt->mnt_parent;
- spin_lock(&dentry->d_lock); /* can't get unlinked because locked vfsmount */
continue;
}
parent = dentry->d_parent;
@@ -2271,18 +1943,11 @@ unlinked:
(prepend(&end, &buflen, "/", 1) != 0))
goto Elong;
retval = end;
- spin_unlock(&dentry->d_lock);
dentry = parent;
- spin_lock(&dentry->d_lock);
- if (d_unlinked(dentry))
- goto unlinked;
}
out:
- spin_unlock(&dentry->d_lock);
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&vfsmount_lock);
return retval;
global_root:
@@ -2291,7 +1956,6 @@ global_root:
goto Elong;
root->mnt = vfsmnt;
root->dentry = dentry;
- /* XXX: this could wrongly modify root if we rename retry */
goto out;
Elong:
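/*
 * __d_path() builds the string right to left: "end" starts at the far end
 * of the caller's buffer and each component is stuck on in front of the
 * previous one as the walk ascends through d_parent.  The prepend() helper
 * it relies on (unchanged by this patch) is essentially:
 *
 *	static int prepend(char **buffer, int *buflen, const char *str,
 *			   int namelen)
 *	{
 *		*buflen -= namelen;
 *		if (*buflen < 0)
 *			return -ENAMETOOLONG;
 *		*buffer -= namelen;
 *		memcpy(*buffer, str, namelen);
 *		return 0;
 *	}
 */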
@@ -2320,8 +1984,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
char *res;
struct path root;
struct path tmp;
- int cpu = get_cpu();
- put_cpu();
/*
* We have various synthetic filesystems that never get mounted. On
@@ -2337,12 +1999,10 @@ char *d_path(const struct path *path, char *buf, int buflen)
root = current->fs->root;
path_get(&root);
read_unlock(&current->fs->lock);
-
- vfsmount_read_lock(cpu);
+ spin_lock(&dcache_lock);
tmp = root;
res = __d_path(path, &tmp, buf, buflen);
- vfsmount_read_unlock(cpu);
-
+ spin_unlock(&dcache_lock);
path_put(&root);
return res;
}
@@ -2373,21 +2033,11 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
*/
char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
- char *end;
+ char *end = buf + buflen;
char *retval;
- unsigned seq;
- int cpu = get_cpu();
- put_cpu();
-rename_retry:
- end = buf + buflen;
+ spin_lock(&dcache_lock);
prepend(&end, &buflen, "\0", 1);
-
- seq = read_seqbegin(&rename_lock);
- vfsmount_read_lock(cpu);
- rcu_read_lock(); /* protect parent */
- spin_lock(&dentry->d_lock);
-unlinked:
if (d_unlinked(dentry) &&
(prepend(&end, &buflen, "//deleted", 9) != 0))
goto Elong;
@@ -2406,22 +2056,13 @@ unlinked:
goto Elong;
retval = end;
- spin_unlock(&dentry->d_lock);
dentry = parent;
- spin_lock(&dentry->d_lock);
- if (d_unlinked(dentry))
- goto unlinked;
}
-out:
- spin_unlock(&dentry->d_lock);
- rcu_read_unlock();
- vfsmount_read_unlock(cpu);
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
return retval;
Elong:
- retval = ERR_PTR(-ENAMETOOLONG);
- goto out;
+ spin_unlock(&dcache_lock);
+ return ERR_PTR(-ENAMETOOLONG);
}
/*
@@ -2447,8 +2088,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
int error;
struct path pwd, root;
char *page = (char *) __get_free_page(GFP_USER);
- int cpu = get_cpu();
- put_cpu();
if (!page)
return -ENOMEM;
@@ -2461,17 +2100,14 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
read_unlock(&current->fs->lock);
error = -ENOENT;
- vfsmount_read_lock(cpu);
- spin_lock(&pwd.dentry->d_lock);
+ spin_lock(&dcache_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
struct path tmp = root;
char * cwd;
- spin_unlock(&pwd.dentry->d_lock);
- /* XXX: race here, have to close (eg. return unlinked from __d_path) */
cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE);
- vfsmount_read_unlock(cpu);
+ spin_unlock(&dcache_lock);
error = PTR_ERR(cwd);
if (IS_ERR(cwd))
@@ -2484,10 +2120,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
if (copy_to_user(buf, cwd, len))
error = -EFAULT;
}
- } else {
- spin_unlock(&pwd.dentry->d_lock);
- vfsmount_read_unlock(cpu);
- }
+ } else
+ spin_unlock(&dcache_lock);
out:
path_put(&pwd);
@@ -2515,39 +2149,35 @@ out:
int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
int result;
- unsigned seq;
+ unsigned long seq;
if (new_dentry == old_dentry)
return 1;
+ /*
+ * Need rcu_read_lock() to protect against d_parent changing
+ * due to d_move
+ */
+ rcu_read_lock();
do {
/* for restarting inner loop in case of seq retry */
seq = read_seqbegin(&rename_lock);
- /*
- * Need rcu_readlock to protect against the d_parent trashing
- * due to d_move
- */
- rcu_read_lock();
if (d_ancestor(old_dentry, new_dentry))
result = 1;
else
result = 0;
- rcu_read_unlock();
} while (read_seqretry(&rename_lock, seq));
+ rcu_read_unlock();
return result;
}
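/*
 * For reference, the d_ancestor() used above (unchanged by this patch) is
 * a simple upward walk -- it returns the child of p1 on the path down to
 * p2, or NULL if p1 is not an ancestor:
 *
 *	struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
 *	{
 *		struct dentry *p;
 *
 *		for (p = p2; !IS_ROOT(p); p = p->d_parent) {
 *			if (p->d_parent == p1)
 *				return p;
 *		}
 *		return NULL;
 *	}
 *
 * which is why the whole check must run under rename_lock and RCU:
 * d_move() can change d_parent mid-walk.
 */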
void d_genocide(struct dentry *root)
{
- struct dentry *this_parent;
+ struct dentry *this_parent = root;
struct list_head *next;
- unsigned seq;
-rename_retry:
- this_parent = root;
- seq = read_seqbegin(&rename_lock);
- spin_lock(&this_parent->d_lock);
+ spin_lock(&dcache_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -2555,55 +2185,21 @@ resume:
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
-
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- if (d_unhashed(dentry) || !dentry->d_inode) {
- spin_unlock(&dentry->d_lock);
+ if (d_unhashed(dentry) || !dentry->d_inode)
continue;
- }
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&this_parent->d_lock);
- spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
- spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
- if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
- atomic_dec(&dentry->d_count);
- dentry->d_flags |= DCACHE_GENOCIDE;
- }
- spin_unlock(&dentry->d_lock);
+ atomic_dec(&dentry->d_count);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
- atomic_dec(&this_parent->d_count);
- this_parent->d_flags |= DCACHE_GENOCIDE;
- }
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- // d_unlinked(this_parent) || XXX
- read_seqretry(&rename_lock, seq)) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
- goto rename_retry;
- }
- rcu_read_unlock();
- next = child->d_u.d_child.next;
+ next = this_parent->d_u.d_child.next;
+ atomic_dec(&this_parent->d_count);
+ this_parent = this_parent->d_parent;
goto resume;
}
- spin_unlock(&this_parent->d_lock);
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
}
/**
@@ -2656,7 +2252,7 @@ static void __init dcache_init_early(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct dcache_hash_bucket),
+ sizeof(struct hlist_head),
dhash_entries,
13,
HASH_EARLY,
@@ -2664,10 +2260,8 @@ static void __init dcache_init_early(void)
&d_hash_mask,
0);
- for (loop = 0; loop < (1 << d_hash_shift); loop++) {
- spin_lock_init(&dentry_hashtable[loop].lock);
- INIT_HLIST_HEAD(&dentry_hashtable[loop].head);
- }
+ for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
static void __init dcache_init(void)
@@ -2690,7 +2284,7 @@ static void __init dcache_init(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct dcache_hash_bucket),
+ sizeof(struct hlist_head),
dhash_entries,
13,
0,
@@ -2698,10 +2292,8 @@ static void __init dcache_init(void)
&d_hash_mask,
0);
- for (loop = 0; loop < (1 << d_hash_shift); loop++) {
- spin_lock_init(&dentry_hashtable[loop].lock);
- INIT_HLIST_HEAD(&dentry_hashtable[loop].head);
- }
+ for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
/* SLAB cache for __getname() consumers */
@@ -2751,6 +2343,7 @@ EXPORT_SYMBOL(d_rehash);
EXPORT_SYMBOL(d_splice_alias);
EXPORT_SYMBOL(d_add_ci);
EXPORT_SYMBOL(d_validate);
+EXPORT_SYMBOL(dget_locked);
EXPORT_SYMBOL(dput);
EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(have_submounts);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index ee55bbf3bd37..31f4b0e6d72c 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,35 +14,23 @@ int sysctl_drop_caches;
static void drop_pagecache_sb(struct super_block *sb)
{
- int i;
+ struct inode *inode, *toput_inode = NULL;
- for_each_possible_cpu(i) {
- struct inode *inode, *toput_inode = NULL;
- struct list_head *list;
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_inodes, i);
-#else
- list = &sb->s_inodes;
-#endif
- rcu_read_lock();
- list_for_each_entry_rcu(inode, list, i_sb_list) {
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)
- || inode->i_mapping->nrpages == 0) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- rcu_read_unlock();
- invalidate_mapping_pages(inode->i_mapping, 0, -1);
- iput(toput_inode);
- toput_inode = inode;
- rcu_read_lock();
- }
- rcu_read_unlock();
+ spin_lock(&inode_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+ continue;
+ if (inode->i_mapping->nrpages == 0)
+ continue;
+ __iget(inode);
+ spin_unlock(&inode_lock);
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
+ toput_inode = inode;
+ spin_lock(&inode_lock);
}
+ spin_unlock(&inode_lock);
+ iput(toput_inode);
}
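/*
 * drop_pagecache_sb() above shows the canonical way to walk a
 * lock-protected inode list while calling something that can sleep: pin
 * the current inode with __iget() under inode_lock, drop the lock, do the
 * blocking work, and iput() the *previous* inode only outside the lock,
 * since iput() may drop the final reference and itself sleep.
 * wait_sb_inodes() in the fs/fs-writeback.c hunks below uses the same
 * toput_inode/old_inode trick.
 */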
static void drop_pagecache(void)
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 44fcbf48c01f..2afbcebeda71 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1054,9 +1054,7 @@ static void create_done(struct exofs_io_state *ios, void *p)
set_obj_created(oi);
- spin_lock(&inode->i_lock);
- inode->i_count--;
- spin_unlock(&inode->i_lock);
+ atomic_dec(&inode->i_count);
wake_up(&oi->i_wq);
}
@@ -1118,18 +1116,14 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
/* increment the refcount so that the inode will still be around when we
* reach the callback
*/
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
ios->done = create_done;
ios->private = inode;
ios->cred = oi->i_cred;
ret = exofs_sbi_create(ios);
if (ret) {
- spin_lock(&inode->i_lock);
- inode->i_count--;
- spin_unlock(&inode->i_lock);
+ atomic_dec(&inode->i_count);
exofs_put_io_state(ios);
return ERR_PTR(ret);
}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 506778ac4fcd..b7dd0c236863 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,9 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
return exofs_add_nondir(dentry, inode);
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b55fee07c0d9..e9e175949a63 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,26 +43,24 @@ find_acceptable_alias(struct dentry *result,
void *context)
{
struct dentry *dentry, *toput = NULL;
- struct inode *inode;
if (acceptable(context, result))
return result;
- inode = result->d_inode;
- spin_lock(&inode->i_lock);
- list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
- dget(dentry);
- spin_unlock(&inode->i_lock);
+ spin_lock(&dcache_lock);
+ list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
+ dget_locked(dentry);
+ spin_unlock(&dcache_lock);
if (toput)
dput(toput);
if (dentry != result && acceptable(context, dentry)) {
dput(result);
return dentry;
}
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
toput = dentry;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
if (toput)
dput(toput);
@@ -76,19 +74,12 @@ static struct dentry *
find_disconnected_root(struct dentry *dentry)
{
dget(dentry);
-again:
spin_lock(&dentry->d_lock);
while (!IS_ROOT(dentry) &&
(dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) {
struct dentry *parent = dentry->d_parent;
-
- if (!spin_trylock(&parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
- dget_dlock(parent);
+ dget(parent);
spin_unlock(&dentry->d_lock);
- spin_unlock(&parent->d_lock);
dput(dentry);
dentry = parent;
spin_lock(&dentry->d_lock);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 34259ac72cef..dd7175ce5606 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -196,9 +196,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
err = ext2_add_link(dentry, inode);
if (!err) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d5f19f954fb7..f9cb54a585ce 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -157,16 +157,9 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ext2_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
-}
-
static void ext2_destroy_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, ext2_i_callback);
+ kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}
static void init_once(void *foo)
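/*
 * The deleted ext2_i_callback() (and its fat, ext3 and hugetlbfs twins in
 * later hunks) appears to have existed because the scalability series
 * walked inode lists under RCU only -- see the list_for_each_entry_rcu()
 * removals in fs/drop_caches.c above -- so the inode's memory had to
 * survive a grace period via call_rcu(&inode->i_rcu, ...).  With real
 * locks restored, each filesystem can again free its inode synchronously
 * with kmem_cache_free().
 */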
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 2b40cffa2432..b39991285136 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -100,9 +100,9 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
struct ext3_sb_info *sbi;
int fatal = 0, err;
- if (inode->i_count > 1) {
+ if (atomic_read(&inode->i_count) > 1) {
printk ("ext3_free_inode: inode has count=%d\n",
- inode->i_count);
+ atomic_read(&inode->i_count));
return;
}
if (inode->i_nlink) {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index ee6676c11b5c..7b0e44f7d66f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2246,9 +2246,7 @@ retry:
inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
err = ext3_add_entry(handle, dentry, inode);
if (!err) {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 29dae329d4f6..afa2b569da10 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -483,13 +483,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ext3_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
-}
-
static void ext3_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -500,7 +493,7 @@ static void ext3_destroy_inode(struct inode *inode)
false);
dump_stack();
}
- call_rcu(&inode->i_rcu, ext3_i_callback);
+ kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
}
static void init_once(void *foo)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8297fad406c6..f3624ead4f6c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -192,9 +192,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
- if (inode->i_count > 1) {
+ if (atomic_read(&inode->i_count) > 1) {
printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- inode->i_count);
+ atomic_read(&inode->i_count));
return;
}
if (inode->i_nlink) {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 70150b6f09a2..17a17e10dd60 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2326,9 +2326,7 @@ retry:
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5220e057664d..5c5e1cd62414 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -497,16 +497,9 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void fat_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
-}
-
static void fat_destroy_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, fat_i_callback);
+ kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
static void init_once(void *foo)
diff --git a/fs/file_table.c b/fs/file_table.c
index a7c231786a9a..32d12b78bac8 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -22,7 +22,6 @@
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>
#include <linux/ima.h>
-#include <linux/percpu.h>
#include <asm/atomic.h>
@@ -33,7 +32,8 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
-static DEFINE_PER_CPU(spinlock_t, files_cpulock);
+/* public. Not pretty! */
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -258,7 +258,7 @@ void __fput(struct file *file)
cdev_put(inode->i_cdev);
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- file_sb_list_del(file);
+ file_kill(file);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -320,112 +320,55 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
return file;
}
+
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
- file_sb_list_del(file);
+ file_kill(file);
file_free(file);
}
}
-void file_sb_list_add(struct file *file, struct super_block *sb)
+void file_move(struct file *file, struct list_head *list)
{
- spinlock_t *lock;
- struct list_head *list;
-#ifdef CONFIG_SMP
- int cpu;
-#endif
-
- lock = &get_cpu_var(files_cpulock);
-#ifdef CONFIG_SMP
- cpu = smp_processor_id();
- list = per_cpu_ptr(sb->s_files, cpu);
- file->f_sb_list_cpu = cpu;
-#else
- list = &sb->s_files;
-#endif
- put_cpu_var(files_cpulock);
- spin_lock(lock);
- BUG_ON(!list_empty(&file->f_u.fu_list));
- list_add(&file->f_u.fu_list, list);
- spin_unlock(lock);
+ if (!list)
+ return;
+ file_list_lock();
+ list_move(&file->f_u.fu_list, list);
+ file_list_unlock();
}
-void file_sb_list_del(struct file *file)
+void file_kill(struct file *file)
{
if (!list_empty(&file->f_u.fu_list)) {
- spinlock_t *lock;
-
-#ifdef CONFIG_SMP
- lock = &per_cpu(files_cpulock, file->f_sb_list_cpu);
-#else
- lock = &__get_cpu_var(files_cpulock);
-#endif
- spin_lock(lock);
+ file_list_lock();
list_del_init(&file->f_u.fu_list);
- spin_unlock(lock);
- }
-}
-
-static void file_list_lock_all(void)
-{
- int i;
- int nr = 0;
-
- for_each_possible_cpu(i) {
- spinlock_t *lock;
-
- lock = &per_cpu(files_cpulock, i);
- spin_lock_nested(lock, nr);
- nr++;
- }
-}
-
-static void file_list_unlock_all(void)
-{
- int i;
-
- for_each_possible_cpu(i) {
- spinlock_t *lock;
-
- lock = &per_cpu(files_cpulock, i);
- spin_unlock(lock);
+ file_list_unlock();
}
}
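/*
 * file_list_lock()/file_list_unlock() used here are the pre-patch
 * helpers: in this era they are thin wrappers around the single global
 * files_lock declared at the top of this file, roughly
 *
 *	#define file_list_lock()   spin_lock(&files_lock)
 *	#define file_list_unlock() spin_unlock(&files_lock)
 *
 * so every file_move()/file_kill() on the system serializes on one lock --
 * exactly the contention point the reverted series tried to split per CPU.
 */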
int fs_may_remount_ro(struct super_block *sb)
{
- int i;
+ struct file *file;
/* Check that no files are currently opened for writing. */
- file_list_lock_all();
- for_each_possible_cpu(i) {
- struct file *file;
- struct list_head *list;
-
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_files, i);
-#else
- list = &sb->s_files;
-#endif
- list_for_each_entry(file, list, f_u.fu_list) {
- struct inode *inode = file->f_path.dentry->d_inode;
+ file_list_lock();
+ list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
+ struct inode *inode = file->f_path.dentry->d_inode;
- /* File with pending delete? */
- if (inode->i_nlink == 0)
- goto too_bad;
+ /* File with pending delete? */
+ if (inode->i_nlink == 0)
+ goto too_bad;
- /* Writeable file? */
- if (S_ISREG(inode->i_mode) &&
- (file->f_mode & FMODE_WRITE))
- goto too_bad;
- }
+ /* Writeable file? */
+ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
+ goto too_bad;
}
- file_list_unlock_all();
+ file_list_unlock();
return 1; /* Tis' cool bro. */
too_bad:
- file_list_unlock_all();
+ file_list_unlock();
return 0;
}
@@ -438,48 +381,40 @@ too_bad:
*/
void mark_files_ro(struct super_block *sb)
{
- int i;
+ struct file *f;
retry:
- file_list_lock_all();
- for_each_possible_cpu(i) {
- struct file *f;
- struct list_head *list;
-
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_files, i);
-#else
- list = &sb->s_files;
-#endif
- list_for_each_entry(f, list, f_u.fu_list) {
- struct vfsmount *mnt;
- if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
- continue;
- if (!file_count(f))
- continue;
- if (!(f->f_mode & FMODE_WRITE))
- continue;
- spin_lock(&f->f_lock);
- f->f_mode &= ~FMODE_WRITE;
- spin_unlock(&f->f_lock);
- if (file_check_writeable(f) != 0)
- continue;
- file_release_write(f);
- mnt = mntget(f->f_path.mnt);
- /* This can sleep, so we can't hold the spinlock. */
- file_list_unlock_all();
- mnt_drop_write(mnt);
- mntput(mnt);
- goto retry;
- }
+ file_list_lock();
+ list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+ struct vfsmount *mnt;
+ if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+ continue;
+ if (!file_count(f))
+ continue;
+ if (!(f->f_mode & FMODE_WRITE))
+ continue;
+ spin_lock(&f->f_lock);
+ f->f_mode &= ~FMODE_WRITE;
+ spin_unlock(&f->f_lock);
+ if (file_check_writeable(f) != 0)
+ continue;
+ file_release_write(f);
+ mnt = mntget(f->f_path.mnt);
+ file_list_unlock();
+ /*
+ * This can sleep, so we can't hold
+ * the file_list_lock() spinlock.
+ */
+ mnt_drop_write(mnt);
+ mntput(mnt);
+ goto retry;
}
- file_list_unlock_all();
+ file_list_unlock();
}
void __init files_init(unsigned long mempages)
{
int n;
- int i;
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -494,7 +429,5 @@ void __init files_init(unsigned long mempages)
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
files_defer_init();
- for_each_possible_cpu(i)
- spin_lock_init(&per_cpu(files_cpulock, i));
percpu_counter_init(&nr_files, 0);
}
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 3448e7c075e0..a24c58e181db 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,7 +110,6 @@ int unregister_filesystem(struct file_system_type * fs)
*tmp = fs->next;
fs->next = NULL;
write_unlock(&file_systems_lock);
- synchronize_rcu();
return 0;
}
tmp = &(*tmp)->next;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4682776308d9..1a7c42c64ff4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -286,7 +286,6 @@ static void redirty_tail(struct inode *inode)
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
- assert_spin_locked(&wb_inode_list_lock);
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -304,14 +303,13 @@ static void requeue_io(struct inode *inode)
{
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
- assert_spin_locked(&wb_inode_list_lock);
list_move(&inode->i_list, &wb->b_more_io);
}
static void inode_sync_complete(struct inode *inode)
{
/*
- * Prevent speculative execution through spin_unlock(&inode->i_lock);
+ * Prevent speculative execution through spin_unlock(&inode_lock);
*/
smp_mb();
wake_up_bit(&inode->i_state, __I_SYNC);
@@ -345,7 +343,6 @@ static void move_expired_inodes(struct list_head *delaying_queue,
struct inode *inode;
int do_sb_sort = 0;
- assert_spin_locked(&wb_inode_list_lock);
while (!list_empty(delaying_queue)) {
inode = list_entry(delaying_queue->prev, struct inode, i_list);
if (older_than_this &&
@@ -401,11 +398,9 @@ static void inode_wait_for_writeback(struct inode *inode)
wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
do {
- spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
- spin_lock(&inode->i_lock);
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
} while (inode->i_state & I_SYNC);
}
@@ -430,7 +425,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
unsigned dirty;
int ret;
- if (!inode->i_count)
+ if (!atomic_read(&inode->i_count))
WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
else
WARN_ON(inode->i_state & I_WILL_FREE);
@@ -462,8 +457,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
inode->i_state |= I_SYNC;
inode->i_state &= ~I_DIRTY;
- spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
ret = do_writepages(mapping, wbc);
@@ -480,8 +474,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
ret = err;
}
- spin_lock(&inode->i_lock);
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
@@ -540,11 +533,11 @@ select_queue:
inode->i_state |= I_DIRTY_PAGES;
redirty_tail(inode);
}
- } else if (inode->i_count) {
+ } else if (atomic_read(&inode->i_count)) {
/*
* The inode is clean, inuse
*/
- list_del_init(&inode->i_list);
+ list_move(&inode->i_list, &inode_in_use);
} else {
/*
* The inode is clean, unused
@@ -624,8 +617,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct super_block *sb = wbc->sb, *pin_sb = NULL;
const unsigned long start = jiffies; /* livelock avoidance */
-again:
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
if (!wbc->for_kupdate || list_empty(&wb->b_io))
queue_io(wb, wbc->older_than_this);
@@ -635,23 +627,16 @@ again:
struct inode, i_list);
long pages_skipped;
- if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&wb_inode_list_lock);
- goto again;
- }
-
/*
* super block given and doesn't match, skip this inode
*/
if (sb && sb != inode->i_sb) {
redirty_tail(inode);
- spin_unlock(&inode->i_lock);
continue;
}
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
- spin_unlock(&inode->i_lock);
continue;
}
@@ -659,14 +644,11 @@ again:
* Was this inode dirtied after sync_sb_inodes was called?
* This keeps sync from extra jobs and livelock.
*/
- if (inode_dirtied_after(inode, start)) {
- spin_unlock(&inode->i_lock);
+ if (inode_dirtied_after(inode, start))
break;
- }
if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
requeue_io(inode);
- spin_unlock(&inode->i_lock);
continue;
}
@@ -681,11 +663,10 @@ again:
*/
redirty_tail(inode);
}
- spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
iput(inode);
cond_resched();
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
if (wbc->nr_to_write <= 0) {
wbc->more_io = 1;
break;
@@ -693,9 +674,10 @@ again:
if (!list_empty(&wb->b_more_io))
wbc->more_io = 1;
}
- spin_unlock(&wb_inode_list_lock);
unpin_sb_for_writeback(&pin_sb);
+
+ spin_unlock(&inode_lock);
/* Leave any unwritten inodes on b_io */
}
@@ -807,19 +789,13 @@ static long wb_writeback(struct bdi_writeback *wb,
* become available for writeback. Otherwise
* we'll just busyloop.
*/
-retry:
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
if (!list_empty(&wb->b_more_io)) {
inode = list_entry(wb->b_more_io.prev,
struct inode, i_list);
- if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&wb_inode_list_lock);
- goto retry;
- }
inode_wait_for_writeback(inode);
- spin_unlock(&inode->i_lock);
}
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lock);
}
return wrote;
@@ -866,7 +842,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
wb->last_old_flush = jiffies;
nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
- get_nr_inodes() - inodes_stat.nr_unused;
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
struct wb_writeback_args args = {
@@ -1073,7 +1049,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode);
- spin_lock(&inode->i_lock);
+ spin_lock(&inode_lock);
if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY;
@@ -1114,13 +1090,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
inode->dirtied_when = jiffies;
- spin_lock(&wb_inode_list_lock);
list_move(&inode->i_list, &wb->b_dirty);
- spin_unlock(&wb_inode_list_lock);
}
}
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__mark_inode_dirty);
@@ -1143,7 +1117,7 @@ EXPORT_SYMBOL(__mark_inode_dirty);
*/
static void wait_sb_inodes(struct super_block *sb)
{
- int i;
+ struct inode *inode, *old_inode = NULL;
/*
* We need to be protected against the filesystem going from
@@ -1151,57 +1125,44 @@ static void wait_sb_inodes(struct super_block *sb)
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));
- for_each_possible_cpu(i) {
- struct inode *inode, *old_inode = NULL;
- struct list_head *list;
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_inodes, i);
-#else
- list = &sb->s_inodes;
-#endif
+ spin_lock(&inode_lock);
+
+ /*
+ * Data integrity sync. Must wait for all pages under writeback,
+ * because there may have been pages dirtied before our sync
+ * call, but which had writeout started before we write it out.
+ * In which case, the inode may not be on the dirty list, but
+ * we still have to wait for that writeout.
+ */
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ struct address_space *mapping;
+
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+ continue;
+ mapping = inode->i_mapping;
+ if (mapping->nrpages == 0)
+ continue;
+ __iget(inode);
+ spin_unlock(&inode_lock);
/*
- * Data integrity sync. Must wait for all pages under writeback,
- * because there may have been pages dirtied before our sync
- * call, but which had writeout started before we write it out.
- * In which case, the inode may not be on the dirty list, but
- * we still have to wait for that writeout.
+ * We hold a reference to 'inode' so it couldn't have
+ * been removed from s_inodes list while we dropped the
+ * inode_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it
+ * under inode_lock. So we keep the reference and iput
+ * it later.
*/
- rcu_read_lock();
- list_for_each_entry_rcu(inode, list, i_sb_list) {
- struct address_space *mapping;
-
- mapping = inode->i_mapping;
- if (mapping->nrpages == 0)
- continue;
-
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- rcu_read_unlock();
- /*
- * We hold a reference to 'inode' so it couldn't have
- * been removed from s_inodes list while we dropped the
- * i_lock. We cannot iput the inode now as we can be
- * holding the last reference and we cannot iput it
- * under spinlock. So we keep the reference and iput it
- * later.
- */
- iput(old_inode);
- old_inode = inode;
+ iput(old_inode);
+ old_inode = inode;
- filemap_fdatawait(mapping);
+ filemap_fdatawait(mapping);
- cond_resched();
+ cond_resched();
- rcu_read_lock();
- }
- rcu_read_unlock();
- iput(old_inode);
+ spin_lock(&inode_lock);
}
+ spin_unlock(&inode_lock);
+ iput(old_inode);
}
/**
@@ -1220,7 +1181,7 @@ void writeback_inodes_sb(struct super_block *sb)
long nr_to_write;
nr_to_write = nr_dirty + nr_unstable +
- get_nr_inodes() - inodes_stat.nr_unused;
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
}
@@ -1281,11 +1242,9 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0;
might_sleep();
- spin_lock(&inode->i_lock);
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
ret = writeback_single_inode(inode, &wbc);
- spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
if (sync)
inode_sync_wait(inode);
return ret;
@@ -1307,11 +1266,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret;
- spin_lock(&inode->i_lock);
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
ret = writeback_single_inode(inode, wbc);
- spin_unlock(&wb_inode_list_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
return ret;
}
EXPORT_SYMBOL(sync_inode);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f5a07d98ec06..84350e1be66d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -253,9 +253,7 @@ out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
}
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 557c5335de66..5f4023678251 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -301,9 +301,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
inc_nlink(inode);
hfsplus_instantiate(dst_dentry, inode, cnid);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
HFSPLUS_SB(sb).file_count++;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index b22044cb988b..fe703ae46bc7 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -182,7 +182,7 @@ void hpfs_write_inode(struct inode *i)
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
struct inode *parent;
if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return;
- if (hpfs_inode->i_rddir_off && !i->i_count) {
+ if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) {
if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n");
kfree(hpfs_inode->i_rddir_off);
hpfs_inode->i_rddir_off = NULL;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d1a78fc2a78e..a0bbd3d1b41a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -377,12 +377,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
clear_inode(inode);
}
-static void hugetlbfs_forget_inode(struct inode *inode)
+static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
if (generic_detach_inode(inode)) {
truncate_hugepages(inode, 0);
clear_inode(inode);
- /* XXX: why no wake_up_inode? */
destroy_inode(inode);
}
}
@@ -665,18 +664,11 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
return &p->vfs_inode;
}
-static void hugetlbfs_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
-}
-
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
- call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
+ kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}
static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index 972169668aed..ce10948610e9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -74,13 +74,9 @@ static unsigned int i_hash_shift __read_mostly;
* allowing for low-overhead inode sync() operations.
*/
+LIST_HEAD(inode_in_use);
LIST_HEAD(inode_unused);
-
-struct inode_hash_bucket {
- spinlock_t lock;
- struct hlist_head head;
-};
-static struct inode_hash_bucket *inode_hashtable __read_mostly;
+static struct hlist_head *inode_hashtable __read_mostly;
/*
* A simple spinlock to protect the list manipulations.
@@ -88,8 +84,7 @@ static struct inode_hash_bucket *inode_hashtable __read_mostly;
* NOTE! You also have to own the lock if you change
* the i_state of an inode while it is in use..
*/
-static DEFINE_PER_CPU(spinlock_t, inode_cpulock);
-DEFINE_SPINLOCK(wb_inode_list_lock);
+DEFINE_SPINLOCK(inode_lock);
/*
* iprune_sem provides exclusion between the kswapd or try_to_free_pages
@@ -108,37 +103,10 @@ static DECLARE_RWSEM(iprune_sem);
/*
* Statistics gathering..
*/
-struct inodes_stat_t inodes_stat = {
- .nr_inodes = 0,
- .nr_unused = 0,
-};
-struct percpu_counter nr_inodes;
+struct inodes_stat_t inodes_stat;
static struct kmem_cache *inode_cachep __read_mostly;
-int get_nr_inodes(void)
-{
- return percpu_counter_sum_positive(&nr_inodes);
-}
-
-/*
- * Handle nr_dentry sysctl
- */
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_inodes(ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- inodes_stat.nr_inodes = get_nr_inodes();
- return proc_dointvec(table, write, buffer, lenp, ppos);
-}
-#else
-int proc_nr_inodes(ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return -ENOSYS;
-}
-#endif
-
static void wake_up_inode(struct inode *inode)
{
/*
@@ -166,7 +134,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
- inode->i_count = 1;
+ atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_nlink = 1;
@@ -269,20 +237,13 @@ void __destroy_inode(struct inode *inode)
}
EXPORT_SYMBOL(__destroy_inode);
-static void i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(inode_cachep, inode);
-}
-
void destroy_inode(struct inode *inode)
{
__destroy_inode(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
- call_rcu(&inode->i_rcu, i_callback);
+ kmem_cache_free(inode_cachep, inode);
}
/*
@@ -296,7 +257,6 @@ void inode_init_once(struct inode *inode)
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
- INIT_LIST_HEAD(&inode->i_list);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -322,6 +282,21 @@ static void init_once(void *foo)
inode_init_once(inode);
}
+/*
+ * inode_lock must be held
+ */
+void __iget(struct inode *inode)
+{
+ if (atomic_read(&inode->i_count)) {
+ atomic_inc(&inode->i_count);
+ return;
+ }
+ atomic_inc(&inode->i_count);
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ list_move(&inode->i_list, &inode_in_use);
+ inodes_stat.nr_unused--;
+}
+
/**
* clear_inode - clear an inode
* @inode: inode to clear
@@ -365,70 +340,65 @@ static void dispose_list(struct list_head *head)
struct inode *inode;
inode = list_first_entry(head, struct inode, i_list);
- list_del_init(&inode->i_list);
+ list_del(&inode->i_list);
if (inode->i_data.nrpages)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
- spin_lock(&inode->i_lock);
- __remove_inode_hash(inode);
- inode_sb_list_del(inode);
- spin_unlock(&inode->i_lock);
+ spin_lock(&inode_lock);
+ hlist_del_init(&inode->i_hash);
+ list_del_init(&inode->i_sb_list);
+ spin_unlock(&inode_lock);
wake_up_inode(inode);
destroy_inode(inode);
nr_disposed++;
}
+ spin_lock(&inode_lock);
+ inodes_stat.nr_inodes -= nr_disposed;
+ spin_unlock(&inode_lock);
}
/*
* Invalidate all inodes for a device.
*/
-static int invalidate_sb_inodes(struct super_block *sb, struct list_head *dispose)
+static int invalidate_list(struct list_head *head, struct list_head *dispose)
{
- int busy = 0;
- int i;
-
- for_each_possible_cpu(i) {
- struct list_head *next;
- struct list_head *head;
-#ifdef CONFIG_SMP
- head = per_cpu_ptr(sb->s_inodes, i);
-#else
- head = &sb->s_inodes;
-#endif
+ struct list_head *next;
+ int busy = 0, count = 0;
- next = head->next;
- for (;;) {
- struct list_head *tmp = next;
- struct inode *inode;
-
- next = next->next;
- if (tmp == head)
- break;
- inode = list_entry(tmp, struct inode, i_sb_list);
- spin_lock(&inode->i_lock);
- if (inode->i_state & I_NEW) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- invalidate_inode_buffers(inode);
- if (!inode->i_count) {
- spin_lock(&wb_inode_list_lock);
- list_del(&inode->i_list);
- inodes_stat.nr_unused--;
- spin_unlock(&wb_inode_list_lock);
- WARN_ON(inode->i_state & I_NEW);
- inode->i_state |= I_FREEING;
- spin_unlock(&inode->i_lock);
- list_add(&inode->i_list, dispose);
- continue;
- }
- spin_unlock(&inode->i_lock);
- busy = 1;
+ next = head->next;
+ for (;;) {
+ struct list_head *tmp = next;
+ struct inode *inode;
+
+ /*
+ * We can reschedule here without worrying about the list's
+ * consistency because the per-sb list of inodes must not
+ * change during umount anymore, and because iprune_sem keeps
+ * shrink_icache_memory() away.
+ */
+ cond_resched_lock(&inode_lock);
+
+ next = next->next;
+ if (tmp == head)
+ break;
+ inode = list_entry(tmp, struct inode, i_sb_list);
+ if (inode->i_state & I_NEW)
+ continue;
+ invalidate_inode_buffers(inode);
+ if (!atomic_read(&inode->i_count)) {
+ list_move(&inode->i_list, dispose);
+ WARN_ON(inode->i_state & I_NEW);
+ inode->i_state |= I_FREEING;
+ count++;
+ continue;
}
+ busy = 1;
}
+ /* only unused inodes may be cached with i_count zero */
+ inodes_stat.nr_unused -= count;
return busy;
}
@@ -445,17 +415,12 @@ int invalidate_inodes(struct super_block *sb)
int busy;
LIST_HEAD(throw_away);
- /*
- * Don't need to worry about the list's consistency because the per-sb
- * list of inodes must not change during umount anymore, and because
- * iprune_sem keeps shrink_icache_memory() away.
- */
down_write(&iprune_sem);
-// spin_lock(&sb_inode_list_lock); XXX: is this safe?
- inotify_unmount_inodes(sb);
- fsnotify_unmount_inodes(sb);
- busy = invalidate_sb_inodes(sb, &throw_away);
-// spin_unlock(&sb_inode_list_lock);
+ spin_lock(&inode_lock);
+ inotify_unmount_inodes(&sb->s_inodes);
+ fsnotify_unmount_inodes(&sb->s_inodes);
+ busy = invalidate_list(&sb->s_inodes, &throw_away);
+ spin_unlock(&inode_lock);
dispose_list(&throw_away);
up_write(&iprune_sem);
@@ -470,7 +435,7 @@ static int can_unuse(struct inode *inode)
return 0;
if (inode_has_buffers(inode))
return 0;
- if (inode->i_count)
+ if (atomic_read(&inode->i_count))
return 0;
if (inode->i_data.nrpages)
return 0;
@@ -493,12 +458,12 @@ static int can_unuse(struct inode *inode)
static void prune_icache(int nr_to_scan)
{
LIST_HEAD(freeable);
+ int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;
down_read(&iprune_sem);
-again:
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
@@ -507,56 +472,36 @@ again:
inode = list_entry(inode_unused.prev, struct inode, i_list);
- if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&wb_inode_list_lock);
- goto again;
- }
- if (inode->i_count) {
- list_del_init(&inode->i_list);
- spin_unlock(&inode->i_lock);
- inodes_stat.nr_unused--;
- continue;
- }
- if (inode->i_state) {
+ if (inode->i_state || atomic_read(&inode->i_count)) {
list_move(&inode->i_list, &inode_unused);
- spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
- spin_unlock(&wb_inode_list_lock);
__iget(inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
-again2:
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
- /* XXX: may no longer work well */
if (inode != list_entry(inode_unused.next,
struct inode, i_list))
continue; /* wrong inode or list_empty */
- if (!spin_trylock(&inode->i_lock)) {
- spin_unlock(&wb_inode_list_lock);
- goto again2;
- }
- if (!can_unuse(inode)) {
- spin_unlock(&inode->i_lock);
+ if (!can_unuse(inode))
continue;
- }
}
list_move(&inode->i_list, &freeable);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- spin_unlock(&inode->i_lock);
- inodes_stat.nr_unused--;
+ nr_pruned++;
}
+ inodes_stat.nr_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lock);
dispose_list(&freeable);
up_read(&iprune_sem);
@@ -583,7 +528,7 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask)
return -1;
prune_icache(nr);
}
- return inodes_stat.nr_unused / 100 * sysctl_vfs_cache_pressure;
+ return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
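The parenthesization change in shrink_icache_memory() is behavior-preserving (C evaluates / and * left to right) and only makes the intent explicit: report a pressure-scaled fraction of the unused-inode count. As a worked example, with 50000 unused inodes and the default vfs_cache_pressure of 100 the shrinker reports (50000 / 100) * 100 = 50000 freeable objects; at vfs_cache_pressure 50 it reports 25000.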
static struct shrinker icache_shrinker = {
@@ -599,7 +544,7 @@ static void __wait_on_freeing_inode(struct inode *inode);
* add any additional branch in the common code.
*/
static struct inode *find_inode(struct super_block *sb,
- struct inode_hash_bucket *b,
+ struct hlist_head *head,
int (*test)(struct inode *, void *),
void *data)
{
@@ -607,27 +552,17 @@ static struct inode *find_inode(struct super_block *sb,
struct inode *inode = NULL;
repeat:
- rcu_read_lock();
- hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) {
+ hlist_for_each_entry(inode, node, head, i_hash) {
if (inode->i_sb != sb)
continue;
- spin_lock(&inode->i_lock);
- if (hlist_unhashed(&inode->i_hash)) {
- spin_unlock(&inode->i_lock);
+ if (!test(inode, data))
continue;
- }
- if (!test(inode, data)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
- rcu_read_unlock();
__wait_on_freeing_inode(inode);
goto repeat;
}
break;
}
- rcu_read_unlock();
return node ? inode : NULL;
}
@@ -636,32 +571,23 @@ repeat:
* iget_locked for details.
*/
static struct inode *find_inode_fast(struct super_block *sb,
- struct inode_hash_bucket *b,
- unsigned long ino)
+ struct hlist_head *head, unsigned long ino)
{
struct hlist_node *node;
struct inode *inode = NULL;
repeat:
- rcu_read_lock();
- hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) {
+ hlist_for_each_entry(inode, node, head, i_hash) {
if (inode->i_ino != ino)
continue;
if (inode->i_sb != sb)
continue;
- spin_lock(&inode->i_lock);
- if (hlist_unhashed(&inode->i_hash)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
- rcu_read_unlock();
__wait_on_freeing_inode(inode);
goto repeat;
}
break;
}
- rcu_read_unlock();
return node ? inode : NULL;
}
@@ -675,88 +601,16 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
return tmp & I_HASHMASK;
}
-static void inode_sb_list_add(struct inode *inode, struct super_block *sb)
-{
- spinlock_t *lock;
- struct list_head *list;
-#ifdef CONFIG_SMP
- int cpu;
-#endif
-
- lock = &get_cpu_var(inode_cpulock);
-#ifdef CONFIG_SMP
- cpu = smp_processor_id();
- list = per_cpu_ptr(sb->s_inodes, cpu);
- inode->i_sb_list_cpu = cpu;
-#else
- list = &sb->s_inodes;
-#endif
- put_cpu_var(inode_cpulock);
- spin_lock(lock);
- list_add_rcu(&inode->i_sb_list, list);
- spin_unlock(lock);
-}
-
-void inode_sb_list_del(struct inode *inode)
-{
- spinlock_t *lock;
-
-#ifdef CONFIG_SMP
- lock = &per_cpu(inode_cpulock, inode->i_sb_list_cpu);
-#else
- lock = &__get_cpu_var(inode_cpulock);
-#endif
- spin_lock(lock);
- list_del_rcu(&inode->i_sb_list);
- spin_unlock(lock);
-}
-
static inline void
-__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b,
+__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
struct inode *inode)
{
- inode_sb_list_add(inode, sb);
- percpu_counter_inc(&nr_inodes);
- if (b) {
- spin_lock(&b->lock);
- hlist_add_head(&inode->i_hash, &b->head);
- spin_unlock(&b->lock);
- }
-}
-
-#ifdef CONFIG_SMP
-/*
- * Each cpu owns a range of 1024 numbers.
- * 'shared_last_ino' is dirtied only once out of 1024 allocations,
- * to renew the exhausted range.
- *
- * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
- * error if st_ino won't fit in target struct field. Use 32bit counter
- * here to attempt to avoid that.
- */
-static DEFINE_PER_CPU(int, last_ino);
-static atomic_t shared_last_ino;
-
-static int last_ino_get(void)
-{
- int *p = &get_cpu_var(last_ino);
- int res = *p;
-
- if (unlikely((res & 1023) == 0))
- res = atomic_add_return(1024, &shared_last_ino) - 1024;
-
- *p = ++res;
- put_cpu_var(last_ino);
- return res;
-}
-#else
-static int last_ino_get(void)
-{
- static int last_ino;
-
- return ++last_ino;
+ inodes_stat.nr_inodes++;
+ list_add(&inode->i_list, &inode_in_use);
+ list_add(&inode->i_sb_list, &sb->s_inodes);
+ if (head)
+ hlist_add_head(&inode->i_hash, head);
}
-#endif
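The deleted last_ino_get() above avoided dirtying a shared cacheline on every inode allocation by handing each CPU a private block of 1024 numbers and refilling from one shared atomic only when a block ran out; the revert goes back to a single counter incremented under inode_lock (visible in the new_inode() hunk below). A minimal sketch of that per-CPU batching, with example_ names standing in for the removed ones:

    static DEFINE_PER_CPU(int, example_last_ino);
    static atomic_t example_shared_ino;

    static int example_ino_get(void)
    {
            int *p = &get_cpu_var(example_last_ino);   /* disables preemption */
            int res = *p;

            /* block exhausted: take a fresh range of 1024 from the shared counter */
            if (unlikely((res & 1023) == 0))
                    res = atomic_add_return(1024, &example_shared_ino) - 1024;

            *p = ++res;
            put_cpu_var(example_last_ino);
            return res;
    }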
/**
* inode_add_to_lists - add a new inode to relevant lists
@@ -772,11 +626,11 @@ static int last_ino_get(void)
*/
void inode_add_to_lists(struct super_block *sb, struct inode *inode)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, inode->i_ino);
+ struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
- spin_lock(&inode->i_lock);
- __inode_add_to_lists(sb, b, inode);
- spin_unlock(&inode->i_lock);
+ spin_lock(&inode_lock);
+ __inode_add_to_lists(sb, head, inode);
+ spin_unlock(&inode_lock);
}
EXPORT_SYMBOL_GPL(inode_add_to_lists);
@@ -794,15 +648,23 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
*/
struct inode *new_inode(struct super_block *sb)
{
+ /*
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
+ */
+ static unsigned int last_ino;
struct inode *inode;
+ spin_lock_prefetch(&inode_lock);
+
inode = alloc_inode(sb);
if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_ino = last_ino_get();
- inode->i_state = 0;
+ spin_lock(&inode_lock);
__inode_add_to_lists(sb, NULL, inode);
- spin_unlock(&inode->i_lock);
+ inode->i_ino = ++last_ino;
+ inode->i_state = 0;
+ spin_unlock(&inode_lock);
}
return inode;
}
@@ -850,7 +712,7 @@ EXPORT_SYMBOL(unlock_new_inode);
* -- rmk@arm.uk.linux.org
*/
static struct inode *get_new_inode(struct super_block *sb,
- struct inode_hash_bucket *b,
+ struct hlist_head *head,
int (*test)(struct inode *, void *),
int (*set)(struct inode *, void *),
void *data)
@@ -861,16 +723,16 @@ static struct inode *get_new_inode(struct super_block *sb,
if (inode) {
struct inode *old;
+ spin_lock(&inode_lock);
/* We released the lock, so.. */
- old = find_inode(sb, b, test, data);
+ old = find_inode(sb, head, test, data);
if (!old) {
- spin_lock(&inode->i_lock);
if (set(inode, data))
goto set_failed;
+ __inode_add_to_lists(sb, head, inode);
inode->i_state = I_NEW;
- __inode_add_to_lists(sb, b, inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@@ -884,7 +746,7 @@ static struct inode *get_new_inode(struct super_block *sb,
* allocated.
*/
__iget(old);
- spin_unlock(&old->i_lock);
+ spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
@@ -892,7 +754,7 @@ static struct inode *get_new_inode(struct super_block *sb,
return inode;
set_failed:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
destroy_inode(inode);
return NULL;
}
@@ -902,7 +764,7 @@ set_failed:
* comment at iget_locked for details.
*/
static struct inode *get_new_inode_fast(struct super_block *sb,
- struct inode_hash_bucket *b, unsigned long ino)
+ struct hlist_head *head, unsigned long ino)
{
struct inode *inode;
@@ -910,14 +772,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
if (inode) {
struct inode *old;
+ spin_lock(&inode_lock);
/* We released the lock, so.. */
- old = find_inode_fast(sb, b, ino);
+ old = find_inode_fast(sb, head, ino);
if (!old) {
- spin_lock(&inode->i_lock);
inode->i_ino = ino;
+ __inode_add_to_lists(sb, head, inode);
inode->i_state = I_NEW;
- __inode_add_to_lists(sb, b, inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
* caller is responsible for filling in the contents
@@ -931,7 +793,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
* allocated.
*/
__iget(old);
- spin_unlock(&old->i_lock);
+ spin_unlock(&inode_lock);
destroy_inode(inode);
inode = old;
wait_on_inode(inode);
@@ -939,23 +801,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
return inode;
}
-static int test_inode_iunique(struct super_block *sb,
- struct inode_hash_bucket *b, unsigned long ino)
-{
- struct hlist_node *node;
- struct inode *inode = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) {
- if (inode->i_ino == ino && inode->i_sb == sb) {
- rcu_read_unlock();
- return 0;
- }
- }
- rcu_read_unlock();
- return 1;
-}
-
/**
* iunique - get a unique inode number
* @sb: superblock
@@ -977,19 +822,20 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
* error if st_ino won't fit in target struct field. Use 32bit counter
* here to attempt to avoid that.
*/
- static DEFINE_SPINLOCK(unique_lock);
static unsigned int counter;
- struct inode_hash_bucket *b;
+ struct inode *inode;
+ struct hlist_head *head;
ino_t res;
- spin_lock(&unique_lock);
+ spin_lock(&inode_lock);
do {
if (counter <= max_reserved)
counter = max_reserved + 1;
res = counter++;
- b = inode_hashtable + hash(sb, res);
- } while (!test_inode_iunique(sb, b, res));
- spin_unlock(&unique_lock);
+ head = inode_hashtable + hash(sb, res);
+ inode = find_inode_fast(sb, head, res);
+ } while (inode != NULL);
+ spin_unlock(&inode_lock);
return res;
}
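After the revert, iunique() probes the inode hash under inode_lock with find_inode_fast() until it finds a number unused on this superblock. A hedged usage sketch for a filesystem without stable on-disk inode numbers (the reserved value 15 is illustrative):

    struct inode *inode = new_inode(sb);
    if (inode)
            inode->i_ino = iunique(sb, 15);  /* 0..15 stay reserved; first candidate is 16 */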
@@ -997,9 +843,7 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
- struct inode *ret = inode;
-
- spin_lock(&inode->i_lock);
+ spin_lock(&inode_lock);
if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
__iget(inode);
else
@@ -1008,10 +852,9 @@ struct inode *igrab(struct inode *inode)
* called yet, and somebody is calling igrab
* while the inode is getting freed.
*/
- ret = NULL;
- spin_unlock(&inode->i_lock);
-
- return ret;
+ inode = NULL;
+ spin_unlock(&inode_lock);
+ return inode;
}
EXPORT_SYMBOL(igrab);
@@ -1035,20 +878,21 @@ EXPORT_SYMBOL(igrab);
* Note, @test is called with the inode_lock held, so can't sleep.
*/
static struct inode *ifind(struct super_block *sb,
- struct inode_hash_bucket *b,
- int (*test)(struct inode *, void *),
+ struct hlist_head *head, int (*test)(struct inode *, void *),
void *data, const int wait)
{
struct inode *inode;
- inode = find_inode(sb, b, test, data);
+ spin_lock(&inode_lock);
+ inode = find_inode(sb, head, test, data);
if (inode) {
__iget(inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
if (likely(wait))
wait_on_inode(inode);
return inode;
}
+ spin_unlock(&inode_lock);
return NULL;
}
@@ -1068,18 +912,19 @@ static struct inode *ifind(struct super_block *sb,
* Otherwise NULL is returned.
*/
static struct inode *ifind_fast(struct super_block *sb,
- struct inode_hash_bucket *b,
- unsigned long ino)
+ struct hlist_head *head, unsigned long ino)
{
struct inode *inode;
- inode = find_inode_fast(sb, b, ino);
+ spin_lock(&inode_lock);
+ inode = find_inode_fast(sb, head, ino);
if (inode) {
__iget(inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
wait_on_inode(inode);
return inode;
}
+ spin_unlock(&inode_lock);
return NULL;
}
@@ -1107,9 +952,9 @@ static struct inode *ifind_fast(struct super_block *sb,
struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
- return ifind(sb, b, test, data, 0);
+ return ifind(sb, head, test, data, 0);
}
EXPORT_SYMBOL(ilookup5_nowait);
@@ -1135,9 +980,9 @@ EXPORT_SYMBOL(ilookup5_nowait);
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
- return ifind(sb, b, test, data, 1);
+ return ifind(sb, head, test, data, 1);
}
EXPORT_SYMBOL(ilookup5);
@@ -1157,9 +1002,9 @@ EXPORT_SYMBOL(ilookup5);
*/
struct inode *ilookup(struct super_block *sb, unsigned long ino)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+ struct hlist_head *head = inode_hashtable + hash(sb, ino);
- return ifind_fast(sb, b, ino);
+ return ifind_fast(sb, head, ino);
}
EXPORT_SYMBOL(ilookup);
@@ -1187,17 +1032,17 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *),
int (*set)(struct inode *, void *), void *data)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
struct inode *inode;
- inode = ifind(sb, b, test, data, 1);
+ inode = ifind(sb, head, test, data, 1);
if (inode)
return inode;
/*
* get_new_inode() will do the right thing, re-trying the search
* in case it had to block at any point.
*/
- return get_new_inode(sb, b, test, set, data);
+ return get_new_inode(sb, head, test, set, data);
}
EXPORT_SYMBOL(iget5_locked);
@@ -1218,17 +1063,17 @@ EXPORT_SYMBOL(iget5_locked);
*/
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+ struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
- inode = ifind_fast(sb, b, ino);
+ inode = ifind_fast(sb, head, ino);
if (inode)
return inode;
/*
* get_new_inode_fast() will do the right thing, re-trying the search
* in case it had to block at any point.
*/
- return get_new_inode_fast(sb, b, ino);
+ return get_new_inode_fast(sb, head, ino);
}
EXPORT_SYMBOL(iget_locked);
@@ -1236,37 +1081,29 @@ int insert_inode_locked(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
ino_t ino = inode->i_ino;
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino);
+ struct hlist_head *head = inode_hashtable + hash(sb, ino);
inode->i_state |= I_NEW;
while (1) {
struct hlist_node *node;
struct inode *old = NULL;
-
-repeat:
- spin_lock(&b->lock);
- hlist_for_each_entry(old, node, &b->head, i_hash) {
+ spin_lock(&inode_lock);
+ hlist_for_each_entry(old, node, head, i_hash) {
if (old->i_ino != ino)
continue;
if (old->i_sb != sb)
continue;
if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
continue;
- if (!spin_trylock(&old->i_lock)) {
- spin_unlock(&b->lock);
- goto repeat;
- }
break;
}
if (likely(!node)) {
- /* XXX: initialize inode->i_lock to locked? */
- hlist_add_head(&inode->i_hash, &b->head);
- spin_unlock(&b->lock);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_lock);
return 0;
}
- spin_unlock(&b->lock);
__iget(old);
- spin_unlock(&old->i_lock);
+ spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
iput(old);
@@ -1281,7 +1118,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *), void *data)
{
struct super_block *sb = inode->i_sb;
- struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval);
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
inode->i_state |= I_NEW;
@@ -1289,30 +1126,23 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
struct hlist_node *node;
struct inode *old = NULL;
-repeat:
- spin_lock(&b->lock);
- hlist_for_each_entry(old, node, &b->head, i_hash) {
+ spin_lock(&inode_lock);
+ hlist_for_each_entry(old, node, head, i_hash) {
if (old->i_sb != sb)
continue;
if (!test(old, data))
continue;
if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
continue;
- if (!spin_trylock(&old->i_lock)) {
- spin_unlock(&b->lock);
- goto repeat;
- }
break;
}
if (likely(!node)) {
- /* XXX: initialize inode->i_lock to locked? */
- hlist_add_head(&inode->i_hash, &b->head);
- spin_unlock(&b->lock);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_lock);
return 0;
}
- spin_unlock(&b->lock);
__iget(old);
- spin_unlock(&old->i_lock);
+ spin_unlock(&inode_lock);
wait_on_inode(old);
if (unlikely(!hlist_unhashed(&old->i_hash))) {
iput(old);
@@ -1333,32 +1163,14 @@ EXPORT_SYMBOL(insert_inode_locked4);
*/
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
- struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, hashval);
-
- spin_lock(&inode->i_lock);
- spin_lock(&b->lock);
- hlist_add_head(&inode->i_hash, &b->head);
- spin_unlock(&b->lock);
- spin_unlock(&inode->i_lock);
+ struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
+ spin_lock(&inode_lock);
+ hlist_add_head(&inode->i_hash, head);
+ spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);
/**
- * __remove_inode_hash - remove an inode from the hash
- * @inode: inode to unhash
- *
- * Remove an inode from the superblock. inode->i_lock must be
- * held.
- */
-void __remove_inode_hash(struct inode *inode)
-{
- struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, inode->i_ino);
- spin_lock(&b->lock);
- hlist_del_init(&inode->i_hash);
- spin_unlock(&b->lock);
-}
-
-/**
* remove_inode_hash - remove an inode from the hash
* @inode: inode to unhash
*
@@ -1366,9 +1178,9 @@ void __remove_inode_hash(struct inode *inode)
*/
void remove_inode_hash(struct inode *inode)
{
- spin_lock(&inode->i_lock);
- __remove_inode_hash(inode);
- spin_unlock(&inode->i_lock);
+ spin_lock(&inode_lock);
+ hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_lock);
}
EXPORT_SYMBOL(remove_inode_hash);
@@ -1388,16 +1200,12 @@ void generic_delete_inode(struct inode *inode)
{
const struct super_operations *op = inode->i_sb->s_op;
- if (!list_empty(&inode->i_list)) {
- spin_lock(&wb_inode_list_lock);
- list_del_init(&inode->i_list);
- spin_unlock(&wb_inode_list_lock);
- }
- inode_sb_list_del(inode);
- percpu_counter_dec(&nr_inodes);
+ list_del_init(&inode->i_list);
+ list_del_init(&inode->i_sb_list);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- spin_unlock(&inode->i_lock);
+ inodes_stat.nr_inodes--;
+ spin_unlock(&inode_lock);
security_inode_delete(inode);
@@ -1414,15 +1222,9 @@ void generic_delete_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
}
- /*
- * i_lock not required to delete from hash. If there was a
- * concurrency window, then it would be possible for the other
- * thread to touch the inode after it has been freed, with
- * destroy_inode.
- * XXX: yes it is because find_inode_fast checks it. Maybe we
- * can avoid it though...
- */
- remove_inode_hash(inode);
+ spin_lock(&inode_lock);
+ hlist_del_init(&inode->i_hash);
+ spin_unlock(&inode_lock);
wake_up_inode(inode);
BUG_ON(inode->i_state != I_CLEAR);
destroy_inode(inode);
@@ -1443,36 +1245,29 @@ int generic_detach_inode(struct inode *inode)
struct super_block *sb = inode->i_sb;
if (!hlist_unhashed(&inode->i_hash)) {
- if (list_empty(&inode->i_list)) {
- spin_lock(&wb_inode_list_lock);
- list_add(&inode->i_list, &inode_unused);
- inodes_stat.nr_unused++;
- spin_unlock(&wb_inode_list_lock);
- }
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ list_move(&inode->i_list, &inode_unused);
+ inodes_stat.nr_unused++;
if (sb->s_flags & MS_ACTIVE) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
return 0;
}
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
write_inode_now(inode, 1);
- spin_lock(&inode->i_lock);
+ spin_lock(&inode_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
- __remove_inode_hash(inode);
- }
- if (!list_empty(&inode->i_list)) {
- spin_lock(&wb_inode_list_lock);
- list_del_init(&inode->i_list);
inodes_stat.nr_unused--;
- spin_unlock(&wb_inode_list_lock);
+ hlist_del_init(&inode->i_hash);
}
- inode_sb_list_del(inode);
- percpu_counter_dec(&nr_inodes);
+ list_del_init(&inode->i_list);
+ list_del_init(&inode->i_sb_list);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
- spin_unlock(&inode->i_lock);
+ inodes_stat.nr_inodes--;
+ spin_unlock(&inode_lock);
return 1;
}
EXPORT_SYMBOL_GPL(generic_detach_inode);
@@ -1537,12 +1332,8 @@ void iput(struct inode *inode)
if (inode) {
BUG_ON(inode->i_state == I_CLEAR);
- spin_lock(&inode->i_lock);
- inode->i_count--;
- if (inode->i_count == 0)
+ if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
iput_final(inode);
- else
- spin_unlock(&inode->i_lock);
}
}
EXPORT_SYMBOL(iput);
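The reverted iput() is the canonical atomic_dec_and_lock() idiom: the common case is a plain atomic decrement, and the lock is taken only when the count would reach zero, so final teardown is atomic with respect to anyone who might find the object under that same lock. A minimal sketch of the idiom with hypothetical names:

    static DEFINE_SPINLOCK(example_lock);

    struct example_obj {
            atomic_t count;
    };

    static void example_put(struct example_obj *obj)
    {
            /* returns nonzero, with example_lock held, only on the 1 -> 0 transition */
            if (atomic_dec_and_lock(&obj->count, &example_lock)) {
                    /* ... unhash and free; lookups under example_lock cannot race us ... */
                    spin_unlock(&example_lock);
            }
    }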
@@ -1723,8 +1514,6 @@ EXPORT_SYMBOL(inode_wait);
* wake_up_inode() after removing from the hash list will DTRT.
*
* This is called with inode_lock held.
- *
- * Called with i_lock held and returns with it dropped.
*/
static void __wait_on_freeing_inode(struct inode *inode)
{
@@ -1732,9 +1521,10 @@ static void __wait_on_freeing_inode(struct inode *inode)
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_lock);
schedule();
finish_wait(wq, &wait.wait);
+ spin_lock(&inode_lock);
}
static __initdata unsigned long ihash_entries;
@@ -1762,7 +1552,7 @@ void __init inode_init_early(void)
inode_hashtable =
alloc_large_system_hash("Inode-cache",
- sizeof(struct inode_hash_bucket),
+ sizeof(struct hlist_head),
ihash_entries,
14,
HASH_EARLY,
@@ -1770,17 +1560,14 @@ void __init inode_init_early(void)
&i_hash_mask,
0);
- for (loop = 0; loop < (1 << i_hash_shift); loop++) {
- spin_lock_init(&inode_hashtable[loop].lock);
- INIT_HLIST_HEAD(&inode_hashtable[loop].head);
- }
+ for (loop = 0; loop < (1 << i_hash_shift); loop++)
+ INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void __init inode_init(void)
{
int loop;
- percpu_counter_init(&nr_inodes, 0);
/* inode slab cache */
inode_cachep = kmem_cache_create("inode_cache",
sizeof(struct inode),
@@ -1790,17 +1577,13 @@ void __init inode_init(void)
init_once);
register_shrinker(&icache_shrinker);
- for_each_possible_cpu(loop) {
- spin_lock_init(&per_cpu(inode_cpulock, loop));
- }
-
/* Hash may have been set up in inode_init_early */
if (!hashdist)
return;
inode_hashtable =
alloc_large_system_hash("Inode-cache",
- sizeof(struct inode_hash_bucket),
+ sizeof(struct hlist_head),
ihash_entries,
14,
0,
@@ -1808,10 +1591,8 @@ void __init inode_init(void)
&i_hash_mask,
0);
- for (loop = 0; loop < (1 << i_hash_shift); loop++) {
- spin_lock_init(&inode_hashtable[loop].lock);
- INIT_HLIST_HEAD(&inode_hashtable[loop].head);
- }
+ for (loop = 0; loop < (1 << i_hash_shift); loop++)
+ INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
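Taken together, the fs/inode.c changes collapse two structures back into one regime: struct inode_hash_bucket (a spinlock paired with each hash chain) becomes a bare hlist_head array, and every operation that used a bucket lock or i_lock serializes on the single global inode_lock instead. A minimal sketch of the per-bucket locking being removed, with example_ names:

    struct example_bucket {
            spinlock_t lock;          /* protects only this chain */
            struct hlist_head head;
    };
    static struct example_bucket *example_table;   /* one entry per hash value */

    static void example_hash_insert(struct example_bucket *b, struct inode *inode)
    {
            spin_lock(&b->lock);      /* contention limited to one bucket */
            hlist_add_head(&inode->i_hash, &b->head);
            spin_unlock(&b->lock);
    }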
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 66e0e4b3ad20..1de640de962c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1311,6 +1311,7 @@ int jbd2_journal_stop(handle_t *handle)
if (handle->h_sync)
transaction->t_synchronous_commit = 1;
current->journal_info = NULL;
+ spin_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock);
transaction->t_outstanding_credits -= handle->h_buffer_credits;
transaction->t_updates--;
@@ -1339,7 +1340,8 @@ int jbd2_journal_stop(handle_t *handle)
jbd_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle);
/* This is non-blocking */
- jbd2_log_start_commit(journal, transaction->t_tid);
+ __jbd2_log_start_commit(journal, transaction->t_tid);
+ spin_unlock(&journal->j_state_lock);
/*
* Special case: JBD2_SYNC synchronous updates require us
@@ -1349,6 +1351,7 @@ int jbd2_journal_stop(handle_t *handle)
err = jbd2_log_wait_commit(journal, tid);
} else {
spin_unlock(&transaction->t_handle_lock);
+ spin_unlock(&journal->j_state_lock);
}
lock_map_release(&handle->h_lockdep_map);
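The jbd2 hunk restores a locking convention worth naming: j_state_lock nests outside t_handle_lock, and the double-underscore __jbd2_log_start_commit() is the flavor that expects the caller to already hold j_state_lock, while plain jbd2_log_start_commit() acquires it internally. A sketch of that __locked/unlocked pairing (simplified, illustrative names; state_lock must be spin_lock_init()'d before use):

    struct example_journal {
            spinlock_t state_lock;
    };

    static void __example_start_commit(struct example_journal *j)
    {
            /* caller holds j->state_lock */
            /* ... record that a commit was requested ... */
    }

    static void example_start_commit(struct example_journal *j)
    {
            spin_lock(&j->state_lock);
            __example_start_commit(j);
            spin_unlock(&j->state_lock);
    }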
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 40bc1705fa05..7aa4417e085f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -287,9 +287,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
- spin_lock(&old_dentry->d_inode->i_lock);
- old_dentry->d_inode->i_count++;
- spin_unlock(&old_dentry->d_inode->i_lock);
+ atomic_inc(&old_dentry->d_inode->i_count);
}
return ret;
}
@@ -868,9 +866,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
/* Might as well let the VFS know */
d_instantiate(new_dentry, old_dentry->d_inode);
- spin_lock(&old_dentry->d_inode->i_lock);
- old_dentry->d_inode->i_count++;
- spin_unlock(&old_dentry->d_inode->i_lock);
+ atomic_inc(&old_dentry->d_inode->i_count);
new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
return ret;
}
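These jffs2 hunks, and the jfs and minix hunks below, revert the same micro-pattern: adding a hard link bumps the in-core reference count with one atomic_inc() rather than an open-coded increment under i_lock. The invariant is that every dentry instantiated against an inode owns one i_count reference. A hedged sketch of the post-revert shape (example_link is illustrative; real filesystems also update the on-disk link count):

    static void example_link(struct inode *inode, struct dentry *new_dentry)
    {
            atomic_inc(&inode->i_count);        /* reference owned by the new dentry */
            d_instantiate(new_dentry, inode);   /* the new dentry now pins the inode */
    }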
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 820212f3fc88..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1279,9 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */
* lazy commit thread finishes processing
*/
if (tblk->xflag & COMMIT_DELETE) {
- spin_lock(&tblk->u.ip->i_lock);
- tblk->u.ip->i_count++;
- spin_unlock(&tblk->u.ip->i_lock);
+ atomic_inc(&tblk->u.ip->i_count);
/*
* Avoid a rare deadlock
*
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 8fa6219d0eeb..c79a4270f083 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -831,9 +831,7 @@ static int jfs_link(struct dentry *old_dentry,
ip->i_ctime = CURRENT_TIME;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(dir);
- spin_lock(&ip->i_lock);
- ip->i_count++;
- spin_unlock(&ip->i_lock);
+ atomic_inc(&ip->i_count);
iplist[0] = ip;
iplist[1] = dir;
diff --git a/fs/libfs.c b/fs/libfs.c
index 98d2717e9027..6e8d17e1dc4c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -14,11 +14,6 @@
#include <asm/uaccess.h>
-static inline int simple_positive(struct dentry *dentry)
-{
- return dentry->d_inode && !d_unhashed(dentry);
-}
-
int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
@@ -84,8 +79,7 @@ int dcache_dir_close(struct inode *inode, struct file *file)
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
{
- struct dentry *dentry = file->f_path.dentry;
- mutex_lock(&dentry->d_inode->i_mutex);
+ mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
switch (origin) {
case 1:
offset += file->f_pos;
@@ -93,7 +87,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
if (offset >= 0)
break;
default:
- mutex_unlock(&dentry->d_inode->i_mutex);
+ mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -103,27 +97,21 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
struct dentry *cursor = file->private_data;
loff_t n = file->f_pos - 2;
- spin_lock(&dentry->d_lock);
- spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED);
+ spin_lock(&dcache_lock);
list_del(&cursor->d_u.d_child);
- spin_unlock(&cursor->d_lock);
- p = dentry->d_subdirs.next;
- while (n && p != &dentry->d_subdirs) {
+ p = file->f_path.dentry->d_subdirs.next;
+ while (n && p != &file->f_path.dentry->d_subdirs) {
struct dentry *next;
next = list_entry(p, struct dentry, d_u.d_child);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- if (simple_positive(next))
+ if (!d_unhashed(next) && next->d_inode)
n--;
- spin_unlock(&next->d_lock);
p = p->next;
}
- spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED);
list_add_tail(&cursor->d_u.d_child, p);
- spin_unlock(&cursor->d_lock);
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
}
}
- mutex_unlock(&dentry->d_inode->i_mutex);
+ mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
return offset;
}
@@ -163,38 +151,29 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
i++;
/* fallthrough */
default:
- spin_lock(&dentry->d_lock);
- if (filp->f_pos == 2) {
- spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED);
+ spin_lock(&dcache_lock);
+ if (filp->f_pos == 2)
list_move(q, &dentry->d_subdirs);
- spin_unlock(&cursor->d_lock);
- }
for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
struct dentry *next;
next = list_entry(p, struct dentry, d_u.d_child);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- if (!simple_positive(next)) {
- spin_unlock(&next->d_lock);
+ if (d_unhashed(next) || !next->d_inode)
continue;
- }
- spin_unlock(&next->d_lock);
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
if (filldir(dirent, next->d_name.name,
next->d_name.len, filp->f_pos,
next->d_inode->i_ino,
dt_type(next->d_inode)) < 0)
return 0;
- spin_lock(&dentry->d_lock);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
+ spin_lock(&dcache_lock);
/* next is still alive */
list_move(q, p);
- spin_unlock(&next->d_lock);
p = q;
filp->f_pos++;
}
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
}
return 0;
}
@@ -265,8 +244,6 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_flags |= MS_ACTIVE;
- WARN_ON(mnt->mnt_flags & MNT_MOUNTED);
- mnt->mnt_flags |= MNT_MOUNTED;
simple_set_mnt(mnt, s);
return 0;
@@ -281,31 +258,29 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
dget(dentry);
d_instantiate(dentry, inode);
return 0;
}
+static inline int simple_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
int simple_empty(struct dentry *dentry)
{
struct dentry *child;
int ret = 0;
- spin_lock(&dentry->d_lock);
- list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
- spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
- if (simple_positive(child)) {
- spin_unlock(&child->d_lock);
+ spin_lock(&dcache_lock);
+ list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
+ if (simple_positive(child))
goto out;
- }
- spin_unlock(&child->d_lock);
- }
ret = 1;
out:
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
return ret;
}
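The libfs hunks drop the per-dentry d_lock nesting in favor of one dcache_lock around child-list walks, and simple_positive() (a dentry that has an inode and is still hashed) moves back beside its only remaining users. A minimal sketch of the reverted walk shape:

    /* count live children; dcache_lock protects d_subdirs and d_u.d_child */
    static int example_count_children(struct dentry *parent)
    {
            struct dentry *child;
            int n = 0;

            spin_lock(&dcache_lock);
            list_for_each_entry(child, &parent->d_subdirs, d_u.d_child)
                    if (child->d_inode && !d_unhashed(child))   /* simple_positive() */
                            n++;
            spin_unlock(&dcache_lock);
            return n;
    }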
diff --git a/fs/locks.c b/fs/locks.c
index 9fdd6796e0ec..a8794f233bc9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1374,7 +1374,8 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
if ((arg == F_WRLCK)
- && (atomic_read(&dentry->d_count) > 1 || inode->i_count > 1))
+ && ((atomic_read(&dentry->d_count) > 1)
+ || (atomic_read(&inode->i_count) > 1)))
goto out;
}
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 1b0b9f081ffa..32b131cd6121 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -103,9 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
return add_nondir(dentry, inode);
}
diff --git a/fs/namei.c b/fs/namei.c
index 488a6c07235e..a4855af776a8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -198,29 +198,6 @@ static int acl_permission_check(struct inode *inode, int mask,
return -EACCES;
}
-static int acl_permission_check_rcu(struct inode *inode, int mask,
- int (*check_acl)(struct inode *inode, int mask))
-{
- umode_t mode = inode->i_mode;
-
- mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
-
- if (current_fsuid() == inode->i_uid)
- mode >>= 6;
- else {
- if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl)
- return -EAGAIN;
- if (in_group_p(inode->i_gid))
- mode >>= 3;
- }
-
- /*
- * If the DACs are ok we don't need any capability check.
- */
- if ((mask & ~mode) == 0)
- return 0;
- return -EACCES;
-}
/**
* generic_permission - check for access rights on a Posix-like filesystem
* @inode: inode to check access rights for
@@ -506,26 +483,6 @@ ok:
return security_inode_permission(inode, MAY_EXEC);
}
-static int exec_permission_lite_rcu(struct inode *inode)
-{
- int ret;
-
- if (inode->i_op->permission)
- return -EAGAIN;
- ret = acl_permission_check_rcu(inode, MAY_EXEC, inode->i_op->check_acl);
- if (ret == -EAGAIN)
- return ret;
- if (!ret)
- goto ok;
-
- if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
- goto ok;
-
- return ret;
-ok:
- return security_inode_permission(inode, MAY_EXEC);
-}
-
static __always_inline void set_root(struct nameidata *nd)
{
if (!nd->root.mnt) {
@@ -538,15 +495,6 @@ static __always_inline void set_root(struct nameidata *nd)
}
static int link_path_walk(const char *, struct nameidata *);
-static __always_inline void set_root_rcu(struct nameidata *nd)
-{
- if (!nd->root.mnt) {
- struct fs_struct *fs = current->fs;
- read_lock(&fs->lock);
- nd->root = fs->root;
- read_unlock(&fs->lock);
- }
-}
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
@@ -590,12 +538,6 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
mntput(path->mnt);
}
-static inline void path_to_nameidata_rcu(struct path *path, struct nameidata *nd)
-{
- nd->path.mnt = path->mnt;
- nd->path.dentry = path->dentry;
-}
-
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
dput(nd->path.dentry);
@@ -675,18 +617,15 @@ int follow_up(struct path *path)
{
struct vfsmount *parent;
struct dentry *mountpoint;
- int cpu = get_cpu();
- put_cpu();
-
- vfsmount_read_lock(cpu);
+ spin_lock(&vfsmount_lock);
parent = path->mnt->mnt_parent;
if (parent == path->mnt) {
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
return 0;
}
mntget(parent);
mountpoint = dget(path->mnt->mnt_mountpoint);
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
@@ -697,21 +636,6 @@ int follow_up(struct path *path)
/* no need for dcache_lock, as serialization is taken care of in
* namespace.c
*/
-static int __follow_mount_rcu(struct path *path)
-{
- int res = 0;
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted;
- mounted = __lookup_mnt(path->mnt, path->dentry, 1);
- if (!mounted)
- break;
- path->mnt = mounted;
- path->dentry = mounted->mnt_root;
- res = 1;
- }
- return res;
-}
-
static int __follow_mount(struct path *path)
{
int res = 0;
@@ -762,8 +686,6 @@ int follow_down(struct path *path)
static __always_inline void follow_dotdot(struct nameidata *nd)
{
- int cpu = get_cpu();
- put_cpu();
set_root(nd);
while(1) {
@@ -774,20 +696,23 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
nd->path.mnt == nd->root.mnt) {
break;
}
+ spin_lock(&dcache_lock);
if (nd->path.dentry != nd->path.mnt->mnt_root) {
nd->path.dentry = dget(nd->path.dentry->d_parent);
+ spin_unlock(&dcache_lock);
dput(old);
break;
}
- vfsmount_read_lock(cpu);
+ spin_unlock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
parent = nd->path.mnt->mnt_parent;
if (parent == nd->path.mnt) {
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
break;
}
mntget(parent);
nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
dput(old);
mntput(nd->path.mnt);
nd->path.mnt = parent;
@@ -800,24 +725,6 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
* small and for now I'd prefer to have fast path as straight as possible.
* It _is_ time-critical.
*/
-static int do_lookup_rcu(struct nameidata *nd, struct qstr *name,
- struct path *path)
-{
- struct vfsmount *mnt = nd->path.mnt;
- struct dentry *dentry;
-
- dentry = __d_lookup_rcu(nd->path.dentry, name);
-
- if (!dentry)
- return -EAGAIN;
- if (dentry->d_op && dentry->d_op->d_revalidate)
- return -EAGAIN;
- path->mnt = mnt;
- path->dentry = dentry;
- __follow_mount_rcu(path);
- return 0;
-}
-
static int do_lookup(struct nameidata *nd, struct qstr *name,
struct path *path)
{
@@ -915,134 +822,6 @@ fail:
return PTR_ERR(dentry);
}
-static noinline int link_path_walk_rcu(const char *name, struct nameidata *nd, struct path *next)
-{
- struct inode *inode;
- unsigned int lookup_flags = nd->flags;
-
- while (*name=='/')
- name++;
- if (!*name)
- goto return_reval;
-
- inode = nd->path.dentry->d_inode;
- if (nd->depth)
- lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
-
- /* At this point we know we have a real path component. */
- for(;;) {
- unsigned long hash;
- struct qstr this;
- unsigned int c;
-
- nd->flags |= LOOKUP_CONTINUE;
- if (exec_permission_lite_rcu(inode))
- return -EAGAIN;
-
- this.name = name;
- c = *(const unsigned char *)name;
-
- hash = init_name_hash();
- do {
- name++;
- hash = partial_name_hash(c, hash);
- c = *(const unsigned char *)name;
- } while (c && (c != '/'));
- this.len = name - (const char *) this.name;
- this.hash = end_name_hash(hash);
-
- /* remove trailing slashes? */
- if (!c)
- goto last_component;
- while (*++name == '/');
- if (!*name)
- goto last_with_slashes;
-
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- return -EAGAIN;
- case 1:
- continue;
- }
- if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash)
- return -EAGAIN;
- /* This does the actual lookups.. */
- if (do_lookup_rcu(nd, &this, next))
- return -EAGAIN;
-
- inode = next->dentry->d_inode;
- if (!inode)
- return -ENOENT;
- if (inode->i_op->follow_link)
- return -EAGAIN;
- path_to_nameidata_rcu(next, nd);
- if (!inode->i_op->lookup)
- return -ENOTDIR;
- continue;
- /* here ends the main loop */
-
-last_with_slashes:
- lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-last_component:
- /* Clear LOOKUP_CONTINUE iff it was previously unset */
- nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
- if (lookup_flags & LOOKUP_PARENT)
- return -EAGAIN;
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- return -EAGAIN;
- case 1:
- goto return_reval;
- }
- if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash)
- return -EAGAIN;
- if (do_lookup_rcu(nd, &this, next))
- return -EAGAIN;
- inode = next->dentry->d_inode;
- if ((lookup_flags & LOOKUP_FOLLOW)
- && inode && inode->i_op->follow_link)
- return -EAGAIN;
-
- path_to_nameidata_rcu(next, nd);
- if (!inode)
- return -ENOENT;
- if (lookup_flags & LOOKUP_DIRECTORY) {
- if (!inode->i_op->lookup)
- return -ENOTDIR;
- }
- goto return_base;
- }
-return_reval:
- /*
- * We bypassed the ordinary revalidation routines.
- * We may need to check the cached dentry for staleness.
- */
- if (nd->path.dentry && nd->path.dentry->d_sb &&
- (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
- return -EAGAIN;
-return_base:
- spin_lock(&nd->path.dentry->d_lock);
- if (d_unhashed(nd->path.dentry)) {
- spin_unlock(&nd->path.dentry->d_lock);
- return -EAGAIN;
- }
- if (!nd->path.dentry->d_inode) {
- spin_unlock(&nd->path.dentry->d_lock);
- return -EAGAIN;
- }
- atomic_inc(&nd->path.dentry->d_count);
- spin_unlock(&nd->path.dentry->d_lock);
- return 0;
-}
-
/*
* This is a temporary kludge to deal with "automount" symlinks; proper
* solution is to trigger them on follow_mount(), so that do_lookup()
@@ -1116,7 +895,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (this.name[0] == '.') switch (this.len) {
default:
break;
- case 2:
+ case 2:
if (this.name[1] != '.')
break;
follow_dotdot(nd);
@@ -1161,7 +940,7 @@ last_component:
if (this.name[0] == '.') switch (this.len) {
default:
break;
- case 2:
+ case 2:
if (this.name[1] != '.')
break;
follow_dotdot(nd);
@@ -1225,19 +1004,6 @@ return_err:
return err;
}
-static int path_walk_rcu(const char *name, struct nameidata *nd)
-{
- struct path save = nd->path;
- struct path path = {.mnt = NULL};
- int err;
-
- current->total_link_count = 0;
- err = link_path_walk_rcu(name, nd, &path);
- if (unlikely(err == -EAGAIN))
- nd->path = save;
- return err;
-}
-
static int path_walk(const char *name, struct nameidata *nd)
{
struct path save = nd->path;
@@ -1263,55 +1029,6 @@ static int path_walk(const char *name, struct nameidata *nd)
return result;
}
-static noinline int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
-{
- int retval = 0;
- int fput_needed;
- struct file *file;
-
- nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags;
- nd->depth = 0;
- nd->root.mnt = NULL;
-
- if (*name=='/') {
- set_root_rcu(nd);
- nd->path = nd->root;
- } else if (dfd == AT_FDCWD) {
- struct fs_struct *fs = current->fs;
- read_lock(&fs->lock);
- nd->path = fs->pwd;
- read_unlock(&fs->lock);
- } else {
- struct dentry *dentry;
-
- file = fget_light(dfd, &fput_needed);
- retval = -EBADF;
- if (!file)
- goto out_fail;
-
- dentry = file->f_path.dentry;
-
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
-
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
-
- nd->path = file->f_path;
-
- fput_light(file, fput_needed);
- }
- return 0;
-
-fput_fail:
- fput_light(file, fput_needed);
-out_fail:
- return retval;
-}
-
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
int retval = 0;
@@ -1368,51 +1085,16 @@ out_fail:
static int do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval;
- int cpu = get_cpu();
- put_cpu();
-
- vfsmount_read_lock(cpu);
- rcu_read_lock();
- retval = path_init_rcu(dfd, name, flags, nd);
- if (unlikely(retval)) {
- rcu_read_unlock();
- vfsmount_read_unlock(cpu);
- return retval;
- }
- retval = path_walk_rcu(name, nd);
- rcu_read_unlock();
- if (likely(!retval))
- mntget(nd->path.mnt);
- vfsmount_read_unlock(cpu);
- if (likely(!retval)) {
- if (unlikely(!audit_dummy_context())) {
- if (nd->path.dentry && nd->path.dentry->d_inode)
- audit_inode(name, nd->path.dentry);
- }
- }
- if (nd->root.mnt)
- nd->root.mnt = NULL;
-
- if (unlikely(retval == -EAGAIN)) {
- /* slower, locked walk */
- retval = path_init(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
+ int retval = path_init(dfd, name, flags, nd);
+ if (!retval)
retval = path_walk(name, nd);
- if (likely(!retval)) {
- if (unlikely(!audit_dummy_context())) {
- if (nd->path.dentry && nd->path.dentry->d_inode)
- audit_inode(name, nd->path.dentry);
- }
- }
-
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
- }
+ if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
+ nd->path.dentry->d_inode))
+ audit_inode(name, nd->path.dentry);
+ if (nd->root.mnt) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
}
-
return retval;
}
@@ -2481,10 +2163,12 @@ void dentry_unhash(struct dentry *dentry)
{
dget(dentry);
shrink_dcache_parent(dentry);
+ spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) == 2)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
}
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -2637,11 +2321,8 @@ static long do_unlinkat(int dfd, const char __user *pathname)
if (nd.last.name[nd.last.len])
goto slashes;
inode = dentry->d_inode;
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
- }
+ if (inode)
+ atomic_inc(&inode->i_count);
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
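The large namei.c removals delete an early rcu-walk prototype: components were resolved with __d_lookup_rcu() under rcu_read_lock(), taking no reference counts, and any case the lockless code could not prove safe (d_revalidate or d_hash hooks, follow_link, "..", FS_REVAL_DOT) bailed out with -EAGAIN so the caller could redo the walk with ordinary refcounting. A compressed sketch of that try-lockless-then-fall-back control flow; both example_walk_* helpers are hypothetical:

    static int example_path_lookup(const char *name, struct nameidata *nd)
    {
            int err;

            rcu_read_lock();
            err = example_walk_rcu(name, nd);   /* lockless; returns -EAGAIN when unsure */
            rcu_read_unlock();

            if (err != -EAGAIN)
                    return err;
            return example_walk_refcounted(name, nd);   /* slower, always-correct path */
    }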
diff --git a/fs/namespace.c b/fs/namespace.c
index ad39c7f2d292..962fd96dbe4c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,8 +11,6 @@
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/percpu.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -39,16 +37,12 @@
#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
#define HASH_SIZE (1UL << HASH_SHIFT)
-/*
- * vfsmount "brlock" style spinlock for vfsmount related operations, use
- * vfsmount_read_lock/vfsmount_write_lock functions.
- */
-static DEFINE_PER_CPU(spinlock_t, vfsmount_lock);
+/* spinlock for vfsmount-related operations, in place of dcache_lock */
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
static int event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
-static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;
@@ -60,48 +54,6 @@ static struct rw_semaphore namespace_sem;
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
-void vfsmount_read_lock(int cpu)
-{
- spinlock_t *lock;
-
- lock = &per_cpu(vfsmount_lock, cpu);
- spin_lock(lock);
-}
-
-void vfsmount_read_unlock(int cpu)
-{
- spinlock_t *lock;
-
- lock = &per_cpu(vfsmount_lock, cpu);
- spin_unlock(lock);
-}
-
-void vfsmount_write_lock(void)
-{
- int i;
- int nr = 0;
-
- for_each_possible_cpu(i) {
- spinlock_t *lock;
-
- lock = &per_cpu(vfsmount_lock, i);
- spin_lock_nested(lock, nr);
- nr++;
- }
-}
-
-void vfsmount_write_unlock(void)
-{
- int i;
-
- for_each_possible_cpu(i) {
- spinlock_t *lock;
-
- lock = &per_cpu(vfsmount_lock, i);
- spin_unlock(lock);
- }
-}
-
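The helpers deleted above built a "brlock"-style big-reader lock from per-CPU spinlocks: a reader takes only its own CPU's lock, while a writer must take all of them in a consistent order, so reads scale with CPU count and writes pay for it. A minimal sketch (example_ names; each per-CPU lock needs spin_lock_init() at boot, as the removed init code did):

    static DEFINE_PER_CPU(spinlock_t, example_brlock);

    static void example_read_lock(int cpu)
    {
            spin_lock(&per_cpu(example_brlock, cpu));
    }

    static void example_read_unlock(int cpu)
    {
            spin_unlock(&per_cpu(example_brlock, cpu));
    }

    static void example_write_lock(void)
    {
            int i, nr = 0;

            for_each_possible_cpu(i)    /* fixed order prevents ABBA deadlock */
                    spin_lock_nested(&per_cpu(example_brlock, i), nr++);
    }

    static void example_write_unlock(void)
    {
            int i;

            for_each_possible_cpu(i)
                    spin_unlock(&per_cpu(example_brlock, i));
    }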
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -112,21 +64,18 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
-/*
- * allocation is serialized by namespace_sem, but we need the spinlock to
- * serialise with freeing.
- */
+/* allocation is serialized by namespace_sem */
static int mnt_alloc_id(struct vfsmount *mnt)
{
int res;
retry:
ida_pre_get(&mnt_id_ida, GFP_KERNEL);
- spin_lock(&mnt_id_lock);
+ spin_lock(&vfsmount_lock);
res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
if (!res)
mnt_id_start = mnt->mnt_id + 1;
- spin_unlock(&mnt_id_lock);
+ spin_unlock(&vfsmount_lock);
if (res == -EAGAIN)
goto retry;
@@ -136,11 +85,11 @@ retry:
static void mnt_free_id(struct vfsmount *mnt)
{
int id = mnt->mnt_id;
- spin_lock(&mnt_id_lock);
+ spin_lock(&vfsmount_lock);
ida_remove(&mnt_id_ida, id);
if (mnt_id_start > id)
mnt_id_start = id;
- spin_unlock(&mnt_id_lock);
+ spin_unlock(&vfsmount_lock);
}
/*
@@ -176,49 +125,6 @@ void mnt_release_group_id(struct vfsmount *mnt)
mnt->mnt_group_id = 0;
}
-static inline void add_mnt_count(struct vfsmount *mnt, int n)
-{
-#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_count, smp_processor_id())) += n;
-#else
- mnt->mnt_count += n;
-#endif
-}
-
-static inline void inc_mnt_count(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))++;
-#else
- mnt->mnt_count++;
-#endif
-}
-
-static inline void dec_mnt_count(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))--;
-#else
- mnt->mnt_count--;
-#endif
-}
-
-unsigned int count_mnt_count(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- unsigned int count = 0;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- count += *per_cpu_ptr(mnt->mnt_count, cpu);
- }
-
- return count;
-#else
- return mnt->mnt_count;
-#endif
-}
-
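The removed mnt_count helpers are a distributed reference count: gets and puts touch only the local CPU's slot (with preemption disabled), and only code that has excluded all writers, here by taking the vfsmount brlock for write, may sum the slots to obtain a stable total. The revert collapses this back to a single atomic_t. A minimal sketch with example_ names:

    struct example_mount {
    #ifdef CONFIG_SMP
            int __percpu *count;        /* from alloc_percpu(int) at creation */
    #else
            int count;
    #endif
    };

    static void example_get(struct example_mount *m)
    {
    #ifdef CONFIG_SMP
            preempt_disable();
            (*per_cpu_ptr(m->count, smp_processor_id()))++;
            preempt_enable();
    #else
            m->count++;
    #endif
    }

    static unsigned int example_count(struct example_mount *m)
    {
    #ifdef CONFIG_SMP
            unsigned int sum = 0;
            int cpu;

            /* callers must hold off all writers while summing */
            for_each_possible_cpu(cpu)
                    sum += *per_cpu_ptr(m->count, cpu);
            return sum;
    #else
            return m->count;
    #endif
    }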
struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -235,13 +141,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
goto out_free_id;
}
-#ifdef CONFIG_SMP
- mnt->mnt_count = alloc_percpu(int);
- if (!mnt->mnt_count)
- goto out_free_devname;
-#else
- mnt->mnt_count = 0;
-#endif
+ atomic_set(&mnt->mnt_count, 1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -253,19 +153,14 @@ struct vfsmount *alloc_vfsmnt(const char *name)
#ifdef CONFIG_SMP
mnt->mnt_writers = alloc_percpu(int);
if (!mnt->mnt_writers)
- goto out_free_mntcount;
+ goto out_free_devname;
#else
mnt->mnt_writers = 0;
#endif
- preempt_disable();
- inc_mnt_count(mnt);
- preempt_enable();
}
return mnt;
#ifdef CONFIG_SMP
-out_free_mntcount:
- free_percpu(mnt->mnt_count);
out_free_devname:
kfree(mnt->mnt_devname);
#endif
@@ -376,8 +271,8 @@ int mnt_want_write(struct vfsmount *mnt)
* held by mnt_make_readonly(). Works on !RT as well.
*/
while (mnt->mnt_flags & MNT_WRITE_HOLD) {
- vfsmount_write_lock();
- vfsmount_write_unlock();
+ spin_lock(&vfsmount_lock);
+ spin_unlock(&vfsmount_lock);
}
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
@@ -456,7 +351,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
{
int ret = 0;
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
mnt->mnt_flags |= MNT_WRITE_HOLD;
/*
* After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -490,15 +385,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
*/
smp_wmb();
mnt->mnt_flags &= ~MNT_WRITE_HOLD;
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
return ret;
}
static void __mnt_unmake_readonly(struct vfsmount *mnt)
{
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
mnt->mnt_flags &= ~MNT_READONLY;
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
}
void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -551,13 +446,10 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *lookup_mnt(struct path *path)
{
struct vfsmount *child_mnt;
- int cpu = get_cpu();
- put_cpu();
-
- vfsmount_read_lock(cpu);
+ spin_lock(&vfsmount_lock);
if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
mntget(child_mnt);
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
return child_mnt;
}
@@ -582,16 +474,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
}
-static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
-{
- if (!__lookup_mnt(mnt, dentry, 0)) {
- spin_lock(&dentry->d_lock);
- WARN_ON(dentry->d_mounted == 0);
- dentry->d_mounted--;
- spin_unlock(&dentry->d_lock);
- }
-}
-
static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
{
old_path->dentry = mnt->mnt_mountpoint;
@@ -600,19 +482,15 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
mnt->mnt_mountpoint = mnt->mnt_root;
list_del_init(&mnt->mnt_child);
list_del_init(&mnt->mnt_hash);
- dentry_reset_mounted(old_path->mnt, old_path->dentry);
- WARN_ON(!(mnt->mnt_flags & MNT_MOUNTED));
- mnt->mnt_flags &= ~MNT_MOUNTED;
+ old_path->dentry->d_mounted--;
}
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *child_mnt)
{
child_mnt->mnt_parent = mntget(mnt);
- spin_lock(&dentry->d_lock);
- child_mnt->mnt_mountpoint = dget_dlock(dentry);
+ child_mnt->mnt_mountpoint = dget(dentry);
dentry->d_mounted++;
- spin_unlock(&dentry->d_lock);
}
static void attach_mnt(struct vfsmount *mnt, struct path *path)
@@ -621,8 +499,6 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
list_add_tail(&mnt->mnt_hash, mount_hashtable +
hash(path->mnt, path->dentry));
list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
- WARN_ON(mnt->mnt_flags & MNT_MOUNTED);
- mnt->mnt_flags |= MNT_MOUNTED;
}
/*
@@ -645,8 +521,6 @@ static void commit_tree(struct vfsmount *mnt)
list_add_tail(&mnt->mnt_hash, mount_hashtable +
hash(parent, mnt->mnt_mountpoint));
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
- WARN_ON(mnt->mnt_flags & MNT_MOUNTED);
- mnt->mnt_flags |= MNT_MOUNTED;
touch_mnt_namespace(n);
}
@@ -694,7 +568,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
goto out_free;
}
- mnt->mnt_flags = (old->mnt_flags & ~MNT_MOUNTED);
+ mnt->mnt_flags = old->mnt_flags;
atomic_inc(&sb->s_active);
mnt->mnt_sb = sb;
mnt->mnt_root = dget(root);
@@ -750,92 +624,43 @@ static inline void __mntput(struct vfsmount *mnt)
void mntput_no_expire(struct vfsmount *mnt)
{
- int cpu = get_cpu();
- put_cpu();
repeat:
- if (likely(mnt->mnt_flags & MNT_MOUNTED)) {
- vfsmount_read_lock(cpu);
- if (unlikely(!(mnt->mnt_flags & MNT_MOUNTED))) {
- vfsmount_read_unlock(cpu);
- goto repeat;
+ if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
+ if (likely(!mnt->mnt_pinned)) {
+ spin_unlock(&vfsmount_lock);
+ __mntput(mnt);
+ return;
}
- preempt_disable();
- dec_mnt_count(mnt);
- preempt_enable();
- vfsmount_read_unlock(cpu);
-
- return;
- }
-
- vfsmount_write_lock();
- if (unlikely((mnt->mnt_flags & MNT_MOUNTED))) {
- vfsmount_write_unlock();
+ atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
+ mnt->mnt_pinned = 0;
+ spin_unlock(&vfsmount_lock);
+ acct_auto_close_mnt(mnt);
+ security_sb_umount_close(mnt);
goto repeat;
}
- preempt_disable();
- dec_mnt_count(mnt);
- preempt_enable();
- if (count_mnt_count(mnt)) {
- vfsmount_write_unlock();
- return;
- }
- if (likely(!mnt->mnt_pinned)) {
- vfsmount_write_unlock();
- __mntput(mnt);
- return;
- }
- preempt_disable();
- add_mnt_count(mnt, mnt->mnt_pinned + 1);
- preempt_enable();
- mnt->mnt_pinned = 0;
- vfsmount_write_unlock();
- acct_auto_close_mnt(mnt);
- security_sb_umount_close(mnt);
- goto repeat;
-}
-EXPORT_SYMBOL(mntput_no_expire);
-
-void mntput(struct vfsmount *mnt)
-{
- if (mnt) {
- /* avoid cacheline pingpong */
- if (unlikely(mnt->mnt_expiry_mark))
- mnt->mnt_expiry_mark = 0;
- mntput_no_expire(mnt);
- }
}
-EXPORT_SYMBOL(mntput);
-struct vfsmount *mntget(struct vfsmount *mnt)
-{
- if (mnt) {
- preempt_disable();
- inc_mnt_count(mnt);
- preempt_enable();
- }
- return mnt;
-}
-EXPORT_SYMBOL(mntget);
+EXPORT_SYMBOL(mntput_no_expire);
void mnt_pin(struct vfsmount *mnt)
{
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
mnt->mnt_pinned++;
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
}
+
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *mnt)
{
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
if (mnt->mnt_pinned) {
- preempt_disable();
- inc_mnt_count(mnt);
- preempt_enable();
+ atomic_inc(&mnt->mnt_count);
mnt->mnt_pinned--;
}
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
}
+
EXPORT_SYMBOL(mnt_unpin);
static inline void mangle(struct seq_file *m, const char *s)
@@ -1116,13 +941,12 @@ int may_umount_tree(struct vfsmount *mnt)
int minimum_refs = 0;
struct vfsmount *p;
- /* write lock needed for count_mnt_count */
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
- actual_refs += count_mnt_count(p);
+ actual_refs += atomic_read(&p->mnt_count);
minimum_refs += 2;
}
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
@@ -1149,12 +973,11 @@ int may_umount(struct vfsmount *mnt)
{
int ret = 1;
down_read(&namespace_sem);
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
up_read(&namespace_sem);
-
return ret;
}
@@ -1169,14 +992,13 @@ void release_mounts(struct list_head *head)
if (mnt->mnt_parent != mnt) {
struct dentry *dentry;
struct vfsmount *m;
-
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
dentry = mnt->mnt_mountpoint;
m = mnt->mnt_parent;
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
m->mnt_ghosts--;
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
dput(dentry);
mntput(m);
}
@@ -1200,11 +1022,9 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
list_del_init(&p->mnt_child);
- WARN_ON(!(p->mnt_flags & MNT_MOUNTED));
- p->mnt_flags &= ~MNT_MOUNTED;
if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++;
- dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
+ p->mnt_mountpoint->d_mounted--;
}
change_mnt_propagation(p, MS_PRIVATE);
}
@@ -1233,16 +1053,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
flags & (MNT_FORCE | MNT_DETACH))
return -EINVAL;
- /*
- * probably don't strictly need the lock here if we examined
- * all race cases, but it's a slowpath.
- */
- vfsmount_write_lock();
- if (count_mnt_count(mnt) != 2) {
- vfsmount_write_unlock();
+ if (atomic_read(&mnt->mnt_count) != 2)
return -EBUSY;
- }
- vfsmount_write_unlock();
if (!xchg(&mnt->mnt_expiry_mark, 1))
return -EAGAIN;
@@ -1284,7 +1096,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
}
down_write(&namespace_sem);
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
event++;
if (!(flags & MNT_DETACH))
@@ -1296,7 +1108,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
umount_tree(mnt, 1, &umount_list);
retval = 0;
}
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
if (retval)
security_sb_umount_busy(mnt);
up_write(&namespace_sem);
@@ -1383,13 +1195,6 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
goto Enomem;
q->mnt_mountpoint = mnt->mnt_mountpoint;
- /*
- * We don't call attach_mnt on a cloned rootfs, so set it as
- * mounted here.
- */
- WARN_ON(q->mnt_flags & MNT_MOUNTED);
- q->mnt_flags |= MNT_MOUNTED;
-
p = mnt;
list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
if (!is_subdir(r->mnt_mountpoint, dentry))
@@ -1410,19 +1215,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
q = clone_mnt(p, p->mnt_root, flag);
if (!q)
goto Enomem;
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, &path);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
}
}
return res;
Enomem:
if (res) {
LIST_HEAD(umount_list);
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
umount_tree(res, 0, &umount_list);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
}
return NULL;
@@ -1441,9 +1246,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
{
LIST_HEAD(umount_list);
down_write(&namespace_sem);
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
umount_tree(mnt, 0, &umount_list);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
}
@@ -1556,13 +1361,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (err)
goto out_cleanup_ids;
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
}
-
if (parent_path) {
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, path);
@@ -1576,8 +1380,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
list_del_init(&child->mnt_hash);
commit_tree(child);
}
- vfsmount_write_unlock();
-
+ spin_unlock(&vfsmount_lock);
return 0;
out_cleanup_ids:
@@ -1639,10 +1442,10 @@ static int do_change_type(struct path *path, int flag)
goto out_unlock;
}
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
out_unlock:
up_write(&namespace_sem);
@@ -1675,13 +1478,9 @@ static int do_loopback(struct path *path, char *old_name,
goto out;
err = -ENOMEM;
- if (recurse) {
+ if (recurse)
mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
- /* Annoying. Since we graft the rootfs, we need to unmark
- * it as mounted. */
- WARN_ON(!(mnt->mnt_flags & MNT_MOUNTED));
- mnt->mnt_flags &= ~MNT_MOUNTED;
- } else
+ else
mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
if (!mnt)
@@ -1690,10 +1489,9 @@ static int do_loopback(struct path *path, char *old_name,
err = graft_tree(mnt, path);
if (err) {
LIST_HEAD(umount_list);
-
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
umount_tree(mnt, 0, &umount_list);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
}
@@ -1746,19 +1544,18 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
else
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
- mnt_flags |= path->mnt->mnt_flags & MNT_MOUNTED;
path->mnt->mnt_flags = mnt_flags;
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
}
up_write(&sb->s_umount);
if (!err) {
security_sb_post_remount(path->mnt, flags, data);
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
touch_mnt_namespace(path->mnt->mnt_ns);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
}
return err;
}
@@ -1935,7 +1732,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
return;
down_write(&namespace_sem);
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
* following criteria:
@@ -1954,7 +1751,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
touch_mnt_namespace(mnt->mnt_ns);
umount_tree(mnt, 1, &umounts);
}
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umounts);
@@ -2229,9 +2026,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
kfree(new_ns);
return ERR_PTR(-ENOMEM);
}
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
/*
* Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2428,7 +2225,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out2; /* not attached */
/* make sure we can reach put_old from new_root */
tmp = old.mnt;
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
if (tmp != new.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
@@ -2448,7 +2245,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new.mnt, &root_parent);
touch_mnt_namespace(current->nsproxy->mnt_ns);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
chroot_fs_refs(&root, &new);
security_sb_post_pivotroot(&root, &new);
error = 0;
@@ -2464,7 +2261,7 @@ out1:
out0:
return error;
out3:
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
goto out2;
}
@@ -2494,7 +2291,6 @@ static void __init init_mount_tree(void)
void __init mnt_init(void)
{
unsigned u;
- int i;
int err;
init_rwsem(&namespace_sem);
@@ -2512,9 +2308,6 @@ void __init mnt_init(void)
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mount_hashtable[u]);
- for_each_possible_cpu(i)
- spin_lock_init(&per_cpu(vfsmount_lock, i));
-
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2531,30 +2324,15 @@ void put_mnt_ns(struct mnt_namespace *ns)
struct vfsmount *root;
LIST_HEAD(umount_list);
- /*
- * We open code this to avoid vfsmount_write_lock() in case of
- * ns->count > 1
- */
- if (atomic_add_unless(&ns->count, -1, 1))
- return;
-
- /*
- * Do the full locking here as it's likely that ns->count will
- * drop to zero and we have to take namespace_sem and all vfs
- * mount locks anyway for umount_tree().
- */
- down_write(&namespace_sem);
- vfsmount_write_lock();
- if (!atomic_dec_and_test(&ns->count)) {
- vfsmount_write_unlock();
- up_write(&namespace_sem);
+ if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
return;
- }
root = ns->root;
ns->root = NULL;
-
+ spin_unlock(&vfsmount_lock);
+ down_write(&namespace_sem);
+ spin_lock(&vfsmount_lock);
umount_tree(root, 0, &umount_list);
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
kfree(ns);
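The heart of the fs/namespace.c hunks above is the return to the classic
atomic_dec_and_lock() scheme in mntput_no_expire(): the reference count is
decremented atomically, and vfsmount_lock is taken only when the count may
be about to reach zero. A minimal userspace sketch of that put-side pattern
(pthreads plus C11 atomics; the names here are illustrative, not kernel API):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct ref {
        atomic_int count;
        pthread_mutex_t lock;   /* plays the role of vfsmount_lock */
    };

    /* Return true with ->lock held iff we dropped the last reference. */
    static bool dec_and_lock(struct ref *r)
    {
        int old = atomic_load(&r->count);

        /* Fast path: not the last reference, no lock taken. */
        while (old > 1)
            if (atomic_compare_exchange_weak(&r->count, &old, old - 1))
                return false;

        /* Slow path: serialize the final drop under the lock. */
        pthread_mutex_lock(&r->lock);
        if (atomic_fetch_sub(&r->count, 1) == 1)
            return true;
        pthread_mutex_unlock(&r->lock);
        return false;
    }

    static void ref_put(struct ref *r)
    {
        if (dec_and_lock(r)) {
            /* Last reference: unlock, then tear down, mirroring the
             * spin_unlock()/__mntput() order in the hunk above. */
            pthread_mutex_unlock(&r->lock);
            free(r);
        }
    }

The common-case put never touches the lock; only the thread that may free
the object pays for serialization, which is also the point where the kernel
version above lets mnt_pinned resurrect the count.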
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 0a5210802874..b8b5b30d53f0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -364,21 +364,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
}
/* If a pointer is invalid, we search the dentry. */
- spin_lock(&parent->d_lock);
+ spin_lock(&dcache_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dent = list_entry(next, struct dentry, d_u.d_child);
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
- dget(dent);
+ dget_locked(dent);
else
dent = NULL;
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
goto out;
}
next = next->next;
}
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
return NULL;
out:
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 0a041b0d432e..2441d1ab57dc 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -192,7 +192,7 @@ ncp_renew_dentries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&parent->d_lock);
+ spin_lock(&dcache_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -204,7 +204,7 @@ ncp_renew_dentries(struct dentry *parent)
next = next->next;
}
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
}
static inline void
@@ -214,7 +214,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&parent->d_lock);
+ spin_lock(&dcache_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -222,7 +222,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
ncp_age_dentry(server, dentry);
next = next->next;
}
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
}
struct ncp_cache_head {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 01a0b9acb1f8..b5d55d39fb79 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1434,9 +1434,11 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
dir->i_ino, dentry->d_name.name);
+ spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) > 1) {
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(dentry->d_inode, 0);
error = nfs_sillyrename(dir, dentry);
@@ -1447,6 +1449,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
need_rehash = 1;
}
spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
error = nfs_safe_remove(dentry);
if (!error || error == -ENOENT) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1540,9 +1543,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_add(dentry, inode);
}
return error;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 5b79ff1dd3a9..b35d2a616066 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -55,9 +55,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
return -ENOMEM;
}
/* Circumvent igrab(): we know the inode is not being freed */
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
/*
* Ensure that this dentry is invisible to d_find_alias().
* Otherwise, it may be spliced into the tree by
@@ -66,11 +64,9 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
* This again causes shrink_dcache_for_umount_subtree() to
* Oops, since the test for IS_ROOT() will fail.
*/
- spin_lock(&sb->s_root->d_inode->i_lock);
- spin_lock(&sb->s_root->d_lock);
+ spin_lock(&dcache_lock);
list_del_init(&sb->s_root->d_alias);
- spin_unlock(&sb->s_root->d_lock);
- spin_unlock(&sb->s_root->d_inode->i_lock);
+ spin_unlock(&dcache_lock);
}
return 0;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 86eefbafb7b4..5f59a2df21aa 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -396,7 +396,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
- inode->i_count);
+ atomic_read(&inode->i_count));
out:
return inode;
@@ -1153,7 +1153,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
- inode->i_count, fattr->valid);
+ atomic_read(&inode->i_count), fattr->valid);
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
goto out_fileid;
@@ -1395,16 +1395,9 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return &nfsi->vfs_inode;
}
-static void nfs_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
-}
-
void nfs_destroy_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, nfs_i_callback);
+ kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}
static inline void nfs4_init_once(struct nfs_inode *nfsi)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 961895616e8d..40c766782891 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -48,17 +48,12 @@ char *nfs_path(const char *base,
const struct dentry *dentry,
char *buffer, ssize_t buflen)
{
- char *end;
+ char *end = buffer+buflen;
int namelen;
- unsigned seq;
-rename_retry:
- end = buffer+buflen;
*--end = '\0';
buflen--;
-
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
+ spin_lock(&dcache_lock);
while (!IS_ROOT(dentry) && dentry != droot) {
namelen = dentry->d_name.len;
buflen -= namelen + 1;
@@ -69,9 +64,7 @@ rename_retry:
*--end = '/';
dentry = dentry->d_parent;
}
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
if (*end != '/') {
if (--buflen < 0)
goto Elong;
@@ -88,9 +81,7 @@ rename_retry:
memcpy(end, base, namelen);
return end;
Elong_unlock:
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ spin_unlock(&dcache_lock);
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
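The restored nfs_path() above also shows the older path-construction idiom:
instead of retrying under rename_lock, it walks d_parent under dcache_lock
and fills the caller's buffer from the end backwards, one component at a
time. A standalone sketch of that right-to-left scheme, walking a simple
parent-linked structure instead of dentries and doing no locking (names
illustrative only):

    #include <stdio.h>
    #include <string.h>

    struct node {
        const char *name;
        struct node *parent;    /* NULL at the root */
    };

    static char *build_path(const struct node *n, char *buf, size_t buflen)
    {
        char *end = buf + buflen;

        *--end = '\0';
        while (n->parent) {
            size_t len = strlen(n->name);

            if ((size_t)(end - buf) < len + 1)
                return NULL;    /* out of room: ENAMETOOLONG */
            end -= len;
            memcpy(end, n->name, len);
            *--end = '/';       /* separator goes in front */
            n = n->parent;
        }
        return end;             /* points into buf, not at its start */
    }

    int main(void)
    {
        struct node root = { "", NULL };
        struct node exp  = { "export", &root };
        struct node data = { "data", &exp };
        char buf[64];

        puts(build_path(&data, buf, sizeof(buf)));  /* "/export/data" */
        return 0;
    }

Because components are packed against the end of the buffer, the kernel
version can then copy the export base in front of the returned pointer
without moving anything.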
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7ae43df98d04..c1e2733f4fa4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -501,8 +501,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
state->owner = owner;
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
- __iget(inode);
- state->inode = inode;
+ state->inode = igrab(inode);
spin_unlock(&inode->i_lock);
/* Note: The reclaim code dictates that we add stateless
* and read-only stateids to the end of the list */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4f853b5ba3d7..d63d964a0392 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -377,7 +377,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
BUG_ON(error);
if (!nfsi->npages) {
- __iget(inode);
+ igrab(inode);
if (nfs_have_delegation(inode, FMODE_WRITE))
nfsi->change_attr++;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8de004cf26d5..8715d194561a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1759,7 +1759,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
if (svc_msnfs(ffhp) &&
- ((atomic_read(&odentry->d_count) > 1) || (atomic_read(&ndentry->d_count) > 1))) {
+ ((atomic_read(&odentry->d_count) > 1)
+ || (atomic_read(&ndentry->d_count) > 1))) {
host_err = -EPERM;
goto out_dput_new;
}
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index fa6dd36e2ee1..dd5f7e0a95f6 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -27,7 +27,6 @@
#include "page.h"
#include "mdt.h"
-/* XXX: what protects i_state? */
int nilfs_init_gcdat_inode(struct the_nilfs *nilfs)
{
struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a7dccc002a85..e7ed4cdd0ecf 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -478,7 +478,7 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
inode->i_sb = sb; /* sb may be NULL for some meta data files */
inode->i_blkbits = nilfs->ns_blocksize_bits;
inode->i_flags = 0;
- inode->i_count = 1;
+ atomic_set(&inode->i_count, 1);
inode->i_nlink = 1;
inode->i_ino = ino;
inode->i_mode = S_IFREG;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index bc9308f1d616..07ba838ef089 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -222,9 +222,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
err = nilfs_add_nondir(dentry, inode);
if (!err)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index bdaaa5813d2c..037e878e03fc 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,7 +52,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
/* determine if the children should tell inode about their events */
watched = fsnotify_inode_watches_children(inode);
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -61,21 +61,19 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
- spin_lock(&alias->d_lock);
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
- spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ spin_lock(&child->d_lock);
if (watched)
child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
- spin_unlock(&alias->d_lock);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
}
/* Notify this dentry's parent about a child's events. */
@@ -89,18 +87,13 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
return;
-again:
spin_lock(&dentry->d_lock);
parent = dentry->d_parent;
- if (parent != dentry && !spin_trylock(&parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
p_inode = parent->d_inode;
if (fsnotify_inode_watches_children(p_inode)) {
if (p_inode->i_fsnotify_mask & mask) {
- dget_dlock(parent);
+ dget(parent);
send = true;
}
} else {
@@ -110,13 +103,11 @@ again:
* children and update their d_flags to let them know p_inode
* doesn't care about them any more.
*/
- dget_dlock(parent);
+ dget(parent);
should_update_children = true;
}
spin_unlock(&dentry->d_lock);
- if (parent != dentry)
- spin_unlock(&parent->d_lock);
if (send) {
/* we are notifying a parent so come up with the new mask which
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index be1af807e67b..3165d85aada2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -362,75 +362,65 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
-void fsnotify_unmount_inodes(struct super_block *sb)
+void fsnotify_unmount_inodes(struct list_head *list)
{
- int i;
-
- for_each_possible_cpu(i) {
- struct inode *inode, *next_i, *need_iput = NULL;
- struct list_head *list;
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_inodes, i);
-#else
- list = &sb->s_inodes;
-#endif
-
- list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
- struct inode *need_iput_tmp;
-
- spin_lock(&inode->i_lock);
- /*
- * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- */
- if (!inode->i_count) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- need_iput_tmp = need_iput;
- need_iput = NULL;
-
- /* In case fsnotify_inode_delete() drops a reference. */
- if (inode != need_iput_tmp) {
- __iget(inode);
- } else
- need_iput_tmp = NULL;
- spin_unlock(&inode->i_lock);
-
- /* In case the dropping of a reference would nuke next_i. */
- if (&next_i->i_sb_list != list) {
- spin_lock(&next_i->i_lock);
- if (next_i->i_count &&
- !(next_i->i_state &
- (I_CLEAR | I_FREEING | I_WILL_FREE))) {
- __iget(next_i);
- need_iput = next_i;
- }
- spin_unlock(&next_i->i_lock);
- }
-
- if (need_iput_tmp)
- iput(need_iput_tmp);
-
- /* for each watch, send FS_UNMOUNT and then remove it */
- fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
-
- fsnotify_inode_delete(inode);
-
- iput(inode);
+ struct inode *inode, *next_i, *need_iput = NULL;
+
+ list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+ struct inode *need_iput_tmp;
+
+ /*
+ * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+ * I_WILL_FREE, or I_NEW which is fine because by that point
+ * the inode cannot have any associated watches.
+ */
+ if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+ continue;
+
+ /*
+ * If i_count is zero, the inode cannot have any watches and
+ * doing an __iget/iput with MS_ACTIVE clear would actually
+ * evict all inodes with zero i_count from icache which is
+ * unnecessarily violent and may in fact be illegal to do.
+ */
+ if (!atomic_read(&inode->i_count))
+ continue;
+
+ need_iput_tmp = need_iput;
+ need_iput = NULL;
+
+ /* In case fsnotify_inode_delete() drops a reference. */
+ if (inode != need_iput_tmp)
+ __iget(inode);
+ else
+ need_iput_tmp = NULL;
+
+ /* In case the dropping of a reference would nuke next_i. */
+ if ((&next_i->i_sb_list != list) &&
+ atomic_read(&next_i->i_count) &&
+ !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+ __iget(next_i);
+ need_iput = next_i;
}
+
+ /*
+ * We can safely drop inode_lock here because we hold
+ * references on both inode and next_i. Also no new inodes
+ * will be added since the umount has begun. Finally,
+ * iprune_mutex keeps shrink_icache_memory() away.
+ */
+ spin_unlock(&inode_lock);
+
+ if (need_iput_tmp)
+ iput(need_iput_tmp);
+
+ /* for each watch, send FS_UNMOUNT and then remove it */
+ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+
+ fsnotify_inode_delete(inode);
+
+ iput(inode);
+
+ spin_lock(&inode_lock);
}
}
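fsnotify_unmount_inodes() above returns to a single s_inodes list and to
the old pin-the-cursor idiom: before inode_lock is dropped for the blocking
work, a reference is taken on the current inode and, when needed, on next_i,
so the list_for_each_entry_safe() cursor cannot be freed underneath the
walk. A simplified, self-contained userspace rendering (the kernel version
additionally carries the pinned next reference across iterations as
need_iput; all names here are illustrative):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct item {
        int id;
        int refcount;           /* protected by list_lock */
        struct item *next;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Drop a reference; the final put frees the item under list_lock,
     * so a transiently zero count seen under the lock is harmless. */
    static void item_put(struct item *it)
    {
        pthread_mutex_lock(&list_lock);
        if (--it->refcount == 0)
            free(it);
        pthread_mutex_unlock(&list_lock);
    }

    static void process(struct item *it)  /* stands in for the fsnotify work */
    {
        printf("item %d\n", it->id);
    }

    static void walk_all(struct item *head)
    {
        struct item *it, *next;

        pthread_mutex_lock(&list_lock);
        for (it = head; it; it = next) {
            it->refcount++;             /* pin the current item */
            next = it->next;
            if (next)
                next->refcount++;       /* pin the cursor too */
            pthread_mutex_unlock(&list_lock);

            process(it);                /* may block; both stay valid */
            item_put(it);

            pthread_mutex_lock(&list_lock);
            if (next)
                next->refcount--;       /* re-pinned at the loop top */
        }
        pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
        struct item *c = calloc(1, sizeof(*c));
        struct item *b = calloc(1, sizeof(*b));
        struct item *a = calloc(1, sizeof(*a));

        c->id = 3;
        b->id = 2; b->next = c;
        a->id = 1; a->next = b;
        walk_all(a);            /* prints the ids, frees every item */
        return 0;
    }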
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index e0a92bf683d6..40b1cf914ccb 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -185,25 +185,23 @@ static void set_dentry_child_flags(struct inode *inode, int watched)
{
struct dentry *alias;
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;
- spin_lock(&alias->d_lock);
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
- spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ spin_lock(&child->d_lock);
if (watched)
child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
- spin_unlock(&alias->d_lock);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
}
/*
@@ -271,7 +269,6 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
if (!inode)
return;
- /* XXX: need parent lock in place of dcache_lock? */
spin_lock(&entry->d_lock);
parent = entry->d_parent;
if (parent->d_inode && inotify_inode_watched(parent->d_inode))
@@ -286,7 +283,6 @@ void inotify_d_move(struct dentry *entry)
{
struct dentry *parent;
- /* XXX: need parent lock in place of dcache_lock? */
parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
@@ -343,28 +339,18 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
return;
-again:
spin_lock(&dentry->d_lock);
parent = dentry->d_parent;
- if (parent != dentry && !spin_trylock(&parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
inode = parent->d_inode;
if (inotify_inode_watched(inode)) {
- dget_dlock(parent);
+ dget(parent);
spin_unlock(&dentry->d_lock);
- if (parent != dentry)
- spin_unlock(&parent->d_lock);
inotify_inode_queue_event(inode, mask, cookie, name,
dentry->d_inode);
dput(parent);
- } else {
+ } else
spin_unlock(&dentry->d_lock);
- if (parent != dentry)
- spin_unlock(&parent->d_lock);
- }
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
@@ -385,86 +371,76 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie);
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
-void inotify_unmount_inodes(struct super_block *sb)
-{
- int i;
-
- for_each_possible_cpu(i) {
- struct inode *inode, *next_i, *need_iput = NULL;
- struct list_head *list;
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_inodes, i);
-#else
- list = &sb->s_inodes;
-#endif
-
- list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
- struct inotify_watch *watch, *next_w;
- struct inode *need_iput_tmp;
- struct list_head *watches;
-
- spin_lock(&inode->i_lock);
- /*
- * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- */
- if (!inode->i_count) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- need_iput_tmp = need_iput;
- need_iput = NULL;
- /* In case inotify_remove_watch_locked() drops a reference. */
- if (inode != need_iput_tmp) {
- __iget(inode);
- } else
- need_iput_tmp = NULL;
-
- spin_unlock(&inode->i_lock);
-
- /* In case the dropping of a reference would nuke next_i. */
- if (&next_i->i_sb_list != list) {
- spin_lock(&next_i->i_lock);
- if (next_i->i_count &&
- !(next_i->i_state &
- (I_CLEAR|I_FREEING|I_WILL_FREE))) {
- __iget(next_i);
- need_iput = next_i;
- }
- spin_unlock(&next_i->i_lock);
- }
-
- if (need_iput_tmp)
- iput(need_iput_tmp);
-
- /* for each watch, send IN_UNMOUNT and then remove it */
- mutex_lock(&inode->inotify_mutex);
- watches = &inode->inotify_watches;
- list_for_each_entry_safe(watch, next_w, watches, i_list) {
- struct inotify_handle *ih = watch->ih;
- get_inotify_watch(watch);
- mutex_lock(&ih->mutex);
- ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL);
- inotify_remove_watch_locked(ih, watch);
- mutex_unlock(&ih->mutex);
- put_inotify_watch(watch);
- }
- mutex_unlock(&inode->inotify_mutex);
- iput(inode);
+void inotify_unmount_inodes(struct list_head *list)
+{
+ struct inode *inode, *next_i, *need_iput = NULL;
+
+ list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+ struct inotify_watch *watch, *next_w;
+ struct inode *need_iput_tmp;
+ struct list_head *watches;
+
+ /*
+ * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+ * I_WILL_FREE, or I_NEW which is fine because by that point
+ * the inode cannot have any associated watches.
+ */
+ if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+ continue;
+
+ /*
+ * If i_count is zero, the inode cannot have any watches and
+ * doing an __iget/iput with MS_ACTIVE clear would actually
+ * evict all inodes with zero i_count from icache which is
+ * unnecessarily violent and may in fact be illegal to do.
+ */
+ if (!atomic_read(&inode->i_count))
+ continue;
+
+ need_iput_tmp = need_iput;
+ need_iput = NULL;
+ /* In case inotify_remove_watch_locked() drops a reference. */
+ if (inode != need_iput_tmp)
+ __iget(inode);
+ else
+ need_iput_tmp = NULL;
+ /* In case the dropping of a reference would nuke next_i. */
+ if ((&next_i->i_sb_list != list) &&
+ atomic_read(&next_i->i_count) &&
+ !(next_i->i_state & (I_CLEAR | I_FREEING |
+ I_WILL_FREE))) {
+ __iget(next_i);
+ need_iput = next_i;
+ }
+
+ /*
+ * We can safely drop inode_lock here because we hold
+ * references on both inode and next_i. Also no new inodes
+ * will be added since the umount has begun. Finally,
+ * iprune_mutex keeps shrink_icache_memory() away.
+ */
+ spin_unlock(&inode_lock);
+
+ if (need_iput_tmp)
+ iput(need_iput_tmp);
+
+ /* for each watch, send IN_UNMOUNT and then remove it */
+ mutex_lock(&inode->inotify_mutex);
+ watches = &inode->inotify_watches;
+ list_for_each_entry_safe(watch, next_w, watches, i_list) {
+ struct inotify_handle *ih= watch->ih;
+ get_inotify_watch(watch);
+ mutex_lock(&ih->mutex);
+ ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
+ NULL, NULL);
+ inotify_remove_watch_locked(ih, watch);
+ mutex_unlock(&ih->mutex);
+ put_inotify_watch(watch);
}
+ mutex_unlock(&inode->inotify_mutex);
+ iput(inode);
+
+ spin_lock(&inode_lock);
}
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3475c2e98c80..80b04770e8e9 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2921,9 +2921,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
}
if ((sb->s_root = d_alloc_root(vol->root_ino))) {
/* We increment i_count simulating an ntfs_iget(). */
- spin_lock(&vol->root_ino->i_lock);
- vol->root_ino->i_count++;
- spin_unlock(&vol->root_ino->i_lock);
+ atomic_inc(&vol->root_ino->i_count);
ntfs_debug("Exiting, status successful.");
/* Release the default upcase if it has no users. */
mutex_lock(&ntfs_lock);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index d825f00eda71..b4957c7d9fe2 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -151,25 +151,23 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
struct list_head *p;
struct dentry *dentry = NULL;
- spin_lock(&inode->i_lock);
+ spin_lock(&dcache_lock);
+
list_for_each(p, &inode->i_dentry) {
dentry = list_entry(p, struct dentry, d_alias);
- spin_lock(&dentry->d_lock);
if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
mlog(0, "dentry found: %.*s\n",
dentry->d_name.len, dentry->d_name.name);
- dget_dlock(dentry);
- spin_unlock(&dentry->d_lock);
+ dget_locked(dentry);
break;
}
- spin_unlock(&dentry->d_lock);
dentry = NULL;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&dcache_lock);
return dentry;
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3d4b7e210bb9..50fb26a6a5f5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -719,9 +719,7 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_commit;
}
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
diff --git a/fs/open.c b/fs/open.c
index 9f57c39c921e..040cef72bc00 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -844,7 +844,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);
- file_sb_list_add(f, inode->i_sb);
+ file_move(f, &inode->i_sb->s_files);
error = security_dentry_open(f, cred);
if (error)
@@ -890,7 +890,7 @@ cleanup_all:
mnt_drop_write(mnt);
}
}
- file_sb_list_del(f);
+ file_kill(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
cleanup_file:
diff --git a/fs/pnode.c b/fs/pnode.c
index 5a48677f1562..8d5f392ec3d3 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -264,12 +264,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
prev_src_mnt = child;
}
out:
- vfsmount_write_lock();
+ spin_lock(&vfsmount_lock);
while (!list_empty(&tmp_list)) {
child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
umount_tree(child, 0, &umount_list);
}
- vfsmount_write_unlock();
+ spin_unlock(&vfsmount_lock);
release_mounts(&umount_list);
return ret;
}
@@ -279,7 +279,7 @@ out:
*/
static inline int do_refcount_check(struct vfsmount *mnt, int count)
{
- int mycount = count_mnt_count(mnt) - mnt->mnt_ghosts;
+ int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
return (mycount > count);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6ab0bd692968..8dce96c331f8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -650,17 +650,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
struct proc_mounts *p = file->private_data;
struct mnt_namespace *ns = p->ns;
unsigned res = POLLIN | POLLRDNORM;
- int cpu = get_cpu();
- put_cpu();
poll_wait(file, &ns->poll, wait);
- vfsmount_read_lock(cpu);
+ spin_lock(&vfsmount_lock);
if (p->event != ns->event) {
p->event = ns->event;
res |= POLLERR | POLLPRI;
}
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
return res;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index ee0a77b56810..445a02bcaab3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -65,16 +65,9 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
return inode;
}
-static void proc_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(proc_inode_cachep, PROC_I(inode));
-}
-
static void proc_destroy_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, proc_i_callback);
+ kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
static void init_once(void *foo)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 4c6a62baa109..6c9da00ddda2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -841,55 +841,35 @@ static int dqinit_needed(struct inode *inode, int type)
/* This routine is guarded by dqonoff_mutex mutex */
static void add_dquot_ref(struct super_block *sb, int type)
{
+ struct inode *inode, *old_inode = NULL;
int reserved = 0;
- int i;
- for_each_possible_cpu(i) {
- struct inode *inode, *old_inode = NULL;
- struct list_head *list;
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_inodes, i);
-#else
- list = &sb->s_inodes;
-#endif
-
- rcu_read_lock();
- list_for_each_entry_rcu(inode, list, i_sb_list) {
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- if (unlikely(inode_get_rsv_space(inode) > 0))
- reserved = 1;
+ spin_lock(&inode_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+ continue;
+ if (unlikely(inode_get_rsv_space(inode) > 0))
+ reserved = 1;
+ if (!atomic_read(&inode->i_writecount))
+ continue;
+ if (!dqinit_needed(inode, type))
+ continue;
- if (!atomic_read(&inode->i_writecount)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
+ __iget(inode);
+ spin_unlock(&inode_lock);
- if (!dqinit_needed(inode, type)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- __iget(inode);
- spin_unlock(&inode->i_lock);
- rcu_read_unlock();
-
- iput(old_inode);
- sb->dq_op->initialize(inode, type);
- /* We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the inode_lock.
- * We cannot iput the inode now as we can be holding the last
- * reference and we cannot iput it under inode_lock. So we
- * keep the reference and iput it later. */
- old_inode = inode;
- rcu_read_lock();
- }
- rcu_read_unlock();
iput(old_inode);
- }
+ sb->dq_op->initialize(inode, type);
+ /* We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the inode_lock.
+ * We cannot iput the inode now as we can be holding the last
+ * reference and we cannot iput it under inode_lock. So we
+ * keep the reference and iput it later. */
+ old_inode = inode;
+ spin_lock(&inode_lock);
+ }
+ spin_unlock(&inode_lock);
+ iput(old_inode);
if (reserved) {
printk(KERN_WARNING "VFS (%s): Writes happened before quota"
@@ -962,29 +942,20 @@ static void put_dquot_list(struct list_head *tofree_head)
static void remove_dquot_ref(struct super_block *sb, int type,
struct list_head *tofree_head)
{
- int i;
- for_each_possible_cpu(i) {
- struct inode *inode;
- struct list_head *list;
-#ifdef CONFIG_SMP
- list = per_cpu_ptr(sb->s_inodes, i);
-#else
- list = &sb->s_inodes;
-#endif
+ struct inode *inode;
- rcu_read_lock();
- list_for_each_entry_rcu(inode, list, i_sb_list) {
- /*
- * We have to scan also I_NEW inodes because they can already
- * have quota pointer initialized. Luckily, we need to touch
- * only quota pointers and these have separate locking
- * (dqptr_sem).
- */
- if (!IS_NOQUOTA(inode))
- remove_inode_dquot_ref(inode, type, tofree_head);
- }
- rcu_read_unlock();
+ spin_lock(&inode_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ /*
+ * We have to scan also I_NEW inodes because they can already
+ * have quota pointer initialized. Luckily, we need to touch
+ * only quota pointers and these have separate locking
+ * (dqptr_sem).
+ */
+ if (!IS_NOQUOTA(inode))
+ remove_inode_dquot_ref(inode, type, tofree_head);
}
+ spin_unlock(&inode_lock);
}
/* Gather all references from inodes and drop them */
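add_dquot_ref() above restores the companion idiom: a reference taken while
scanning the list must not be dropped while inode_lock is held, because the
final iput() can block, so each pass releases the previous iteration's
reference only after the lock has been dropped (the old_inode variable). A
compact userspace sketch of the same shape (illustrative names; work()
stands in for dq_op->initialize()):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
        int refcount;           /* protected by scan_lock */
        int wanted;
        struct obj *next;
    };

    static pthread_mutex_t scan_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Drop a reference; may free, so never call with scan_lock held. */
    static void obj_put(struct obj *o)
    {
        if (!o)
            return;
        pthread_mutex_lock(&scan_lock);
        if (--o->refcount == 0)
            free(o);
        pthread_mutex_unlock(&scan_lock);
    }

    static void work(struct obj *o)     /* may block */
    {
        (void)o;
    }

    static void scan(struct obj *head)
    {
        struct obj *o, *old = NULL;

        pthread_mutex_lock(&scan_lock);
        for (o = head; o; o = o->next) {
            if (!o->wanted)
                continue;
            o->refcount++;      /* keeps o valid across the lock drop */
            pthread_mutex_unlock(&scan_lock);

            obj_put(old);       /* previous pass's reference, lock not held */
            work(o);
            old = o;            /* defer this put until the next drop */

            pthread_mutex_lock(&scan_lock);
        }
        pthread_mutex_unlock(&scan_lock);
        obj_put(old);           /* reference from the final pass */
    }

    int main(void)
    {
        struct obj *b = calloc(1, sizeof(*b));
        struct obj *a = calloc(1, sizeof(*a));

        a->refcount = b->refcount = 1;  /* the list's own reference */
        a->wanted = 1;
        a->next = b;
        scan(a);
        obj_put(a);
        obj_put(b);
        return 0;
    }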
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index d3f9b7d05307..da2dba082e2d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -39,7 +39,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
BUG_ON(!S_ISREG(inode->i_mode));
/* fast out for when nothing needs to be done */
- if ((inode->i_count > 1 ||
+ if ((atomic_read(&inode->i_count) > 1 ||
!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
!tail_has_to_be_packed(inode)) &&
REISERFS_I(inode)->i_prealloc_count <= 0) {
@@ -94,7 +94,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
if (!err)
err = jbegin_failure;
- if (!err && inode->i_count <= 1 &&
+ if (!err && atomic_read(&inode->i_count) <= 1 &&
(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
tail_has_to_be_packed(inode)) {
/* if regular file is released by last holder and it has been
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 05b3240ed9ab..9d4dcf0b07cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1151,9 +1151,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = CURRENT_TIME_SEC;
reiserfs_update_sd(&th, inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_instantiate(dentry, inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_write_unlock(dir->i_sb);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 17f52e8bb739..5fa7118f04e1 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1477,7 +1477,7 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
** reading in the last block. The user will hit problems trying to
** read the file, but for now we just skip the indirect2direct
*/
- if (inode->i_count > 1 ||
+ if (atomic_read(&inode->i_count) > 1 ||
!tail_has_to_be_packed(inode) ||
!page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
/* leave tail in an unformatted node */
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1326fc0d20b2..eae7d9dbf3ff 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -6,7 +6,6 @@
*/
#include <linux/fs.h>
-#include <linux/mount.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -459,16 +458,13 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
char *buf;
size_t size = seq_get_buf(m, &buf);
int res = -ENAMETOOLONG;
- int cpu = get_cpu();
- put_cpu();
if (size) {
char *p;
- vfsmount_read_lock(cpu);
+ spin_lock(&dcache_lock);
p = __d_path(path, root, buf, size);
- vfsmount_read_unlock(cpu);
-
+ spin_unlock(&dcache_lock);
res = PTR_ERR(p);
if (!IS_ERR(p)) {
char *end = mangle_path(buf, p, esc);
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 61e482123488..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -62,7 +62,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&parent->d_lock);
+ spin_lock(&dcache_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -70,7 +70,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
smb_age_dentry(server, dentry);
next = next->next;
}
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
}
/*
@@ -96,13 +96,13 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
}
/* If a pointer is invalid, we search the dentry. */
- spin_lock(&parent->d_lock);
+ spin_lock(&dcache_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dent = list_entry(next, struct dentry, d_u.d_child);
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
- dget(dent);
+ dget_locked(dent);
else
dent = NULL;
goto out_unlock;
@@ -111,7 +111,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
}
dent = NULL;
out_unlock:
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
return dent;
}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index dafa6316da0b..3e4803b4427e 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -405,7 +405,6 @@ void
smb_renew_times(struct dentry * dentry)
{
dget(dentry);
-again:
spin_lock(&dentry->d_lock);
for (;;) {
struct dentry *parent;
@@ -414,13 +413,8 @@ again:
if (IS_ROOT(dentry))
break;
parent = dentry->d_parent;
- if (!spin_trylock(&parent->d_lock)) {
- spin_unlock(&dentry->d_lock);
- goto again;
- }
- dget_dlock(parent);
+ dget(parent);
spin_unlock(&dentry->d_lock);
- spin_unlock(&parent->d_lock);
dput(dentry);
dentry = parent;
spin_lock(&dentry->d_lock);
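The smb_renew_times() hunk above removes a trylock-and-retry dance that the
scalability patches needed: the function holds the child's d_lock and then
wants the parent's, which runs against the usual parent-before-child
acquisition order, so the parent lock could only be tried, with a restart
on failure. A compact userspace rendering of that (now removed) idiom, with
illustrative names:

    #include <pthread.h>

    struct dent {
        pthread_mutex_t lock;
        struct dent *parent;    /* parent == self models IS_ROOT */
    };

    /* Lock child then parent without deadlocking against threads that
     * take the locks in the opposite (parent-first) order. */
    static void lock_child_and_parent(struct dent *child,
                                      struct dent **parentp)
    {
        struct dent *parent;

        for (;;) {
            pthread_mutex_lock(&child->lock);
            parent = child->parent;     /* read under child->lock */
            if (parent == child ||
                pthread_mutex_trylock(&parent->lock) == 0)
                break;
            /* Parent is busy: back off completely and retry. */
            pthread_mutex_unlock(&child->lock);
        }
        *parentp = parent;
    }

    static void unlock_child_and_parent(struct dent *child,
                                        struct dent *parent)
    {
        if (parent != child)
            pthread_mutex_unlock(&parent->lock);
        pthread_mutex_unlock(&child->lock);
    }

    int main(void)
    {
        struct dent root = { PTHREAD_MUTEX_INITIALIZER, &root };
        struct dent leaf = { PTHREAD_MUTEX_INITIALIZER, &root };
        struct dent *parent;

        lock_child_and_parent(&leaf, &parent);
        unlock_child_and_parent(&leaf, parent);
        return 0;
    }

Once d_count is atomic again, dget(parent) needs no lock of its own, so the
trylock dance can disappear here and in the fs/notify and fs/smbfs/proc.c
hunks as well.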
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 7b0c74971ad4..71c29b6670b4 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -332,7 +332,6 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
* and store it in reversed order [see reverse_string()]
*/
dget(entry);
-again:
spin_lock(&entry->d_lock);
while (!IS_ROOT(entry)) {
struct dentry *parent;
@@ -351,7 +350,6 @@ again:
dput(entry);
return len;
}
-
reverse_string(path, len);
path += len;
if (unicode) {
@@ -363,11 +361,7 @@ again:
maxlen -= len+1;
parent = entry->d_parent;
- if (!spin_trylock(&parent->d_lock)) {
- spin_unlock(&entry->d_lock);
- goto again;
- }
- dget_dlock(parent);
+ dget(parent);
spin_unlock(&entry->d_lock);
dput(entry);
entry = parent;
diff --git a/fs/super.c b/fs/super.c
index 84c4aafd6fae..aff046b0fe78 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,41 +62,10 @@ static struct super_block *alloc_super(struct file_system_type *type)
s = NULL;
goto out;
}
-#ifdef CONFIG_SMP
- s->s_files = alloc_percpu(struct list_head);
- if (!s->s_files) {
- security_sb_free(s);
- kfree(s);
- s = NULL;
- goto out;
- } else {
- int i;
-
- for_each_possible_cpu(i)
- INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
- }
-#else
INIT_LIST_HEAD(&s->s_files);
-#endif
-#ifdef CONFIG_SMP
- s->s_inodes = alloc_percpu(struct list_head);
- if (!s->s_inodes) {
- free_percpu(s->s_files);
- security_sb_free(s);
- kfree(s);
- s = NULL;
- goto out;
- } else {
- int i;
-
- for_each_possible_cpu(i)
- INIT_LIST_HEAD(per_cpu_ptr(s->s_inodes, i));
- }
-#else
- INIT_LIST_HEAD(&s->s_inodes);
-#endif
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
+ INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
@@ -148,10 +117,6 @@ out:
*/
static inline void destroy_super(struct super_block *s)
{
-#ifdef CONFIG_SMP
- free_percpu(s->s_inodes);
- free_percpu(s->s_files);
-#endif
security_sb_free(s);
kfree(s->s_subtype);
kfree(s->s_options);
@@ -603,7 +568,7 @@ out:
int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
int retval;
- int remount_rw, remount_ro;
+ int remount_rw;
if (sb->s_frozen != SB_UNFROZEN)
return -EBUSY;
@@ -618,12 +583,9 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
shrink_dcache_sb(sb);
sync_filesystem(sb);
- remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
- remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
-
/* If we are remounting RDONLY and current sb is read/write,
make sure there are no rw files opened */
- if (remount_ro) {
+ if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
if (force)
mark_files_ro(sb);
else if (!fs_may_remount_ro(sb))
@@ -632,6 +594,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
if (retval < 0 && retval != -ENOSYS)
return -EBUSY;
}
+ remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
if (sb->s_op->remount_fs) {
retval = sb->s_op->remount_fs(sb, &flags, data);
@@ -641,14 +604,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
if (remount_rw)
vfs_dq_quota_on_remount(sb);
- /* Some filesystems modify their metadata via some other path
- than the bdev buffer cache (eg. use a private mapping, or
- directories in pagecache, etc). Also file data modifications
- go via their own mappings. So If we try to mount readonly
- then copy the filesystem from bdev, we could get stale data,
- so invalidate it to give a best effort at coherency. */
- if (remount_ro && sb->s_bdev)
- invalidate_bdev(sb->s_bdev);
return 0;
}
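alloc_super() above loses its per-cpu s_files and s_inodes lists and goes
back to single global ones. What is being removed is an instance of per-CPU
sharding: give each CPU its own list and lock so that additions on the hot
path never contend, at the cost of making every whole-list operation visit
all shards. A userspace sketch of the shape, with a fixed shard count
standing in for the CPU count (illustrative names only):

    #include <pthread.h>
    #include <stddef.h>

    #define NR_SHARDS 16

    struct node {
        struct node *next;
    };

    struct sharded_list {
        struct shard {
            pthread_mutex_t lock;
            struct node *head;
        } shard[NR_SHARDS];
    };

    static void sharded_init(struct sharded_list *l)
    {
        for (int i = 0; i < NR_SHARDS; i++) {
            pthread_mutex_init(&l->shard[i].lock, NULL);
            l->shard[i].head = NULL;
        }
    }

    /* Hot path: only the caller's own shard lock is touched. */
    static void sharded_add(struct sharded_list *l, struct node *n,
                            unsigned cpu)
    {
        struct shard *s = &l->shard[cpu % NR_SHARDS];

        pthread_mutex_lock(&s->lock);
        n->next = s->head;
        s->head = n;
        pthread_mutex_unlock(&s->lock);
    }

    /* Cold path: whole-list walks must visit every shard, which is what
     * the removed for_each_possible_cpu() walkers elsewhere in this
     * patch had to do. */
    static void sharded_walk(struct sharded_list *l,
                             void (*fn)(struct node *))
    {
        for (int i = 0; i < NR_SHARDS; i++) {
            pthread_mutex_lock(&l->shard[i].lock);
            for (struct node *n = l->shard[i].head; n; n = n->next)
                fn(n);
            pthread_mutex_unlock(&l->shard[i].lock);
        }
    }

    static void touch(struct node *n) { (void)n; }

    int main(void)
    {
        static struct sharded_list l;
        struct node n = { NULL };

        sharded_init(&l);
        sharded_add(&l, &n, 3);
        sharded_walk(&l, touch);
        return 0;
    }

The quota, fsnotify, and inotify hunks earlier in this patch are the
consumers of exactly that trade-off being unwound.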
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index d63da9be14cc..33e047b59b8d 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -126,9 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
return add_nondir(dentry, inode);
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 1eed16ffdd68..552fb0111fff 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -557,9 +557,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
lock_2_inodes(dir, inode);
inc_nlink(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d020118df827..43f9d19a6f33 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -342,7 +342,7 @@ static void ubifs_delete_inode(struct inode *inode)
goto out;
dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
- ubifs_assert(!inode->i_count);
+ ubifs_assert(!atomic_read(&inode->i_count));
ubifs_assert(inode->i_nlink == 0);
truncate_inode_pages(&inode->i_data, 0);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 9e85540e9e01..cd2115060fdc 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1108,9 +1108,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
inc_nlink(inode);
inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_instantiate(dentry, inode);
unlock_kernel();
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index ece37bbb3ea4..4c26d9e8bc94 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -178,9 +178,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
error = ufs_add_nondir(dentry, inode);
unlock_kernel();
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index f78e1df5353c..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -349,9 +349,7 @@ xfs_vn_link(
if (unlikely(error))
return -error;
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count);
d_instantiate(dentry, inode);
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index a1dc3a153ef9..c22a608321a3 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -539,7 +539,7 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->count = VFS_I(ip)->i_count;
+ __entry->count = atomic_read(&VFS_I(ip)->i_count);
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 53a14575bf3b..ec1f28c4fc4f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -493,10 +493,8 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
#define IHOLD(ip) \
do { \
- spin_lock(&VFS_I(ip)->i_lock); \
- ASSERT(&VFS_I(ip)->i_count > 0); \
- VFS_I(ip)->i_count++; \
- spin_unlock(&VFS_I(ip)->i_lock); \
+ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
+ atomic_inc(&(VFS_I(ip)->i_count)); \
trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)
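A brief aside on the IHOLD() shape kept above: the do { ... } while (0)
wrapper is the standard C trick for making a multi-statement macro behave
as a single statement. A self-contained illustration of what goes wrong
without it:

    #include <assert.h>

    /* Naive form: two statements that come apart under an `if'. */
    #define HOLD_BAD(x)     assert((x) > 0); (x)++

    /* do/while(0) expands to one statement that still takes a trailing
     * semicolon, so it nests safely in if/else. */
    #define HOLD_GOOD(x)            \
        do {                        \
            assert((x) > 0);        \
            (x)++;                  \
        } while (0)

    int main(void)
    {
        int count = 1, cond = 0;

        if (cond)
            HOLD_GOOD(count);
        else        /* with HOLD_BAD this else would not even compile */
            count--;

        return count;   /* 0: only the else branch ran */
    }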
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 63b98bec6630..30b93b2a01a4 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -5,7 +5,6 @@
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
-#include <linux/seqlock.h>
#include <linux/cache.h>
#include <linux/rcupdate.h>
@@ -38,8 +37,8 @@ struct qstr {
};
struct dentry_stat_t {
- int nr_dentry; /* unused */
- int nr_unused; /* protected by dcache_lru_lock */
+ int nr_dentry;
+ int nr_unused;
int age_limit; /* age in seconds */
int want_pages; /* pages requested by system */
int dummy[2];
@@ -88,30 +87,20 @@ full_name_hash(const unsigned char *name, unsigned int len)
#endif
struct dentry {
- /*
- * The following 64 bytes of fields (on 64-bit) fit into a 64 byte
- * cacheline. They are critical for path lookups. We can do most
- * path lookups in 2 cachelines (these + name string) if we have
- * correct sizing and alignment here.
- *
- * XXX: d_sb for revalidate needs to be duplicated into a d_flag.
- */
atomic_t d_count;
unsigned int d_flags; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
int d_mounted;
- seqcount_t d_seq; /* per dentry seqlock */
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
+ /*
+ * The next three fields are touched by __d_lookup. Place them here
+ * so they all fit in a cache line.
+ */
struct hlist_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name;
- const struct dentry_operations *d_op;
- /*
- * The following 64 bytes of lists tend to be required for tree
- * manipulation, not required for lookups.
- */
struct list_head d_lru; /* LRU list */
/*
* d_child and d_rcu can share memory
@@ -122,14 +111,10 @@ struct dentry {
} d_u;
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
-
- /*
- * These following fields may be needed by some types of lookups, and
- * d_iname is likely to be required too, so keep them together.
- */
+ unsigned long d_time; /* used by d_revalidate */
+ const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
void *d_fsdata; /* fs-specific data */
- unsigned long d_time; /* used by d_revalidate */
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};
@@ -165,44 +150,43 @@ struct dentry_operations {
/*
locking rules:
-               big lock        d_lock          may block
-d_revalidate:  no              no              yes
-d_hash         no              no              yes
-d_compare:     no              yes             no
-d_delete:      no              no              no
-d_release:     no              no              yes
-d_iput:        no              no              yes
+               big lock        dcache_lock     d_lock          may block
+d_revalidate:  no              no              no              yes
+d_hash         no              no              no              yes
+d_compare:     no              yes             yes             no
+d_delete:      no              yes             no              no
+d_release:     no              no              no              yes
+d_iput:        no              no              no              yes
*/
/* d_flags entries */
#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */
-#define DCACHE_NFSFS_RENAMED 0x0002
- /* this dentry has been "silly renamed" and has to be deleted on the last
- * dput() */
-
-#define DCACHE_DISCONNECTED 0x0004
- /* This dentry is possibly not currently connected to the dcache tree, in
- * which case its parent will either be itself, or will have this flag as
- * well. nfsd will not use a dentry with this bit set, but will first
- * endeavour to clear the bit either by discovering that it is connected,
- * or by performing lookup operations. Any filesystem which supports
- * nfsd_operations MUST have a lookup function which, if it finds a
- * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
- * dentry into place and return that dentry rather than the passed one,
- * typically using d_splice_alias. */
+#define DCACHE_NFSFS_RENAMED 0x0002 /* this dentry has been "silly
+ * renamed" and has to be
+ * deleted on the last dput()
+ */
+#define DCACHE_DISCONNECTED 0x0004
+ /* This dentry is possibly not currently connected to the dcache tree,
+ * in which case its parent will either be itself, or will have this
+ * flag as well. nfsd will not use a dentry with this bit set, but will
+ * first endeavour to clear the bit either by discovering that it is
+ * connected, or by performing lookup operations. Any filesystem which
+ * supports nfsd_operations MUST have a lookup function which, if it finds
+ * a directory inode with a DCACHE_DISCONNECTED dentry, will d_move
+ * that dentry into place and return that dentry rather than the passed one,
+ * typically using d_splice_alias.
+ */
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010
-#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
- /* Parent inode is watched by inotify */
+
+#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */
#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080
- /* Parent inode is watched by some fsnotify listener */
-#define DCACHE_MOUNTED 0x0100 /* is a mountpoint */
-#define DCACHE_GENOCIDE 0x0200 /* being genocided */
+#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */
+extern spinlock_t dcache_lock;
extern seqlock_t rename_lock;
/**
@@ -220,8 +204,23 @@ extern seqlock_t rename_lock;
*
* __d_drop requires dentry->d_lock.
*/
-void d_drop(struct dentry *dentry);
-void __d_drop(struct dentry *dentry);
+
+static inline void __d_drop(struct dentry *dentry)
+{
+ if (!(dentry->d_flags & DCACHE_UNHASHED)) {
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_del_rcu(&dentry->d_hash);
+ }
+}
+
+static inline void d_drop(struct dentry *dentry)
+{
+ spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+}
static inline int dname_external(struct dentry *dentry)
{
@@ -300,11 +299,9 @@ extern void d_move(struct dentry *, struct dentry *);
extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
/* appendix may either be NULL or be used for transname suffixes */
-extern struct dentry *d_lookup(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
-extern struct dentry *d_lookup_rcu(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup_rcu(struct dentry *, struct qstr *);
-extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
+extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *);
/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
@@ -321,29 +318,28 @@ extern char *dentry_path(struct dentry *, char *, int);
/* Allocation counts.. */
/**
- * dget, dget_dlock - get a reference to a dentry
+ * dget, dget_locked - get a reference to a dentry
* @dentry: dentry to get a reference to
*
* Given a dentry or %NULL pointer increment the reference count
* if appropriate and return the dentry. A dentry will not be
- * destroyed when it has references.
+ * destroyed when it has references. dget() should never be
+ * called for dentries with a zero reference count. For these cases
+ * (preferably none, functions in dcache.c are sufficient for normal
+ * needs and they take necessary precautions) you should hold dcache_lock
+ * and call dget_locked() instead of dget().
*/
-static inline struct dentry *dget_dlock(struct dentry *dentry)
-{
- if (dentry)
- atomic_inc(&dentry->d_count);
- return dentry;
-}
-
+
static inline struct dentry *dget(struct dentry *dentry)
{
if (dentry) {
- dget_dlock(dentry);
+ BUG_ON(!atomic_read(&dentry->d_count));
+ atomic_inc(&dentry->d_count);
}
return dentry;
}
-extern struct dentry *dget_parent(struct dentry *dentry);
+extern struct dentry * dget_locked(struct dentry *);
/**
* d_unhashed - is dentry hashed
@@ -362,6 +358,16 @@ static inline int d_unlinked(struct dentry *dentry)
return d_unhashed(dentry) && !IS_ROOT(dentry);
}
+static inline struct dentry *dget_parent(struct dentry *dentry)
+{
+ struct dentry *ret;
+
+ spin_lock(&dentry->d_lock);
+ ret = dget(dentry->d_parent);
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+
extern void dput(struct dentry *);
static inline int d_mountpoint(struct dentry *dentry)
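Taken together, the dcache.h hunks restore the two-level scheme: dcache_lock is the outer lock, d_lock the inner one, and a dentry whose d_count may have dropped to zero must be pinned with dget_locked() under dcache_lock rather than dget(). A sketch of both restored entry points, with illustrative helper names:

    #include <linux/dcache.h>
    #include <linux/spinlock.h>

    /* Unhash a dentry so future lookups miss it; the inline d_drop()
     * takes dcache_lock then d_lock around __d_drop(). */
    static void example_invalidate(struct dentry *dentry)
    {
        d_drop(dentry);
    }

    /* Pin a dentry found while walking a list under dcache_lock,
     * where d_count may legitimately be zero. */
    static struct dentry *example_pin(struct dentry *found)
    {
        struct dentry *ret;

        spin_lock(&dcache_lock);
        ret = dget_locked(found);
        spin_unlock(&dcache_lock);
        return ret;
    }

The BUG_ON() in dget() enforces the division of labour: it must only ever see dentries that already hold a reference.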
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4981e6ee3ba5..5191f49c2fec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -406,8 +406,6 @@ extern struct files_stat_struct files_stat;
extern int get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
-extern struct percpu_counter nr_inodes;
-extern int get_nr_inodes(void);
extern int leases_enable, lease_break_time;
#ifdef CONFIG_DNOTIFY
extern int dir_notify_enable;
@@ -727,15 +725,9 @@ struct inode {
struct hlist_node i_hash;
struct list_head i_list; /* backing dev IO list */
struct list_head i_sb_list;
- union {
- struct list_head i_dentry;
- struct rcu_head i_rcu;
- };
+ struct list_head i_dentry;
unsigned long i_ino;
-#ifdef CONFIG_SMP
- int i_sb_list_cpu;
-#endif
- unsigned int i_count;
+ atomic_t i_count;
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
@@ -932,9 +924,6 @@ struct file {
#define f_vfsmnt f_path.mnt
const struct file_operations *f_op;
spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
-#ifdef CONFIG_SMP
- int f_sb_list_cpu;
-#endif
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
@@ -959,6 +948,9 @@ struct file {
unsigned long f_mnt_write_state;
#endif
};
+extern spinlock_t files_lock;
+#define file_list_lock() spin_lock(&files_lock);
+#define file_list_unlock() spin_unlock(&files_lock);
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1347,17 +1339,9 @@ struct super_block {
#endif
struct xattr_handler **s_xattr;
- struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
-#ifdef CONFIG_SMP
- struct list_head *s_inodes;
-#else
struct list_head s_inodes; /* all inodes */
-#endif
-#ifdef CONFIG_SMP
- struct list_head *s_files;
-#else
+ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
-#endif
/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
@@ -2053,7 +2037,6 @@ extern const struct file_operations read_pipefifo_fops;
extern const struct file_operations write_pipefifo_fops;
extern const struct file_operations rdwr_pipefifo_fops;
-extern void mark_files_ro(struct super_block *sb);
extern int fs_may_remount_ro(struct super_block *);
#ifdef CONFIG_BLOCK
@@ -2185,6 +2168,7 @@ extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struc
extern int insert_inode_locked(struct inode *);
extern void unlock_new_inode(struct inode *);
+extern void __iget(struct inode * inode);
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void destroy_inode(struct inode *);
@@ -2193,17 +2177,14 @@ extern struct inode *new_inode(struct super_block *);
extern int should_remove_suid(struct dentry *);
extern int file_remove_suid(struct file *);
-extern void inode_sb_list_del(struct inode *inode);
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
-extern void __remove_inode_hash(struct inode *);
extern void remove_inode_hash(struct inode *);
static inline void insert_inode_hash(struct inode *inode) {
__insert_inode_hash(inode, inode->i_ino);
}
-extern struct file * get_empty_filp(void);
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
+extern void file_move(struct file *f, struct list_head *list);
+extern void file_kill(struct file *f);
#ifdef CONFIG_BLOCK
struct bio;
extern void submit_bio(int, struct bio *);
@@ -2404,20 +2385,10 @@ extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
extern void save_mount_options(struct super_block *sb, char *options);
extern void replace_mount_options(struct super_block *sb, char *options);
-static inline void __iget(struct inode *inode)
-{
- assert_spin_locked(&inode->i_lock);
- inode->i_count++;
-}
-
static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
- /*
- * Don't strictly need d_lock here? If the parent ino could change
- * then surely we'd have a deeper race in the caller?
- */
spin_lock(&dentry->d_lock);
res = dentry->d_parent->d_inode->i_ino;
spin_unlock(&dentry->d_lock);
@@ -2493,8 +2464,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-int proc_nr_inodes(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+
int __init get_filesystem_list(char *buf);
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
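The fs.h side mirrors this for inodes: i_count is an atomic_t, so a caller that already owns a reference bumps it directly, while __iget() (now exported rather than a static inline asserting i_lock) is for inodes discovered on the hash or s_inodes lists under inode_lock. A sketch, with illustrative helper names:

    #include <linux/fs.h>
    #include <linux/writeback.h>   /* inode_lock */

    /* Caller already holds a reference, e.g. via a pinned dentry: */
    static void example_extra_ref(struct inode *inode)
    {
        atomic_inc(&inode->i_count);
    }

    /* Inode found by walking a list protected by inode_lock: */
    static void example_ref_locked(struct inode *inode)
    {
        spin_lock(&inode_lock);
        __iget(inode);
        spin_unlock(&inode_lock);
    }

Either way the reference is dropped with iput().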
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e776fb5ed01a..4d6f47b51189 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -276,10 +276,10 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
{
struct dentry *parent;
+ assert_spin_locked(&dcache_lock);
assert_spin_locked(&dentry->d_lock);
parent = dentry->d_parent;
- /* XXX: after dcache_lock removal, there is a race with parent->d_inode and fsnotify_inode_watches_children. must fix */
if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
@@ -288,12 +288,15 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
/*
* fsnotify_d_instantiate - instantiate a dentry for inode
+ * Called with dcache_lock held.
*/
static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
{
if (!inode)
return;
+ assert_spin_locked(&dcache_lock);
+
spin_lock(&dentry->d_lock);
__fsnotify_update_dcache_flags(dentry);
spin_unlock(&dentry->d_lock);
@@ -344,7 +347,7 @@ extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
-extern void fsnotify_unmount_inodes(struct super_block *sb);
+extern void fsnotify_unmount_inodes(struct list_head *list);
/* put here because inotify does some weird stuff when destroying watches */
extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
@@ -374,7 +377,7 @@ static inline u32 fsnotify_get_cookie(void)
return 0;
}
-static inline void fsnotify_unmount_inodes(struct super_block *sb)
+static inline void fsnotify_unmount_inodes(struct list_head *list)
{}
#endif /* CONFIG_FSNOTIFY */
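The re-added assertions document the calling convention rather than change behaviour: under the dcache_lock scheme, d_parent is only stable while dcache_lock is held, so both helpers insist on it before inspecting the parent's inode. A conforming caller looks like this sketch:

    spin_lock(&dcache_lock);
    __fsnotify_d_instantiate(dentry, inode);
    spin_unlock(&dcache_lock);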
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index e8bcd7c6c0cc..37ea2894b3c0 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -111,7 +111,7 @@ extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
const char *, struct inode *);
extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
const char *);
-extern void inotify_unmount_inodes(struct super_block *);
+extern void inotify_unmount_inodes(struct list_head *);
extern void inotify_inode_is_dead(struct inode *);
extern u32 inotify_get_cookie(void);
@@ -161,7 +161,7 @@ static inline void inotify_dentry_parent_queue_event(struct dentry *dentry,
{
}
-static inline void inotify_unmount_inodes(struct super_block *sb)
+static inline void inotify_unmount_inodes(struct list_head *list)
{
}
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 849e70535047..5d5275364867 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -31,13 +31,11 @@ struct mnt_namespace;
#define MNT_SHRINKABLE 0x100
#define MNT_WRITE_HOLD 0x200
-#define MNT_MOUNTED 0x400
#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is an unbindable mount */
#define MNT_PNODE_MASK 0x3000 /* propagation flag mask */
-
struct vfsmount {
struct list_head mnt_hash;
struct vfsmount *mnt_parent; /* fs we are mounted on */
@@ -58,6 +56,12 @@ struct vfsmount {
struct mnt_namespace *mnt_ns; /* containing namespace */
int mnt_id; /* mount identifier */
int mnt_group_id; /* peer group identifier */
+ /*
+ * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
+ * to keep these frequently modified fields in a separate cache line
+ * (so that reads of mnt_flags won't ping-pong on SMP machines)
+ */
+ atomic_t mnt_count;
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
int mnt_ghosts;
@@ -66,11 +70,6 @@ struct vfsmount {
#else
int mnt_writers;
#endif
-#ifdef CONFIG_SMP
- int *mnt_count;
-#else
- int mnt_count;
-#endif
};
static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
@@ -82,28 +81,32 @@ static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
#endif
}
-struct file; /* forward dec */
-
-extern void vfsmount_read_lock(int cpu);
-extern void vfsmount_read_unlock(int cpu);
-extern void vfsmount_write_lock(void);
-extern void vfsmount_write_unlock(void);
+static inline struct vfsmount *mntget(struct vfsmount *mnt)
+{
+ if (mnt)
+ atomic_inc(&mnt->mnt_count);
+ return mnt;
+}
-extern unsigned int count_mnt_count(struct vfsmount *mnt);
+struct file; /* forward dec */
extern int mnt_want_write(struct vfsmount *mnt);
extern int mnt_want_write_file(struct file *file);
extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
-
extern void mntput_no_expire(struct vfsmount *mnt);
-extern struct vfsmount *mntget(struct vfsmount *mnt);
-extern void mntput(struct vfsmount *mnt);
-
extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt);
extern int __mnt_is_readonly(struct vfsmount *mnt);
+static inline void mntput(struct vfsmount *mnt)
+{
+ if (mnt) {
+ mnt->mnt_expiry_mark = 0;
+ mntput_no_expire(mnt);
+ }
+}
+
extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
const char *name, void *data);
@@ -120,6 +123,7 @@ extern int do_add_mount(struct vfsmount *newmnt, struct path *path,
extern void mark_mounts_for_expiry(struct list_head *mounts);
+extern spinlock_t vfsmount_lock;
extern dev_t name_to_dev_t(char *name);
#endif /* _LINUX_MOUNT_H */
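mntget() and mntput() return to being inlines over the atomic mnt_count; mntput() additionally clears mnt_expiry_mark so a mount that was just used is not swept up by mark_mounts_for_expiry(). Typical usage, sketched with an illustrative function:

    #include <linux/mount.h>

    static struct vfsmount *example_pin_mount(struct vfsmount *mnt)
    {
        return mntget(mnt);     /* atomic_inc(&mnt->mnt_count) */
    }

    /* ... when done: mntput(mnt) clears mnt_expiry_mark and
     * calls mntput_no_expire() ... */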
diff --git a/include/linux/tty.h b/include/linux/tty.h
index e5c5ba2327f1..42f207676016 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -465,7 +465,6 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
extern struct mutex tty_mutex;
-extern spinlock_t tty_files_lock;
extern void tty_write_unlock(struct tty_struct *tty);
extern int tty_write_lock(struct tty_struct *tty, int ndelay);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 15e8bcd90cd1..76e8903cd204 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -9,8 +9,8 @@
struct backing_dev_info;
-extern spinlock_t sb_inode_list_lock;
-extern spinlock_t wb_inode_list_lock;
+extern spinlock_t inode_lock;
+extern struct list_head inode_in_use;
extern struct list_head inode_unused;
/*
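inode_lock is global again and covers the in-use/unused inode lists as well as the per-bdi dirty/io lists, replacing the split sb_inode_list_lock/wb_inode_list_lock. A list walk under the restored lock, sketched with an illustrative function:

    #include <linux/fs.h>
    #include <linux/writeback.h>

    static void example_walk_unused(void)
    {
        struct inode *inode;

        spin_lock(&inode_lock);
        list_for_each_entry(inode, &inode_unused, i_list) {
            /* i_list membership is stable while inode_lock is held */
        }
        spin_unlock(&inode_lock);
    }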
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index f3a43f7747ca..01582e5fe998 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -237,16 +237,9 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void mqueue_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
-}
-
static void mqueue_destroy_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, mqueue_i_callback);
+ kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}
static void mqueue_delete_inode(struct inode *inode)
@@ -769,11 +762,8 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
}
inode = dentry->d_inode;
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_count++;
- spin_unlock(&inode->i_lock);
- }
+ if (inode)
+ atomic_inc(&inode->i_count);
err = mnt_want_write(ipc_ns->mq_mnt);
if (err)
goto out_err;
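The deleted mqueue_i_callback() was part of the series' RCU-freed inodes: destroy_inode deferred the kmem_cache_free() through call_rcu() so that lock-free lookups could still touch a dying inode for a grace period. With i_rcu gone from struct inode, the revert returns to freeing synchronously; the shmem, socket and rpc_pipe hunks below undo the same pattern.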
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index a4bdd2a18f1a..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -720,8 +720,6 @@ int audit_tag_tree(char *old, char *new)
struct vfsmount *mnt;
struct dentry *dentry;
int err;
- int cpu = get_cpu();
- put_cpu();
err = kern_path(new, 0, &path);
if (err)
@@ -763,15 +761,15 @@ int audit_tag_tree(char *old, char *new)
continue;
}
- vfsmount_read_lock(cpu);
+ spin_lock(&vfsmount_lock);
if (!is_under(mnt, dentry, &path)) {
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
path_put(&path);
put_tree(tree);
mutex_lock(&audit_filter_mutex);
continue;
}
- vfsmount_read_unlock(cpu);
+ spin_unlock(&vfsmount_lock);
path_put(&path);
list_for_each_entry(p, &list, mnt_list) {
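With the per-cpu vfsmount reader locks gone, mount-tree checks such as is_under() are serialized by the single global vfsmount_lock again, which is also why the get_cpu()/put_cpu() dance at the top of audit_tag_tree() disappears. The restored pattern, in brief:

    spin_lock(&vfsmount_lock);
    if (is_under(mnt, dentry, &path)) {
        /* path lies beneath mnt/dentry in the mount tree */
    }
    spin_unlock(&vfsmount_lock);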
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 84ad27652d40..11a8b34cfae0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -808,29 +808,25 @@ static void cgroup_clear_directory(struct dentry *dentry)
struct list_head *node;
BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
node = dentry->d_subdirs.next;
while (node != &dentry->d_subdirs) {
struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-
- spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
list_del_init(node);
if (d->d_inode) {
/* This should never be called on a cgroup
* directory with child cgroups */
BUG_ON(d->d_inode->i_mode & S_IFDIR);
- dget_dlock(d);
- spin_unlock(&d->d_lock);
- spin_unlock(&dentry->d_lock);
+ d = dget_locked(d);
+ spin_unlock(&dcache_lock);
d_delete(d);
simple_unlink(dentry->d_inode, d);
dput(d);
- spin_lock(&dentry->d_lock);
- } else
- spin_unlock(&d->d_lock);
+ spin_lock(&dcache_lock);
+ }
node = dentry->d_subdirs.next;
}
- spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
}
/*
@@ -838,16 +834,11 @@ static void cgroup_clear_directory(struct dentry *dentry)
*/
static void cgroup_d_remove_dir(struct dentry *dentry)
{
- struct dentry *parent;
-
cgroup_clear_directory(dentry);
- parent = dentry->d_parent;
- spin_lock(&parent->d_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock(&dcache_lock);
list_del_init(&dentry->d_u.d_child);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dcache_lock);
remove_dir(dentry);
}
@@ -3173,7 +3164,9 @@ again:
list_del(&cgrp->sibling);
cgroup_unlock_hierarchy(cgrp->root);
+ spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
+ spin_unlock(&d->d_lock);
cgroup_d_remove_dir(d);
dput(d);
diff --git a/kernel/futex.c b/kernel/futex.c
index a6cec3270c78..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -168,9 +168,7 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- spin_lock(&key->shared.inode->i_lock);
- key->shared.inode->i_count++;
- spin_unlock(&key->shared.inode->i_lock);
+ atomic_inc(&key->shared.inode->i_count);
break;
case FUT_OFF_MMSHARED:
atomic_inc(&key->private.mm->mm_count);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e4baabf7893e..8a68b2448468 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1301,14 +1301,14 @@ static struct ctl_table fs_table[] = {
.data = &inodes_stat,
.maxlen = 2*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_nr_inodes,
+ .proc_handler = proc_dointvec,
},
{
.procname = "inode-state",
.data = &inodes_stat,
.maxlen = 7*sizeof(int),
.mode = 0444,
- .proc_handler = &proc_nr_inodes,
+ .proc_handler = proc_dointvec,
},
{
.procname = "file-nr",
@@ -1334,12 +1334,6 @@ static struct ctl_table fs_table[] = {
.extra2 = &sysctl_nr_open_max,
},
{
- /*
- * dentry_stat has an atomic_t member, so this is a bit of
- * a hack, but it works for the moment, and I won't bother
- * changing it now because we'll probably want to change to
- * a more scalable counter anyway.
- */
.procname = "dentry-state",
.data = &dentry_stat,
.maxlen = 6*sizeof(int),
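The inode-nr and inode-state sysctls go back to plain proc_dointvec: after the revert, inodes_stat holds ordinary integers updated under inode_lock, so there is no per-cpu counter that a dedicated proc_nr_inodes() handler would need to fold up at read time.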
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8a0d9aa7b207..0e8ca0347707 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -71,7 +71,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
* RCU on the reader side
*/
nr_wb = nr_dirty = nr_io = nr_more_io = 0;
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
list_for_each_entry(wb, &bdi->wb_list, list) {
nr_wb++;
list_for_each_entry(inode, &wb->b_dirty, i_list)
@@ -81,7 +81,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
list_for_each_entry(inode, &wb->b_more_io, i_list)
nr_more_io++;
}
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lock);
get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
@@ -696,11 +696,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
if (bdi_has_dirty_io(bdi)) {
struct bdi_writeback *dst = &default_backing_dev_info.wb;
- spin_lock(&wb_inode_list_lock);
+ spin_lock(&inode_lock);
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
list_splice(&bdi->wb.b_io, &dst->b_io);
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
- spin_unlock(&wb_inode_list_lock);
+ spin_unlock(&inode_lock);
}
bdi_unregister(bdi);
diff --git a/mm/shmem.c b/mm/shmem.c
index d7905296c2d3..eef4ebea5158 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1882,9 +1882,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- spin_lock(&inode->i_lock);
- inode->i_count++; /* New dentry reference */
- spin_unlock(&inode->i_lock);
+ atomic_inc(&inode->i_count); /* New dentry reference */
dget(dentry); /* Extra pinning count for the created dentry */
d_instantiate(dentry, inode);
out:
@@ -2397,20 +2395,13 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
return &p->vfs_inode;
}
-static void shmem_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
-}
-
static void shmem_destroy_inode(struct inode *inode)
{
if ((inode->i_mode & S_IFMT) == S_IFREG) {
/* only struct inode is valid if it's an inline symlink */
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
}
- call_rcu(&inode->i_rcu, shmem_i_callback);
+ kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
static void init_once(void *foo)
diff --git a/net/socket.c b/net/socket.c
index 371eaf092a31..769c386bd428 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -263,19 +263,12 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void sock_i_callback(struct rcu_head *head)
+static void sock_destroy_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(sock_inode_cachep,
container_of(inode, struct socket_alloc, vfs_inode));
}
-static void sock_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, sock_i_callback);
-}
-
static void init_once(void *foo)
{
struct socket_alloc *ei = (struct socket_alloc *)foo;
@@ -375,9 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
&socket_file_ops);
if (unlikely(!file)) {
/* drop dentry, keep inode */
- spin_lock(&path.dentry->d_inode->i_lock);
- path.dentry->d_inode->i_count++;
- spin_unlock(&path.dentry->d_inode->i_lock);
+ atomic_inc(&path.dentry->d_inode->i_count);
path_put(&path);
put_unused_fd(fd);
return -ENFILE;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 6792d5634fa0..27a23785a50d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -162,17 +162,9 @@ rpc_alloc_inode(struct super_block *sb)
}
static void
-rpc_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
-}
-
-static void
rpc_destroy_inode(struct inode *inode)
{
- call_rcu(&inode->i_rcu, rpc_i_callback);
+ kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
}
static int
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f013de205ea8..9a2ee845e9d4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2246,7 +2246,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
tty = get_current_tty();
if (tty) {
- spin_lock(&tty_files_lock);
+ file_list_lock();
if (!list_empty(&tty->tty_files)) {
struct inode *inode;
@@ -2262,7 +2262,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
drop_tty = 1;
}
}
- spin_unlock(&tty_files_lock);
+ file_list_unlock();
tty_kref_put(tty);
}
/* Reset controlling tty. */
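flush_unauthorized_files() walks tty->tty_files, and after the revert that list is once more protected by the global files_lock through the file_list_lock()/file_list_unlock() macros from fs.h instead of a dedicated tty_files_lock. A minimal conforming check, sketched with an illustrative function:

    #include <linux/fs.h>
    #include <linux/tty.h>

    static int example_tty_busy(struct tty_struct *tty)
    {
        int busy;

        file_list_lock();
        busy = !list_empty(&tty->tty_files);
        file_list_unlock();
        return busy;
    }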
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index a016c04e5d52..fab36fdf2769 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -943,28 +943,24 @@ static void sel_remove_entries(struct dentry *de)
{
struct list_head *node;
- spin_lock(&de->d_lock);
+ spin_lock(&dcache_lock);
node = de->d_subdirs.next;
while (node != &de->d_subdirs) {
struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-
- spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
list_del_init(node);
if (d->d_inode) {
- dget_dlock(d);
- spin_unlock(&de->d_lock);
- spin_unlock(&d->d_lock);
+ d = dget_locked(d);
+ spin_unlock(&dcache_lock);
d_delete(d);
simple_unlink(de->d_inode, d);
dput(d);
- spin_lock(&de->d_lock);
- } else
- spin_unlock(&d->d_lock);
+ spin_lock(&dcache_lock);
+ }
node = de->d_subdirs.next;
}
- spin_unlock(&de->d_lock);
+ spin_unlock(&dcache_lock);
}
#define BOOL_DIR_NAME "booleans"
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 4346c48964c3..18369d497eb8 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -93,21 +93,21 @@ int tomoyo_realpath_from_path2(struct path *path, char *newname,
struct path root;
struct path ns_root = { };
struct path tmp;
- int cpu = get_cpu();
- put_cpu();
read_lock(&current->fs->lock);
root = current->fs->root;
path_get(&root);
read_unlock(&current->fs->lock);
- vfsmount_read_lock(cpu);
+ spin_lock(&vfsmount_lock);
if (root.mnt && root.mnt->mnt_ns)
ns_root.mnt = mntget(root.mnt->mnt_ns->root);
if (ns_root.mnt)
ns_root.dentry = dget(ns_root.mnt->mnt_root);
+ spin_unlock(&vfsmount_lock);
+ spin_lock(&dcache_lock);
tmp = ns_root;
sp = __d_path(path, &tmp, newname, newname_len);
- vfsmount_read_unlock(cpu);
+ spin_unlock(&dcache_lock);
path_put(&root);
path_put(&ns_root);
/* Prepend "/proc" prefix if using internal proc vfs mount. */
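As in audit_tag_tree(), the per-cpu reader lock is replaced by straight vfsmount_lock here, and __d_path() runs under dcache_lock, which is what keeps the d_parent chain stable while the path string is assembled. Condensed, the restored flow is:

    spin_lock(&vfsmount_lock);
    /* resolve ns_root from root.mnt->mnt_ns */
    spin_unlock(&vfsmount_lock);

    spin_lock(&dcache_lock);
    sp = __d_path(path, &tmp, newname, newname_len);
    spin_unlock(&dcache_lock);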