diff options
author | John Stultz <johnstul@us.ibm.com> | 2010-02-22 16:48:26 -0800 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-04-27 17:32:58 +0200 |
commit | 23695896abe0d1c813c2c88f0dd165d435d08689 (patch) | |
tree | bc676786565c37a9dce4a8ca07d0a393e753f43c /fs | |
parent | 3f2ef0d48b33d3423f99077a6efd6f4eea3ac090 (diff) | |
download | lwn-23695896abe0d1c813c2c88f0dd165d435d08689.tar.gz lwn-23695896abe0d1c813c2c88f0dd165d435d08689.zip |
Fix vfsmount_read_lock to work with -rt
Because vfsmount_read_lock acquires the vfsmount spinlock for the current cpu,
it causes problems with -rt, as you might migrate between cpus between a
lock and unlock.
This patch fixes the issue by having the caller pick a cpu, then consistently
use that cpu between the lock and unlock. We may migrate in between lock and
unlock, but that's ok because we're not doing anything cpu specific, other
than avoiding contention on the read side across the cpus.
It's not pretty, but it works and statistically shouldn't hurt performance.
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dcache.c | 20 | ||||
-rw-r--r-- | fs/namei.c | 24 | ||||
-rw-r--r-- | fs/namespace.c | 23 | ||||
-rw-r--r-- | fs/proc/base.c | 6 | ||||
-rw-r--r-- | fs/seq_file.c | 6 |
5 files changed, 49 insertions, 30 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index c6c034eb0398..9439ad01d45c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2335,6 +2335,8 @@ char *d_path(const struct path *path, char *buf, int buflen) char *res; struct path root; struct path tmp; + int cpu = get_cpu(); + put_cpu(); /* * We have various synthetic filesystems that never get mounted. On @@ -2351,10 +2353,10 @@ char *d_path(const struct path *path, char *buf, int buflen) path_get(&root); read_unlock(¤t->fs->lock); - vfsmount_read_lock(); + vfsmount_read_lock(cpu); tmp = root; res = __d_path(path, &tmp, buf, buflen); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); path_put(&root); return res; @@ -2389,13 +2391,15 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *end; char *retval; unsigned seq; + int cpu = get_cpu(); + put_cpu(); rename_retry: end = buf + buflen; prepend(&end, &buflen, "\0", 1); seq = read_seqbegin(&rename_lock); - vfsmount_read_lock(); + vfsmount_read_lock(cpu); rcu_read_lock(); /* protect parent */ spin_lock(&dentry->d_lock); unlinked: @@ -2426,7 +2430,7 @@ unlinked: out: spin_unlock(&dentry->d_lock); rcu_read_unlock(); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); if (read_seqretry(&rename_lock, seq)) goto rename_retry; return retval; @@ -2458,6 +2462,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) int error; struct path pwd, root; char *page = (char *) __get_free_page(GFP_USER); + int cpu = get_cpu(); + put_cpu(); if (!page) return -ENOMEM; @@ -2470,7 +2476,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) read_unlock(¤t->fs->lock); error = -ENOENT; - vfsmount_read_lock(); + vfsmount_read_lock(cpu); spin_lock(&pwd.dentry->d_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2480,7 +2486,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) spin_unlock(&pwd.dentry->d_lock); /* XXX: race here, have to close (eg. 
return unlinked from __d_path) */ cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); error = PTR_ERR(cwd); if (IS_ERR(cwd)) @@ -2495,7 +2501,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { spin_unlock(&pwd.dentry->d_lock); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); } out: diff --git a/fs/namei.c b/fs/namei.c index 924a0e2ed123..2e247dc950eb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -675,16 +675,18 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; + int cpu = get_cpu(); + put_cpu(); - vfsmount_read_lock(); + vfsmount_read_lock(cpu); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -760,6 +762,8 @@ int follow_down(struct path *path) static __always_inline void follow_dotdot(struct nameidata *nd) { + int cpu = get_cpu(); + put_cpu(); set_root(nd); while(1) { @@ -775,15 +779,15 @@ static __always_inline void follow_dotdot(struct nameidata *nd) dput(old); break; } - vfsmount_read_lock(); + vfsmount_read_lock(cpu); parent = nd->path.mnt->mnt_parent; if (parent == nd->path.mnt) { - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); break; } mntget(parent); nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); dput(old); mntput(nd->path.mnt); nd->path.mnt = parent; @@ -1365,20 +1369,22 @@ static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval; + int cpu = get_cpu(); + put_cpu(); - vfsmount_read_lock(); + vfsmount_read_lock(cpu); rcu_read_lock(); retval = path_init_rcu(dfd, name, flags, nd); if (unlikely(retval)) { rcu_read_unlock(); - vfsmount_read_unlock(); + 
vfsmount_read_unlock(cpu); return retval; } retval = path_walk_rcu(name, nd); rcu_read_unlock(); if (likely(!retval)) mntget(nd->path.mnt); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); if (likely(!retval)) { if (unlikely(!audit_dummy_context())) { if (nd->path.dentry && nd->path.dentry->d_inode) diff --git a/fs/namespace.c b/fs/namespace.c index 83b0370b1acb..7fb7fe3da44e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -60,21 +60,20 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); -void vfsmount_read_lock(void) +void vfsmount_read_lock(int cpu) { spinlock_t *lock; - lock = &get_cpu_var(vfsmount_lock); + lock = &per_cpu(vfsmount_lock, cpu); spin_lock(lock); } -void vfsmount_read_unlock(void) +void vfsmount_read_unlock(int cpu) { spinlock_t *lock; - lock = &__get_cpu_var(vfsmount_lock); + lock = &per_cpu(vfsmount_lock, cpu); spin_unlock(lock); - put_cpu_var(vfsmount_lock); } void vfsmount_write_lock(void) @@ -552,11 +551,13 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; + int cpu = get_cpu(); + put_cpu(); - vfsmount_read_lock(); + vfsmount_read_lock(cpu); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); return child_mnt; } @@ -748,14 +749,16 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { + int cpu = get_cpu(); + put_cpu(); if (likely(mnt->mnt_flags & MNT_MOUNTED)) { - vfsmount_read_lock(); + vfsmount_read_lock(cpu); if (unlikely(!mnt->mnt_flags & MNT_MOUNTED)) { - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); goto repeat; } dec_mnt_count(mnt); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); return; } diff --git a/fs/proc/base.c b/fs/proc/base.c index fcfdd27649fa..d59e279874c7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -650,15 +650,17 @@ 
static unsigned mounts_poll(struct file *file, poll_table *wait) struct proc_mounts *p = file->private_data; struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; + int cpu = get_cpu(); + put_cpu(); poll_wait(file, &ns->poll, wait); - vfsmount_read_lock(); + vfsmount_read_lock(cpu); if (p->event != ns->event) { p->event = ns->event; res |= POLLERR | POLLPRI; } - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); return res; } diff --git a/fs/seq_file.c b/fs/seq_file.c index 880659440865..1326fc0d20b2 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -459,13 +459,15 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, char *buf; size_t size = seq_get_buf(m, &buf); int res = -ENAMETOOLONG; + int cpu = get_cpu(); + put_cpu(); if (size) { char *p; - vfsmount_read_lock(); + vfsmount_read_lock(cpu); p = __d_path(path, root, buf, size); - vfsmount_read_unlock(); + vfsmount_read_unlock(cpu); res = PTR_ERR(p); if (!IS_ERR(p)) { |