diff options
author | Nick Piggin <npiggin@suse.de> | 2010-01-29 15:38:19 -0800 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-04-27 17:32:28 +0200 |
commit | f0a176c5482950a40ac31113816b981d4fe6069c (patch) | |
tree | a48a5c340310bf98b4604bb6e669e82a73388eb3 | |
parent | 787deaa77c09c3746dd2b998997109f2463190fa (diff) | |
download | lwn-f0a176c5482950a40ac31113816b981d4fe6069c.tar.gz lwn-f0a176c5482950a40ac31113816b981d4fe6069c.zip |
fs-vfsmount_lock-scale
Use a brlock for the vfsmount lock.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | fs/dcache.c | 4 | ||||
-rw-r--r-- | fs/namei.c | 13 | ||||
-rw-r--r-- | fs/namespace.c | 203 | ||||
-rw-r--r-- | fs/pnode.c | 4 | ||||
-rw-r--r-- | fs/proc/base.c | 4 | ||||
-rw-r--r-- | include/linux/mount.h | 6 | ||||
-rw-r--r-- | kernel/audit_tree.c | 6 | ||||
-rw-r--r-- | security/tomoyo/realpath.c | 4 |
8 files changed, 162 insertions, 82 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 116fd33f564b..761f35a32f0f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1911,7 +1911,7 @@ char *__d_path(const struct path *path, struct path *root, char *end = buffer + buflen; char *retval; - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); prepend(&end, &buflen, "\0", 1); if (d_unlinked(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) @@ -1947,7 +1947,7 @@ char *__d_path(const struct path *path, struct path *root, } out: - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return retval; global_root: diff --git a/fs/namei.c b/fs/namei.c index a4855af776a8..2c32e4dcc412 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -617,15 +617,16 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - spin_lock(&vfsmount_lock); + + vfsmount_read_lock(); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -704,15 +705,15 @@ static __always_inline void follow_dotdot(struct nameidata *nd) break; } spin_unlock(&dcache_lock); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); parent = nd->path.mnt->mnt_parent; if (parent == nd->path.mnt) { - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); break; } mntget(parent); nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); dput(old); mntput(nd->path.mnt); nd->path.mnt = parent; diff --git a/fs/namespace.c b/fs/namespace.c index 962fd96dbe4c..7b77f51fabcd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -11,6 +11,8 @@ #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/percpu.h> #include <linux/smp_lock.h> #include <linux/init.h> #include 
<linux/kernel.h> @@ -37,12 +39,16 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* spinlock for vfsmount related operations, inplace of dcache_lock */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); +/* + * vfsmount "brlock" style spinlock for vfsmount related operations, use + * vfsmount_read_lock/vfsmount_write_lock functions. + */ +static DEFINE_PER_CPU(spinlock_t, vfsmount_lock); static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -54,6 +60,49 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); +void vfsmount_read_lock(void) +{ + spinlock_t *lock; + + lock = &get_cpu_var(vfsmount_lock); + spin_lock(lock); +} + +void vfsmount_read_unlock(void) +{ + spinlock_t *lock; + + lock = &__get_cpu_var(vfsmount_lock); + spin_unlock(lock); + put_cpu_var(vfsmount_lock); +} + +void vfsmount_write_lock(void) +{ + int i; + int nr = 0; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(vfsmount_lock, i); + spin_lock_nested(lock, nr); + nr++; + } +} + +void vfsmount_write_unlock(void) +{ + int i; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(vfsmount_lock, i); + spin_unlock(lock); + } +} + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -64,18 +113,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* allocation is serialized by namespace_sem */ +/* + * allocation is serialized by namespace_sem, but we need the spinlock to + * serialise with freeing. 
+ */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); if (res == -EAGAIN) goto retry; @@ -85,11 +137,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); } /* @@ -351,7 +403,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -385,15 +437,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt->mnt_flags &= ~MNT_READONLY; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -446,10 +498,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - spin_lock(&vfsmount_lock); + + vfsmount_read_lock(); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return child_mnt; } @@ -625,40 +678,47 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { repeat: - if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { - if (likely(!mnt->mnt_pinned)) { - 
spin_unlock(&vfsmount_lock); - __mntput(mnt); - return; - } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; - spin_unlock(&vfsmount_lock); - acct_auto_close_mnt(mnt); - security_sb_umount_close(mnt); - goto repeat; + /* open-code atomic_dec_and_lock for the vfsmount lock */ + if (atomic_add_unless(&mnt->mnt_count, -1, 1)) + return; + vfsmount_write_lock(); + if (!atomic_dec_and_test(&mnt->mnt_count)) { + vfsmount_write_unlock(); + return; } + + if (likely(!mnt->mnt_pinned)) { + vfsmount_write_unlock(); + __mntput(mnt); + return; + } + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + vfsmount_write_unlock(); + acct_auto_close_mnt(mnt); + security_sb_umount_close(mnt); + goto repeat; } EXPORT_SYMBOL(mntput_no_expire); void mnt_pin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt->mnt_pinned++; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); if (mnt->mnt_pinned) { atomic_inc(&mnt->mnt_count); mnt->mnt_pinned--; } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } EXPORT_SYMBOL(mnt_unpin); @@ -941,12 +1001,12 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); if (actual_refs > minimum_refs) return 0; @@ -973,11 +1033,12 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (propagate_mount_busy(mnt, 2)) ret = 0; - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); up_read(&namespace_sem); + return ret; } @@ -992,13 +1053,14 @@ void release_mounts(struct list_head *head) if (mnt->mnt_parent != mnt) { struct dentry 
*dentry; struct vfsmount *m; - spin_lock(&vfsmount_lock); + + vfsmount_write_lock(); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); dput(dentry); mntput(m); } @@ -1096,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) } down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); event++; if (!(flags & MNT_DETACH)) @@ -1108,7 +1170,7 @@ static int do_umount(struct vfsmount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); if (retval) security_sb_umount_busy(mnt); up_write(&namespace_sem); @@ -1215,19 +1277,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); umount_tree(res, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); release_mounts(&umount_list); } return NULL; @@ -1246,9 +1308,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1361,12 +1423,13 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (err) goto out_cleanup_ids; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); } + if (parent_path) { detach_mnt(source_mnt, parent_path); 
attach_mnt(source_mnt, path); @@ -1380,7 +1443,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); + return 0; out_cleanup_ids: @@ -1442,10 +1506,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); out_unlock: up_write(&namespace_sem); @@ -1489,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + + vfsmount_write_lock(); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); release_mounts(&umount_list); } @@ -1544,18 +1609,18 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK; path->mnt->mnt_flags = mnt_flags; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } up_write(&sb->s_umount); if (!err) { security_sb_post_remount(path->mnt, flags, data); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); touch_mnt_namespace(path->mnt->mnt_ns); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } return err; } @@ -1732,7 +1797,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1751,7 +1816,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); 
up_write(&namespace_sem); release_mounts(&umounts); @@ -2026,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, kfree(new_ns); return ERR_PTR(-ENOMEM); } - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2225,7 +2290,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2245,7 +2310,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); chroot_fs_refs(&root, &new); security_sb_post_pivotroot(&root, &new); error = 0; @@ -2261,7 +2326,7 @@ out1: out0: return error; out3: - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); goto out2; } @@ -2291,6 +2356,7 @@ static void __init init_mount_tree(void) void __init mnt_init(void) { unsigned u; + int i; int err; init_rwsem(&namespace_sem); @@ -2308,6 +2374,9 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + for_each_possible_cpu(i) + spin_lock_init(&per_cpu(vfsmount_lock, i)); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2323,16 +2392,22 @@ void put_mnt_ns(struct mnt_namespace *ns) { struct vfsmount *root; LIST_HEAD(umount_list); + spinlock_t *lock; - if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) + lock = &get_cpu_var(vfsmount_lock); + if (!atomic_dec_and_lock(&ns->count, lock)) { + put_cpu_var(vfsmount_lock); return; + } root = ns->root; ns->root = NULL; - spin_unlock(&vfsmount_lock); + spin_unlock(lock); + 
put_cpu_var(vfsmount_lock); + down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); umount_tree(root, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/pnode.c b/fs/pnode.c index 8d5f392ec3d3..8d1ff70a39f6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -264,12 +264,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); release_mounts(&umount_list); return ret; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 3cd449d23352..fcfdd27649fa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -653,12 +653,12 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) poll_wait(file, &ns->poll, wait); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (p->event != ns->event) { p->event = ns->event; res |= POLLERR | POLLPRI; } - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return res; } diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d5275364867..75c94a59c0b1 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -90,6 +90,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) struct file; /* forward dec */ +extern void vfsmount_read_lock(void); +extern void vfsmount_read_unlock(void); +extern void vfsmount_write_lock(void); +extern void vfsmount_write_unlock(void); + extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); @@ -123,7 +128,6 @@ extern int do_add_mount(struct vfsmount *newmnt, struct path *path, extern void mark_mounts_for_expiry(struct list_head *mounts); -extern spinlock_t 
vfsmount_lock; extern dev_t name_to_dev_t(char *name); #endif /* _LINUX_MOUNT_H */ diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 4b05bd9479db..6274327fd913 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -761,15 +761,15 @@ int audit_tag_tree(char *old, char *new) continue; } - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (!is_under(mnt, dentry, &path)) { - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); path_put(&path); put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); path_put(&path); list_for_each_entry(p, &list, mnt_list) { diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 18369d497eb8..c5c3c1669cab 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -98,12 +98,12 @@ int tomoyo_realpath_from_path2(struct path *path, char *newname, root = current->fs->root; path_get(&root); read_unlock(&current->fs->lock); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (root.mnt && root.mnt->mnt_ns) ns_root.mnt = mntget(root.mnt->mnt_ns->root); if (ns_root.mnt) ns_root.dentry = dget(ns_root.mnt->mnt_root); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); spin_lock(&dcache_lock); tmp = ns_root; sp = __d_path(path, &tmp, newname, newname_len); |