summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2011-01-14 22:30:21 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2011-01-16 13:47:07 -0500
commitf03c65993b98eeb909a4012ce7833c5857d74755 (patch)
treea6dd5e353889b7fe4ab87c54170d09443d788fec /fs
parent7b8a53fd815deb39542085897743fa0063f9fe06 (diff)
downloadlwn-f03c65993b98eeb909a4012ce7833c5857d74755.tar.gz
lwn-f03c65993b98eeb909a4012ce7833c5857d74755.zip
sanitize vfsmount refcounting changes
Instead of splitting refcount between (per-cpu) mnt_count and (SMP-only) mnt_longrefs, make all references contribute to mnt_count again and keep track of how many are longterm ones. Accounting rules for longterm count: * 1 for each fs_struct.root.mnt * 1 for each fs_struct.pwd.mnt * 1 for having non-NULL ->mnt_ns * decrement to 0 happens only under vfsmount lock exclusive That allows nice common case for mntput() - since we can't drop the final reference until after mnt_longterm has reached 0 due to the rules above, mntput() can grab vfsmount lock shared and check mnt_longterm. If it turns out to be non-zero (which is the common case), we know that this is not the final mntput() and can just blindly decrement percpu mnt_count. Otherwise we grab vfsmount lock exclusive and do usual decrement-and-check of percpu mnt_count. For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm(); namespace.c uses the latter in places where we don't already hold vfsmount lock exclusive and opencodes a few remaining spots where we need to manipulate mnt_longterm. Note that we mostly revert the code outside of fs/namespace.c back to what we used to have; in particular, normal code doesn't need to care about two kinds of references, etc. And we get to keep the optimization Nick's variant had bought us... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/fs_struct.c35
-rw-r--r--fs/internal.h3
-rw-r--r--fs/namei.c24
-rw-r--r--fs/namespace.c116
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/super.c2
7 files changed, 73 insertions, 111 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index cbe57f3c4d89..c5567cb78432 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -233,7 +233,7 @@ static int __init anon_inode_init(void)
return 0;
err_mntput:
- mntput_long(anon_inode_mnt);
+ mntput(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 68ca487bedb1..78b519c13536 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -4,6 +4,19 @@
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/fs_struct.h>
+#include "internal.h"
+
+static inline void path_get_longterm(struct path *path)
+{
+ path_get(path);
+ mnt_make_longterm(path->mnt);
+}
+
+static inline void path_put_longterm(struct path *path)
+{
+ mnt_make_shortterm(path->mnt);
+ path_put(path);
+}
/*
* Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq);
old_root = fs->root;
fs->root = *path;
- path_get_long(path);
+ path_get_longterm(path);
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_root.dentry)
- path_put_long(&old_root);
+ path_put_longterm(&old_root);
}
/*
@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
- path_get_long(path);
+ path_get_longterm(path);
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_pwd.dentry)
- path_put_long(&old_pwd);
+ path_put_longterm(&old_pwd);
}
void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
write_seqcount_begin(&fs->seq);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
- path_get_long(new_root);
+ path_get_longterm(new_root);
fs->root = *new_root;
count++;
}
if (fs->pwd.dentry == old_root->dentry
&& fs->pwd.mnt == old_root->mnt) {
- path_get_long(new_root);
+ path_get_longterm(new_root);
fs->pwd = *new_root;
count++;
}
@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
while (count--)
- path_put_long(old_root);
+ path_put_longterm(old_root);
}
void free_fs_struct(struct fs_struct *fs)
{
- path_put_long(&fs->root);
- path_put_long(&fs->pwd);
+ path_put_longterm(&fs->root);
+ path_put_longterm(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
spin_lock(&old->lock);
fs->root = old->root;
- path_get_long(&fs->root);
+ path_get_longterm(&fs->root);
fs->pwd = old->pwd;
- path_get_long(&fs->pwd);
+ path_get_longterm(&fs->pwd);
spin_unlock(&old->lock);
}
return fs;
diff --git a/fs/internal.h b/fs/internal.h
index 4931060fd089..12ccb86edef7 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern int do_add_mount(struct vfsmount *, struct path *, int);
extern void mnt_clear_expiry(struct vfsmount *);
+extern void mnt_make_longterm(struct vfsmount *);
+extern void mnt_make_shortterm(struct vfsmount *);
+
extern void __init mnt_init(void);
DECLARE_BRLOCK(vfsmount_lock);
diff --git a/fs/namei.c b/fs/namei.c
index c2e37727e3ab..8f7b41a14882 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,18 +368,6 @@ void path_get(struct path *path)
EXPORT_SYMBOL(path_get);
/**
- * path_get_long - get a long reference to a path
- * @path: path to get the reference to
- *
- * Given a path increment the reference count to the dentry and the vfsmount.
- */
-void path_get_long(struct path *path)
-{
- mntget_long(path->mnt);
- dget(path->dentry);
-}
-
-/**
* path_put - put a reference to a path
* @path: path to put the reference to
*
@@ -393,18 +381,6 @@ void path_put(struct path *path)
EXPORT_SYMBOL(path_put);
/**
- * path_put_long - put a long reference to a path
- * @path: path to put the reference to
- *
- * Given a path decrement the reference count to the dentry and the vfsmount.
- */
-void path_put_long(struct path *path)
-{
- dput(path->dentry);
- mntput_long(path->mnt);
-}
-
-/**
* nameidata_drop_rcu - drop this nameidata out of rcu-walk
* @nd: nameidata pathwalk data to drop
* Returns: 0 on success, -ECHILD on failure
diff --git a/fs/namespace.c b/fs/namespace.c
index d7fc05fac753..48809e21f270 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt)
unsigned int mnt_get_count(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- unsigned int count = atomic_read(&mnt->mnt_longrefs);
+ unsigned int count = 0;
int cpu;
for_each_possible_cpu(cpu) {
@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
if (!mnt->mnt_pcp)
goto out_free_devname;
- atomic_set(&mnt->mnt_longrefs, 1);
+ this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
mnt->mnt_count = 1;
mnt->mnt_writers = 0;
@@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt)
BUG_ON(parent == mnt);
list_add_tail(&head, &mnt->mnt_list);
- list_for_each_entry(m, &head, mnt_list)
+ list_for_each_entry(m, &head, mnt_list) {
m->mnt_ns = n;
+ atomic_inc(&m->mnt_longterm);
+ }
+
list_splice(&head, n->list.prev);
list_add_tail(&mnt->mnt_hash, mount_hashtable +
@@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt)
deactivate_super(sb);
}
-#ifdef CONFIG_SMP
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
+static void mntput_no_expire(struct vfsmount *mnt)
{
- if (!longrefs) {
put_again:
- br_read_lock(vfsmount_lock);
- if (likely(atomic_read(&mnt->mnt_longrefs))) {
- mnt_dec_count(mnt);
- br_read_unlock(vfsmount_lock);
- return;
- }
+#ifdef CONFIG_SMP
+ br_read_lock(vfsmount_lock);
+ if (likely(atomic_read(&mnt->mnt_longterm))) {
+ mnt_dec_count(mnt);
br_read_unlock(vfsmount_lock);
- } else {
- BUG_ON(!atomic_read(&mnt->mnt_longrefs));
- if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
- return;
+ return;
}
+ br_read_unlock(vfsmount_lock);
br_write_lock(vfsmount_lock);
- if (!longrefs)
- mnt_dec_count(mnt);
- else
- atomic_dec(&mnt->mnt_longrefs);
+ mnt_dec_count(mnt);
if (mnt_get_count(mnt)) {
br_write_unlock(vfsmount_lock);
return;
}
- if (unlikely(mnt->mnt_pinned)) {
- mnt_add_count(mnt, mnt->mnt_pinned + 1);
- mnt->mnt_pinned = 0;
- br_write_unlock(vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto put_again;
- }
- br_write_unlock(vfsmount_lock);
- mntfree(mnt);
-}
#else
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
-{
-put_again:
mnt_dec_count(mnt);
if (likely(mnt_get_count(mnt)))
return;
br_write_lock(vfsmount_lock);
+#endif
if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0;
@@ -789,12 +771,6 @@ put_again:
br_write_unlock(vfsmount_lock);
mntfree(mnt);
}
-#endif
-
-static void mntput_no_expire(struct vfsmount *mnt)
-{
- __mntput(mnt, 0);
-}
void mntput(struct vfsmount *mnt)
{
@@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt)
/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
if (unlikely(mnt->mnt_expiry_mark))
mnt->mnt_expiry_mark = 0;
- __mntput(mnt, 0);
+ mntput_no_expire(mnt);
}
}
EXPORT_SYMBOL(mntput);
@@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt)
}
EXPORT_SYMBOL(mntget);
-void mntput_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- if (mnt) {
- /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
- if (unlikely(mnt->mnt_expiry_mark))
- mnt->mnt_expiry_mark = 0;
- __mntput(mnt, 1);
- }
-#else
- mntput(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntput_long);
-
-struct vfsmount *mntget_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- if (mnt)
- atomic_inc(&mnt->mnt_longrefs);
- return mnt;
-#else
- return mntget(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntget_long);
-
void mnt_pin(struct vfsmount *mnt)
{
br_write_lock(vfsmount_lock);
@@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head)
dput(dentry);
mntput(m);
}
- mntput_long(mnt);
+ mntput(mnt);
}
}
@@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
+ atomic_dec(&p->mnt_longterm);
list_del_init(&p->mnt_child);
if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++;
@@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
unlock:
up_write(&namespace_sem);
- mntput_long(newmnt);
+ mntput(newmnt);
return err;
}
@@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void)
return new_ns;
}
+void mnt_make_longterm(struct vfsmount *mnt)
+{
+ atomic_inc(&mnt->mnt_longterm);
+}
+
+void mnt_make_shortterm(struct vfsmount *mnt)
+{
+ if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
+ return;
+ br_write_lock(vfsmount_lock);
+ atomic_dec(&mnt->mnt_longterm);
+ br_write_unlock(vfsmount_lock);
+}
+
/*
* Allocate a new namespace structure and populate it with contents
* copied from the namespace of the passed in task structure.
@@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
q = new_ns->root;
while (p) {
q->mnt_ns = new_ns;
+ atomic_inc(&q->mnt_longterm);
if (fs) {
if (p == fs->root.mnt) {
+ fs->root.mnt = mntget(q);
+ atomic_inc(&q->mnt_longterm);
+ mnt_make_shortterm(p);
rootmnt = p;
- fs->root.mnt = mntget_long(q);
}
if (p == fs->pwd.mnt) {
+ fs->pwd.mnt = mntget(q);
+ atomic_inc(&q->mnt_longterm);
+ mnt_make_shortterm(p);
pwdmnt = p;
- fs->pwd.mnt = mntget_long(q);
}
}
p = next_mnt(p, mnt_ns->root);
@@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
up_write(&namespace_sem);
if (rootmnt)
- mntput_long(rootmnt);
+ mntput(rootmnt);
if (pwdmnt)
- mntput_long(pwdmnt);
+ mntput(pwdmnt);
return new_ns;
}
@@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
new_ns = alloc_mnt_ns();
if (!IS_ERR(new_ns)) {
mnt->mnt_ns = new_ns;
+ atomic_inc(&mnt->mnt_longterm);
new_ns->root = mnt;
list_add(&new_ns->list, &new_ns->root->mnt_list);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index e2e95fb46a1e..89e9e19b1b2e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
static void __exit exit_pipe_fs(void)
{
unregister_filesystem(&pipe_fs_type);
- mntput_long(pipe_mnt);
+ mntput(pipe_mnt);
}
fs_initcall(init_pipe_fs);
diff --git a/fs/super.c b/fs/super.c
index 4f6a3571a634..74e149efed81 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return mnt;
err:
- mntput_long(mnt);
+ mntput(mnt);
return ERR_PTR(err);
}