diff options
author | Nick Piggin <npiggin@suse.de> | 2010-01-29 15:44:00 -0800 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-04-27 17:32:56 +0200 |
commit | d31a7a0f3e087f9f67008edce0b1ff7d948fde20 (patch) | |
tree | 724538481ab3aa79f5f60ff25d3bb5dfb3352681 /fs | |
parent | 5364019193759010b5910897330f6f1ed5b58cc7 (diff) | |
download | lwn-d31a7a0f3e087f9f67008edce0b1ff7d948fde20.tar.gz lwn-d31a7a0f3e087f9f67008edce0b1ff7d948fde20.zip |
Fixups from 09102009.patch.gz
This patch is just the delta between Nick's 06102009 and 09102009 megapatches.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/autofs4/expire.c | 8 | ||||
-rw-r--r-- | fs/dcache.c | 93 | ||||
-rw-r--r-- | fs/libfs.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 334 | ||||
-rw-r--r-- | fs/namespace.c | 43 |
5 files changed, 435 insertions, 45 deletions
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 1d0a915f1859..5693f251f431 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -295,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, struct autofs_info *ino = autofs4_dentry_ino(root); if (d_mountpoint(root)) { ino->flags |= AUTOFS_INF_MOUNTPOINT; - root->d_mounted--; + spin_lock(&root->d_lock); + root->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&root->d_lock); } ino->flags |= AUTOFS_INF_EXPIRING; autofs4_add_expiring(root); @@ -533,7 +535,9 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, spin_lock(&sbi->fs_lock); if (ino->flags & AUTOFS_INF_MOUNTPOINT) { - sb->s_root->d_mounted++; + spin_lock(&sb->s_root->d_lock); + sb->s_root->d_flags |= DCACHE_MOUNTED; + spin_unlock(&sb->s_root->d_lock); ino->flags &= ~AUTOFS_INF_MOUNTPOINT; } ino->flags &= ~AUTOFS_INF_EXPIRING; diff --git a/fs/dcache.c b/fs/dcache.c index eb74151b22e3..c6c034eb0398 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1188,12 +1188,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); + seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; dentry->d_parent = NULL; dentry->d_sb = NULL; dentry->d_op = NULL; dentry->d_fsdata = NULL; - dentry->d_mounted = 0; INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -1581,21 +1581,6 @@ err_out: * d_lookup() is protected against the concurrent renames in some unrelated * directory using the seqlockt_t rename_lock. 
*/ - -struct dentry * d_lookup(struct dentry * parent, struct qstr * name) -{ - struct dentry * dentry = NULL; - unsigned seq; - - do { - seq = read_seqbegin(&rename_lock); - dentry = __d_lookup(parent, name); - if (dentry) - break; - } while (read_seqretry(&rename_lock, seq)); - return dentry; -} - struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; @@ -1658,6 +1643,78 @@ next: return found; } +struct dentry * d_lookup(struct dentry * parent, struct qstr * name) +{ + struct dentry *dentry = NULL; + unsigned seq; + + do { + seq = read_seqbegin(&rename_lock); + dentry = __d_lookup(parent, name); + if (dentry) + break; + } while (read_seqretry(&rename_lock, seq)); + return dentry; +} + +struct dentry * __d_lookup_rcu(struct dentry * parent, struct qstr * name) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_head *head = &b->head; + struct hlist_node *node; + struct dentry *dentry; + + hlist_for_each_entry_rcu(dentry, node, head, d_hash) { + unsigned seq; + struct dentry *tparent; + const char *tname; + int tlen; + + if (unlikely(dentry->d_name.hash != hash)) + continue; + +seqretry: + seq = read_seqcount_begin(&dentry->d_seq); + tparent = dentry->d_parent; + if (unlikely(tparent != parent)) + continue; + tlen = dentry->d_name.len; + if (unlikely(tlen != len)) + continue; + tname = dentry->d_name.name; + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) + goto seqretry; + if (unlikely(memcmp(tname, str, tlen))) + continue; + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) + goto seqretry; + + return dentry; + } + return NULL; +} + +struct dentry *d_lookup_rcu(struct dentry *parent, struct qstr * name) +{ + struct dentry *dentry = NULL; + unsigned seq; + + if (parent->d_op && parent->d_op->d_compare) + goto out; + + do { + seq = read_seqbegin(&rename_lock); + dentry = 
__d_lookup_rcu(parent, name); + if (dentry) + break; + } while (read_seqretry(&rename_lock, seq)); +out: + return dentry; +} + /** * d_hash_and_lookup - hash the qstr then search for a dentry * @dir: Directory to search in @@ -1927,6 +1984,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) list_del(&target->d_u.d_child); /* Switch the names.. */ + write_seqcount_begin(&dentry->d_seq); + write_seqcount_begin(&target->d_seq); switch_names(dentry, target); swap(dentry->d_name.hash, target->d_name.hash); @@ -1941,6 +2000,8 @@ static void d_move_locked(struct dentry * dentry, struct dentry * target) /* And add them back to the (new) parent lists */ list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); } + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); if (target->d_parent != dentry->d_parent) diff --git a/fs/libfs.c b/fs/libfs.c index e8a7923a9fbb..48fe7c883d27 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -265,7 +265,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - mnt->mnt_mounted++; + mnt->mnt_flags |= MNT_MOUNTED; simple_set_mnt(mnt, s); return 0; diff --git a/fs/namei.c b/fs/namei.c index 157d4e4fc9f9..924a0e2ed123 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -198,6 +198,29 @@ static int acl_permission_check(struct inode *inode, int mask, return -EACCES; } +static int acl_permission_check_rcu(struct inode *inode, int mask, + int (*check_acl)(struct inode *inode, int mask)) +{ + umode_t mode = inode->i_mode; + + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; + + if (current_fsuid() == inode->i_uid) + mode >>= 6; + else { + if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) + return -EAGAIN; + if (in_group_p(inode->i_gid)) + mode >>= 3; + } + + /* + * If the DACs are ok we don't need any capability check. 
+ */ + if ((mask & ~mode) == 0) + return 0; + return -EACCES; +} /** * generic_permission - check for access rights on a Posix-like filesystem * @inode: inode to check access rights for @@ -483,6 +506,26 @@ ok: return security_inode_permission(inode, MAY_EXEC); } +static int exec_permission_lite_rcu(struct inode *inode) +{ + int ret; + + if (inode->i_op->permission) + return -EAGAIN; + ret = acl_permission_check_rcu(inode, MAY_EXEC, inode->i_op->check_acl); + if (ret == -EAGAIN) + return ret; + if (!ret) + goto ok; + + if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) + goto ok; + + return ret; +ok: + return security_inode_permission(inode, MAY_EXEC); +} + static __always_inline void set_root(struct nameidata *nd) { if (!nd->root.mnt) { @@ -495,6 +538,15 @@ static __always_inline void set_root(struct nameidata *nd) } static int link_path_walk(const char *, struct nameidata *); +static __always_inline void set_root_rcu(struct nameidata *nd) +{ + if (!nd->root.mnt) { + struct fs_struct *fs = current->fs; + read_lock(&fs->lock); + nd->root = fs->root; + read_unlock(&fs->lock); + } +} static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { @@ -538,6 +590,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) mntput(path->mnt); } +static inline void path_to_nameidata_rcu(struct path *path, struct nameidata *nd) +{ + nd->path.mnt = path->mnt; + nd->path.dentry = path->dentry; +} + static inline void path_to_nameidata(struct path *path, struct nameidata *nd) { dput(nd->path.dentry); @@ -637,6 +695,21 @@ int follow_up(struct path *path) /* no need for dcache_lock, as serialization is taken care in * namespace.c */ +static int __follow_mount_rcu(struct path *path) +{ + int res = 0; + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted; + mounted = __lookup_mnt(path->mnt, path->dentry, 1); + if (!mounted) + break; + path->mnt = mounted; + path->dentry = mounted->mnt_root; + res = 1; + } + return 
res; +} + static int __follow_mount(struct path *path) { int res = 0; @@ -723,6 +796,24 @@ static __always_inline void follow_dotdot(struct nameidata *nd) * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. */ +static int do_lookup_rcu(struct nameidata *nd, struct qstr *name, + struct path *path) +{ + struct vfsmount *mnt = nd->path.mnt; + struct dentry *dentry; + + dentry = __d_lookup_rcu(nd->path.dentry, name); + + if (!dentry) + return -EAGAIN; + if (dentry->d_op && dentry->d_op->d_revalidate) + return -EAGAIN; + path->mnt = mnt; + path->dentry = dentry; + __follow_mount_rcu(path); + return 0; +} + static int do_lookup(struct nameidata *nd, struct qstr *name, struct path *path) { @@ -820,6 +911,134 @@ fail: return PTR_ERR(dentry); } +static noinline int link_path_walk_rcu(const char *name, struct nameidata *nd, struct path *next) +{ + struct inode *inode; + unsigned int lookup_flags = nd->flags; + + while (*name=='/') + name++; + if (!*name) + goto return_reval; + + inode = nd->path.dentry->d_inode; + if (nd->depth) + lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); + + /* At this point we know we have a real path component. */ + for(;;) { + unsigned long hash; + struct qstr this; + unsigned int c; + + nd->flags |= LOOKUP_CONTINUE; + if (exec_permission_lite_rcu(inode)) + return -EAGAIN; + + this.name = name; + c = *(const unsigned char *)name; + + hash = init_name_hash(); + do { + name++; + hash = partial_name_hash(c, hash); + c = *(const unsigned char *)name; + } while (c && (c != '/')); + this.len = name - (const char *) this.name; + this.hash = end_name_hash(hash); + + /* remove trailing slashes? 
*/ + if (!c) + goto last_component; + while (*++name == '/'); + if (!*name) + goto last_with_slashes; + + if (this.name[0] == '.') switch (this.len) { + default: + break; + case 2: + if (this.name[1] != '.') + break; + return -EAGAIN; + case 1: + continue; + } + if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) + return -EAGAIN; + /* This does the actual lookups.. */ + if (do_lookup_rcu(nd, &this, next)) + return -EAGAIN; + + inode = next->dentry->d_inode; + if (!inode) + return -ENOENT; + if (inode->i_op->follow_link) + return -EAGAIN; + path_to_nameidata_rcu(next, nd); + if (!inode->i_op->lookup) + return -ENOTDIR; + continue; + /* here ends the main loop */ + +last_with_slashes: + lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +last_component: + /* Clear LOOKUP_CONTINUE iff it was previously unset */ + nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; + if (lookup_flags & LOOKUP_PARENT) + return -EAGAIN; + if (this.name[0] == '.') switch (this.len) { + default: + break; + case 2: + if (this.name[1] != '.') + break; + return -EAGAIN; + case 1: + goto return_reval; + } + if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) + return -EAGAIN; + if (do_lookup_rcu(nd, &this, next)) + return -EAGAIN; + inode = next->dentry->d_inode; + if ((lookup_flags & LOOKUP_FOLLOW) + && inode && inode->i_op->follow_link) + return -EAGAIN; + + path_to_nameidata_rcu(next, nd); + if (!inode) + return -ENOENT; + if (lookup_flags & LOOKUP_DIRECTORY) { + if (!inode->i_op->lookup) + return -ENOTDIR; + } + goto return_base; + } +return_reval: + /* + * We bypassed the ordinary revalidation routines. + * We may need to check the cached dentry for staleness. 
+ */ + if (nd->path.dentry && nd->path.dentry->d_sb && + (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) + return -EAGAIN; +return_base: + spin_lock(&nd->path.dentry->d_lock); + if (d_unhashed(nd->path.dentry)) { + spin_unlock(&nd->path.dentry->d_lock); + return -EAGAIN; + } + if (!nd->path.dentry->d_inode) { + spin_unlock(&nd->path.dentry->d_lock); + return -EAGAIN; + } + nd->path.dentry->d_count++; + spin_unlock(&nd->path.dentry->d_lock); + return 0; +} + /* * This is a temporary kludge to deal with "automount" symlinks; proper * solution is to trigger them on follow_mount(), so that do_lookup() @@ -893,7 +1112,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; follow_dotdot(nd); @@ -938,7 +1157,7 @@ last_component: if (this.name[0] == '.') switch (this.len) { default: break; - case 2: + case 2: if (this.name[1] != '.') break; follow_dotdot(nd); @@ -1002,6 +1221,19 @@ return_err: return err; } +static int path_walk_rcu(const char *name, struct nameidata *nd) +{ + struct path save = nd->path; + struct path path = {.mnt = NULL}; + int err; + + current->total_link_count = 0; + err = link_path_walk_rcu(name, nd, &path); + if (unlikely(err == -EAGAIN)) + nd->path = save; + return err; +} + static int path_walk(const char *name, struct nameidata *nd) { struct path save = nd->path; @@ -1027,6 +1259,55 @@ static int path_walk(const char *name, struct nameidata *nd) return result; } +static noinline int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) +{ + int retval = 0; + int fput_needed; + struct file *file; + + nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ + nd->flags = flags; + nd->depth = 0; + nd->root.mnt = NULL; + + if (*name=='/') { + set_root_rcu(nd); + nd->path = nd->root; + } else if (dfd == AT_FDCWD) { + struct fs_struct *fs = current->fs; + read_lock(&fs->lock); + nd->path = fs->pwd; + read_unlock(&fs->lock); + } else { + struct dentry *dentry; + + file = fget_light(dfd, &fput_needed); + retval = -EBADF; + if (!file) + goto out_fail; + + dentry = file->f_path.dentry; + + retval = -ENOTDIR; + if (!S_ISDIR(dentry->d_inode->i_mode)) + goto fput_fail; + + retval = file_permission(file, MAY_EXEC); + if (retval) + goto fput_fail; + + nd->path = file->f_path; + + fput_light(file, fput_needed); + } + return 0; + +fput_fail: + fput_light(file, fput_needed); +out_fail: + return retval; +} + static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; @@ -1083,16 +1364,49 @@ out_fail: static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { - int retval = path_init(dfd, name, flags, nd); - if (!retval) - retval = path_walk(name, nd); - if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && - nd->path.dentry->d_inode)) - audit_inode(name, nd->path.dentry); - if (nd->root.mnt) { - path_put(&nd->root); + int retval; + + vfsmount_read_lock(); + rcu_read_lock(); + retval = path_init_rcu(dfd, name, flags, nd); + if (unlikely(retval)) { + rcu_read_unlock(); + vfsmount_read_unlock(); + return retval; + } + retval = path_walk_rcu(name, nd); + rcu_read_unlock(); + if (likely(!retval)) + mntget(nd->path.mnt); + vfsmount_read_unlock(); + if (likely(!retval)) { + if (unlikely(!audit_dummy_context())) { + if (nd->path.dentry && nd->path.dentry->d_inode) + audit_inode(name, nd->path.dentry); + } + } + if (nd->root.mnt) nd->root.mnt = NULL; + + if (unlikely(retval == -EAGAIN)) { + /* slower, locked walk */ + retval = path_init(dfd, name, flags, nd); + if (unlikely(retval)) + return retval; + retval = path_walk(name, nd); + if 
(likely(!retval)) { + if (unlikely(!audit_dummy_context())) { + if (nd->path.dentry && nd->path.dentry->d_inode) + audit_inode(name, nd->path.dentry); + } + } + + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } } + return retval; } diff --git a/fs/namespace.c b/fs/namespace.c index 600f1f20eeed..576ceda85f39 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -581,25 +581,36 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } +static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) +{ + if (!__lookup_mnt(mnt, dentry, 0)) { + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); + } +} + static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { old_path->dentry = mnt->mnt_mountpoint; old_path->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_hash); list_del_init(&mnt->mnt_child); - old_path->dentry->d_mounted--; - WARN_ON(mnt->mnt_mounted != 1); - mnt->mnt_mounted--; + list_del_init(&mnt->mnt_hash); + dentry_reset_mounted(old_path->mnt, old_path->dentry); + WARN_ON(!(mnt->mnt_flags & MNT_MOUNTED)); + mnt->mnt_flags &= ~MNT_MOUNTED; } void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *child_mnt) { child_mnt->mnt_parent = mntget(mnt); - child_mnt->mnt_mountpoint = dget(dentry); - dentry->d_mounted++; + spin_lock(&dentry->d_lock); + child_mnt->mnt_mountpoint = dget_dlock(dentry); + dentry->d_flags |= DCACHE_MOUNTED; + spin_unlock(&dentry->d_lock); } static void attach_mnt(struct vfsmount *mnt, struct path *path) @@ -608,8 +619,8 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(path->mnt, path->dentry)); list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); - WARN_ON(mnt->mnt_mounted != 0); - mnt->mnt_mounted++; + WARN_ON(mnt->mnt_flags & MNT_MOUNTED); + mnt->mnt_flags |= MNT_MOUNTED; } /* @@ 
-632,8 +643,8 @@ static void commit_tree(struct vfsmount *mnt) list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(parent, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); - WARN_ON(mnt->mnt_mounted != 0); - mnt->mnt_mounted++; + WARN_ON(mnt->mnt_flags & MNT_MOUNTED); + mnt->mnt_flags |= MNT_MOUNTED; touch_mnt_namespace(n); } @@ -737,9 +748,9 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { - if (likely(mnt->mnt_mounted)) { + if (likely(mnt->mnt_flags & MNT_MOUNTED)) { vfsmount_read_lock(); - if (unlikely(!mnt->mnt_mounted)) { + if (unlikely(!(mnt->mnt_flags & MNT_MOUNTED))) { vfsmount_read_unlock(); goto repeat; } @@ -751,7 +762,7 @@ void mntput_no_expire(struct vfsmount *mnt) repeat: vfsmount_write_lock(); - BUG_ON(mnt->mnt_mounted); + BUG_ON(mnt->mnt_flags & MNT_MOUNTED); dec_mnt_count(mnt); if (count_mnt_count(mnt)) { vfsmount_write_unlock(); @@ -1174,11 +1185,11 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; list_del_init(&p->mnt_child); - WARN_ON(p->mnt_mounted != 1); - p->mnt_mounted--; + WARN_ON(!(p->mnt_flags & MNT_MOUNTED)); + p->mnt_flags &= ~MNT_MOUNTED; if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; - p->mnt_mountpoint->d_mounted--; + dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); } change_mnt_propagation(p, MS_PRIVATE); } |