diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-10 12:44:24 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-10 12:44:24 -0700 |
commit | b05430fc9341fea7a6228a3611c850a476809596 (patch) | |
tree | 91bd662d269a3478db78d6a04a34901f0cfe521b | |
parent | d0d272771035a36a7839bb70ab6ebae3f4f4960b (diff) | |
parent | 48f5ec21d9c67e881ff35343988e290ef5cf933f (diff) | |
download | lwn-b05430fc9341fea7a6228a3611c850a476809596.tar.gz lwn-b05430fc9341fea7a6228a3611c850a476809596.zip |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs pile 3 (of many) from Al Viro:
"Waiman's conversion of d_path() and bits related to it,
kern_path_mountpoint(), several cleanups and fixes (exportfs
one is -stable fodder, IMO).
There definitely will be more... ;-/"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
split read_seqretry_or_unlock(), convert d_walk() to resulting primitives
dcache: Translating dentry into pathname without taking rename_lock
autofs4 - fix device ioctl mount lookup
introduce kern_path_mountpoint()
rename user_path_umountat() to user_path_mountpoint_at()
take unlazy_walk() into umount_lookup_last()
Kill indirect include of file.h from eventfd.h, use fdget() in cgroup.c
prune_super(): sb->s_op is never NULL
exportfs: don't assume that ->iterate() won't feed us too long entries
afs: get rid of redundant ->d_name.len checks
-rw-r--r-- | fs/afs/dir.c | 24 | ||||
-rw-r--r-- | fs/autofs4/dev-ioctl.c | 23 | ||||
-rw-r--r-- | fs/dcache.c | 220 | ||||
-rw-r--r-- | fs/exportfs/expfs.c | 2 | ||||
-rw-r--r-- | fs/internal.h | 3 | ||||
-rw-r--r-- | fs/namei.c | 112 | ||||
-rw-r--r-- | fs/namespace.c | 2 | ||||
-rw-r--r-- | fs/super.c | 2 | ||||
-rw-r--r-- | include/linux/eventfd.h | 3 | ||||
-rw-r--r-- | include/linux/namei.h | 4 | ||||
-rw-r--r-- | kernel/cgroup.c | 33 |
11 files changed, 241 insertions, 187 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0b74d3176ab7..646337dc5201 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -751,10 +751,6 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) _enter("{%x:%u},{%s},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -816,10 +812,6 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -936,10 +928,6 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, _enter("{%x:%u},{%s},%ho,", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -1005,10 +993,6 @@ static int afs_link(struct dentry *from, struct inode *dir, dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -1053,10 +1037,6 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, content); - ret = -ENAMETOOLONG; - if (dentry->d_name.len >= AFSNAMEMAX) - goto error; - ret = -EINVAL; if (strlen(content) >= AFSPATHMAX) goto error; @@ -1127,10 +1107,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dvnode->fid.vid, new_dvnode->fid.vnode, new_dentry->d_name.name); - ret = -ENAMETOOLONG; - if (new_dentry->d_name.len >= AFSNAMEMAX) - goto error; - key = afs_request_key(orig_dvnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 743c7c2c949d..0f00da329e71 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -183,13 +183,14 @@ static int autofs_dev_ioctl_protosubver(struct file *fp, return 0; } +/* Find the topmost mount satisfying test() */ static int find_autofs_mount(const char *pathname, struct path *res, int test(struct path *path, void *data), void *data) { struct path path; - int err = kern_path(pathname, 0, &path); + int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); if (err) return err; err = -ENOENT; @@ -197,10 +198,9 @@ static int find_autofs_mount(const char *pathname, if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) { if (test(&path, data)) { path_get(&path); - if (!err) /* already found some */ - path_put(res); *res = path; err = 0; + break; } } if (!follow_up(&path)) @@ -486,12 +486,11 @@ static int autofs_dev_ioctl_askumount(struct file *fp, * mount if there is one or 0 if it isn't a mountpoint. * * If we aren't supplied with a file descriptor then we - * lookup the nameidata of the path and check if it is the - * root of a mount. If a type is given we are looking for - * a particular autofs mount and if we don't find a match - * we return fail. If the located nameidata path is the - * root of a mount we return 1 along with the super magic - * of the mount or 0 otherwise. + * lookup the path and check if it is the root of a mount. + * If a type is given we are looking for a particular autofs + * mount and if we don't find a match we return fail. If the + * located path is the root of a mount we return 1 along with + * the super magic of the mount or 0 otherwise. * * In both cases the the device number (as returned by * new_encode_dev()) is also returned. @@ -519,9 +518,11 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, if (!fp || param->ioctlfd == -1) { if (autofs_type_any(type)) - err = kern_path(name, LOOKUP_FOLLOW, &path); + err = kern_path_mountpoint(AT_FDCWD, + name, &path, LOOKUP_FOLLOW); else - err = find_autofs_mount(name, &path, test_by_type, &type); + err = find_autofs_mount(name, &path, + test_by_type, &type); if (err) goto out; devid = new_encode_dev(path.dentry->d_sb->s_dev); diff --git a/fs/dcache.c b/fs/dcache.c index ca8e9cd60f87..4d9df3c940e6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * lock: sequence lock + * seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + write_seqlock(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + write_sequnlock(lock); +} + /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -1012,7 +1041,7 @@ void shrink_dcache_for_umount(struct super_block *sb) * the parenthood after dropping the lock and check * that the sequence number still matches. */ -static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) +static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) { struct dentry *new = old->d_parent; @@ -1026,7 +1055,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq */ if (new != old->d_parent || (old->d_flags & DCACHE_DENTRY_KILLED) || - (!locked && read_seqretry(&rename_lock, seq))) { + need_seqretry(&rename_lock, seq)) { spin_unlock(&new->d_lock); new = NULL; } @@ -1063,13 +1092,12 @@ static void d_walk(struct dentry *parent, void *data, { struct dentry *this_parent; struct list_head *next; - unsigned seq; - int locked = 0; + unsigned seq = 0; enum d_walk_ret ret; bool retry = true; - seq = read_seqbegin(&rename_lock); again: + read_seqbegin_or_lock(&rename_lock, &seq); this_parent = parent; spin_lock(&this_parent->d_lock); @@ -1123,13 +1151,13 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, locked, seq); + this_parent = try_to_ascend(this_parent, seq); if (!this_parent) goto rename_retry; next = child->d_u.d_child.next; goto resume; } - if (!locked && read_seqretry(&rename_lock, seq)) { + if (need_seqretry(&rename_lock, seq)) { spin_unlock(&this_parent->d_lock); goto rename_retry; } @@ -1138,17 +1166,13 @@ resume: out_unlock: spin_unlock(&this_parent->d_lock); - if (locked) - write_sequnlock(&rename_lock); + done_seqretry(&rename_lock, seq); return; rename_retry: if (!retry) return; - if (locked) - goto again; - locked = 1; - write_seqlock(&rename_lock); + seq = 1; goto again; } @@ -2647,9 +2671,39 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) return 0; } +/** + * prepend_name - prepend a pathname in front of current buffer pointer + * buffer: buffer pointer + * buflen: allocated length of the buffer + * name: name string and length qstr structure + * + * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to + * make sure that either the old or the new name pointer and length are + * fetched. However, there may be mismatch between length and pointer. + * The length cannot be trusted, we need to copy it byte-by-byte until + * the length is reached or a null byte is found. It also prepends "/" at + * the beginning of the name. The sequence number check at the caller will + * retry it again when a d_move() does happen. So any garbage in the buffer + * due to mismatched pointer and length will be discarded. + */ static int prepend_name(char **buffer, int *buflen, struct qstr *name) { - return prepend(buffer, buflen, name->name, name->len); + const char *dname = ACCESS_ONCE(name->name); + u32 dlen = ACCESS_ONCE(name->len); + char *p; + + if (*buflen < dlen + 1) + return -ENAMETOOLONG; + *buflen -= dlen + 1; + p = *buffer -= dlen + 1; + *p++ = '/'; + while (dlen--) { + char c = *dname++; + if (!c) + break; + *p++ = c; + } + return 0; } /** @@ -2659,7 +2713,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * @buffer: pointer to the end of the buffer * @buflen: pointer to buffer length * - * Caller holds the rename_lock. + * The function tries to write out the pathname without taking any lock other + * than the RCU read lock to make sure that dentries won't go away. It only + * checks the sequence number of the global rename_lock as any change in the + * dentry's d_seq will be preceded by changes in the rename_lock sequence + * number. If the sequence number had been change, it will restart the whole + * pathname back-tracing sequence again. It performs a total of 3 trials of + * lockless back-tracing sequences before falling back to take the + * rename_lock. */ static int prepend_path(const struct path *path, const struct path *root, @@ -2668,54 +2729,66 @@ static int prepend_path(const struct path *path, struct dentry *dentry = path->dentry; struct vfsmount *vfsmnt = path->mnt; struct mount *mnt = real_mount(vfsmnt); - bool slash = false; int error = 0; + unsigned seq = 0; + char *bptr; + int blen; + rcu_read_lock(); +restart: + bptr = *buffer; + blen = *buflen; + read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { /* Global root? */ - if (!mnt_has_parent(mnt)) - goto global_root; - dentry = mnt->mnt_mountpoint; - mnt = mnt->mnt_parent; - vfsmnt = &mnt->mnt; - continue; + if (mnt_has_parent(mnt)) { + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + vfsmnt = &mnt->mnt; + continue; + } + /* + * Filesystems needing to implement special "root names" + * should do so with ->d_dname() + */ + if (IS_ROOT(dentry) && + (dentry->d_name.len != 1 || + dentry->d_name.name[0] != '/')) { + WARN(1, "Root dentry has weird name <%.*s>\n", + (int) dentry->d_name.len, + dentry->d_name.name); + } + if (!error) + error = is_mounted(vfsmnt) ? 1 : 2; + break; } parent = dentry->d_parent; prefetch(parent); - spin_lock(&dentry->d_lock); - error = prepend_name(buffer, buflen, &dentry->d_name); - spin_unlock(&dentry->d_lock); - if (!error) - error = prepend(buffer, buflen, "/", 1); + error = prepend_name(&bptr, &blen, &dentry->d_name); if (error) break; - slash = true; dentry = parent; } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); - if (!error && !slash) - error = prepend(buffer, buflen, "/", 1); - - return error; - -global_root: - /* - * Filesystems needing to implement special "root names" - * should do so with ->d_dname() - */ - if (IS_ROOT(dentry) && - (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) { - WARN(1, "Root dentry has weird name <%.*s>\n", - (int) dentry->d_name.len, dentry->d_name.name); - } - if (!slash) - error = prepend(buffer, buflen, "/", 1); - if (!error) - error = is_mounted(vfsmnt) ? 1 : 2; + if (error >= 0 && bptr == *buffer) { + if (--blen < 0) + error = -ENAMETOOLONG; + else + *--bptr = '/'; + } + *buffer = bptr; + *buflen = blen; return error; } @@ -2744,9 +2817,7 @@ char *__d_path(const struct path *path, prepend(&res, &buflen, "\0", 1); br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error < 0) @@ -2765,9 +2836,7 @@ char *d_absolute_path(const struct path *path, prepend(&res, &buflen, "\0", 1); br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); error = prepend_path(path, &root, &res, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error > 1) @@ -2833,9 +2902,7 @@ char *d_path(const struct path *path, char *buf, int buflen) get_fs_root(current->fs, &root); br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); error = path_with_deleted(path, &root, &res, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error < 0) res = ERR_PTR(error); @@ -2870,10 +2937,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) char *end = buffer + buflen; /* these dentries are never renamed, so d_lock is not needed */ if (prepend(&end, &buflen, " (deleted)", 11) || - prepend_name(&end, &buflen, &dentry->d_name) || + prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) || prepend(&end, &buflen, "/", 1)) end = ERR_PTR(-ENAMETOOLONG); - return end; + return end; } /* @@ -2881,30 +2948,42 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen) */ static char *__dentry_path(struct dentry *dentry, char *buf, int buflen) { - char *end = buf + buflen; - char *retval; + char *end, *retval; + int len, seq = 0; + int error = 0; - prepend(&end, &buflen, "\0", 1); + rcu_read_lock(); +restart: + end = buf + buflen; + len = buflen; + prepend(&end, &len, "\0", 1); if (buflen < 1) goto Elong; /* Get '/' right */ retval = end-1; *retval = '/'; - + read_seqbegin_or_lock(&rename_lock, &seq); while (!IS_ROOT(dentry)) { struct dentry *parent = dentry->d_parent; int error; prefetch(parent); - spin_lock(&dentry->d_lock); - error = prepend_name(&end, &buflen, &dentry->d_name); - spin_unlock(&dentry->d_lock); - if (error != 0 || prepend(&end, &buflen, "/", 1) != 0) - goto Elong; + error = prepend_name(&end, &len, &dentry->d_name); + if (error) + break; retval = end; dentry = parent; } + if (!(seq & 1)) + rcu_read_unlock(); + if (need_seqretry(&rename_lock, seq)) { + seq = 1; + goto restart; + } + done_seqretry(&rename_lock, seq); + if (error) + goto Elong; return retval; Elong: return ERR_PTR(-ENAMETOOLONG); @@ -2912,13 +2991,7 @@ Elong: char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen) { - char *retval; - - write_seqlock(&rename_lock); - retval = __dentry_path(dentry, buf, buflen); - write_sequnlock(&rename_lock); - - return retval; + return __dentry_path(dentry, buf, buflen); } EXPORT_SYMBOL(dentry_path_raw); @@ -2927,7 +3000,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) char *p = NULL; char *retval; - write_seqlock(&rename_lock); if (d_unlinked(dentry)) { p = buf + buflen; if (prepend(&p, &buflen, "//deleted", 10) != 0) @@ -2935,7 +3007,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) buflen++; } retval = __dentry_path(dentry, buf, buflen); - write_sequnlock(&rename_lock); if (!IS_ERR(retval) && p) *p = '/'; /* restore '/' overriden with '\0' */ return retval; @@ -2974,7 +3045,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -ENOENT; br_read_lock(&vfsmount_lock); - write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; char *cwd = page + PAGE_SIZE; @@ -2982,7 +3052,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); if (error < 0) @@ -3003,7 +3072,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -EFAULT; } } else { - write_sequnlock(&rename_lock); br_read_unlock(&vfsmount_lock); } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 293bc2e47a73..a235f0016889 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -231,7 +231,7 @@ static int filldir_one(void * __buf, const char * name, int len, int result = 0; buf->sequence++; - if (buf->ino == ino) { + if (buf->ino == ino && len <= NAME_MAX) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; diff --git a/fs/internal.h b/fs/internal.h index d20893795526..2be46ea5dd0b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -45,6 +45,9 @@ extern void __init chrdev_init(void); * namei.c */ extern int __inode_permission(struct inode *, int); +extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct path *); /* * namespace.c diff --git a/fs/namei.c b/fs/namei.c index 5e6aaadc1dcd..409a441ba2ae 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2209,7 +2209,7 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd, } /** - * umount_lookup_last - look up last component for umount + * mountpoint_last - look up last component for umount * @nd: pathwalk nameidata - currently pointing at parent directory of "last" * @path: pointer to container for result * @@ -2236,25 +2236,28 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd, * to the link, and nd->path will *not* be put. */ static int -umount_lookup_last(struct nameidata *nd, struct path *path) +mountpoint_last(struct nameidata *nd, struct path *path) { int error = 0; struct dentry *dentry; struct dentry *dir = nd->path.dentry; - if (unlikely(nd->flags & LOOKUP_RCU)) { - WARN_ON_ONCE(1); - error = -ECHILD; - goto error_check; + /* If we're in rcuwalk, drop out of it to handle last component */ + if (nd->flags & LOOKUP_RCU) { + if (unlazy_walk(nd, NULL)) { + error = -ECHILD; + goto out; + } } nd->flags &= ~LOOKUP_PARENT; if (unlikely(nd->last_type != LAST_NORM)) { error = handle_dots(nd, nd->last_type); - if (!error) - dentry = dget(nd->path.dentry); - goto error_check; + if (error) + goto out; + dentry = dget(nd->path.dentry); + goto done; } mutex_lock(&dir->d_inode->i_mutex); @@ -2268,44 +2271,43 @@ umount_lookup_last(struct nameidata *nd, struct path *path) dentry = d_alloc(dir, &nd->last); if (!dentry) { error = -ENOMEM; - } else { - dentry = lookup_real(dir->d_inode, dentry, nd->flags); - if (IS_ERR(dentry)) - error = PTR_ERR(dentry); + goto out; } + dentry = lookup_real(dir->d_inode, dentry, nd->flags); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out; } mutex_unlock(&dir->d_inode->i_mutex); -error_check: - if (!error) { - if (!dentry->d_inode) { - error = -ENOENT; - dput(dentry); - } else { - path->dentry = dentry; - path->mnt = mntget(nd->path.mnt); - if (should_follow_link(dentry->d_inode, - nd->flags & LOOKUP_FOLLOW)) - return 1; - follow_mount(path); - } +done: + if (!dentry->d_inode) { + error = -ENOENT; + dput(dentry); + goto out; } + path->dentry = dentry; + path->mnt = mntget(nd->path.mnt); + if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) + return 1; + follow_mount(path); + error = 0; +out: terminate_walk(nd); return error; } /** - * path_umountat - look up a path to be umounted + * path_mountpoint - look up a path to be umounted * @dfd: directory file descriptor to start walk from * @name: full pathname to walk * @flags: lookup flags - * @nd: pathwalk nameidata * * Look up the given name, but don't attempt to revalidate the last component. * Returns 0 and "path" will be valid on success; Retuns error otherwise. */ static int -path_umountat(int dfd, const char *name, struct path *path, unsigned int flags) +path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) { struct file *base = NULL; struct nameidata nd; @@ -2320,16 +2322,7 @@ path_umountat(int dfd, const char *name, struct path *path, unsigned int flags) if (err) goto out; - /* If we're in rcuwalk, drop out of it to handle last component */ - if (nd.flags & LOOKUP_RCU) { - err = unlazy_walk(&nd, NULL); - if (err) { - terminate_walk(&nd); - goto out; - } - } - - err = umount_lookup_last(&nd, path); + err = mountpoint_last(&nd, path); while (err > 0) { void *cookie; struct path link = *path; @@ -2340,7 +2333,7 @@ path_umountat(int dfd, const char *name, struct path *path, unsigned int flags) err = follow_link(&link, &nd, &cookie); if (err) break; - err = umount_lookup_last(&nd, path); + err = mountpoint_last(&nd, path); put_link(&nd, &link, cookie); } out: @@ -2353,8 +2346,22 @@ out: return err; } +static int +filename_mountpoint(int dfd, struct filename *s, struct path *path, + unsigned int flags) +{ + int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); + if (unlikely(error == -ECHILD)) + error = path_mountpoint(dfd, s->name, path, flags); + if (unlikely(error == -ESTALE)) + error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); + if (likely(!error)) + audit_inode(s, path->dentry, 0); + return error; +} + /** - * user_path_umountat - lookup a path from userland in order to umount it + * user_path_mountpoint_at - lookup a path from userland in order to umount it * @dfd: directory file descriptor * @name: pathname from userland * @flags: lookup flags @@ -2368,28 +2375,27 @@ out: * Returns 0 and populates "path" on success. */ int -user_path_umountat(int dfd, const char __user *name, unsigned int flags, +user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags, struct path *path) { struct filename *s = getname(name); int error; - if (IS_ERR(s)) return PTR_ERR(s); - - error = path_umountat(dfd, s->name, path, flags | LOOKUP_RCU); - if (unlikely(error == -ECHILD)) - error = path_umountat(dfd, s->name, path, flags); - if (unlikely(error == -ESTALE)) - error = path_umountat(dfd, s->name, path, flags | LOOKUP_REVAL); - - if (likely(!error)) - audit_inode(s, path->dentry, 0); - + error = filename_mountpoint(dfd, s, path, flags); putname(s); return error; } +int +kern_path_mountpoint(int dfd, const char *name, struct path *path, + unsigned int flags) +{ + struct filename s = {.name = name}; + return filename_mountpoint(dfd, &s, path, flags); +} +EXPORT_SYMBOL(kern_path_mountpoint); + /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. diff --git a/fs/namespace.c b/fs/namespace.c index fc2b5226278d..25845d1b300b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1321,7 +1321,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - retval = user_path_umountat(AT_FDCWD, name, lookup_flags, &path); + retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); if (retval) goto out; mnt = real_mount(path.mnt); diff --git a/fs/super.c b/fs/super.c index 5536a95186e2..f6961ea84c56 100644 --- a/fs/super.c +++ b/fs/super.c @@ -71,7 +71,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) if (!grab_super_passive(sb)) return -1; - if (sb->s_op && sb->s_op->nr_cached_objects) + if (sb->s_op->nr_cached_objects) fs_objects = sb->s_op->nr_cached_objects(sb); total_objects = sb->s_nr_dentry_unused + diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index cf5d2af61b81..ff0b981f078e 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -9,7 +9,6 @@ #define _LINUX_EVENTFD_H #include <linux/fcntl.h> -#include <linux/file.h> #include <linux/wait.h> /* @@ -26,6 +25,8 @@ #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) +struct file; + #ifdef CONFIG_EVENTFD struct file *eventfd_file_create(unsigned int count, int flags); diff --git a/include/linux/namei.h b/include/linux/namei.h index cd09751c71a0..8e47bc7a1665 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -58,7 +58,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; extern int user_path_at(int, const char __user *, unsigned, struct path *); extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); -extern int user_path_umountat(int, const char __user *, unsigned int, struct path *); #define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) #define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path) @@ -71,8 +70,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); -extern int vfs_path_lookup(struct dentry *, struct vfsmount *, - const char *, unsigned int, struct path *); +extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e0aeb32415ff..2418b6e71a85 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -60,6 +60,7 @@ #include <linux/poll.h> #include <linux/flex_array.h> /* used in cgroup_attach_task */ #include <linux/kthread.h> +#include <linux/file.h> #include <linux/atomic.h> @@ -4034,8 +4035,8 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, struct cgroup_event *event; struct cgroup_subsys_state *cfile_css; unsigned int efd, cfd; - struct file *efile; - struct file *cfile; + struct fd efile; + struct fd cfile; char *endp; int ret; @@ -4058,31 +4059,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, init_waitqueue_func_entry(&event->wait, cgroup_event_wake); INIT_WORK(&event->remove, cgroup_event_remove); - efile = eventfd_fget(efd); - if (IS_ERR(efile)) { - ret = PTR_ERR(efile); + efile = fdget(efd); + if (!efile.file) { + ret = -EBADF; goto out_kfree; } - event->eventfd = eventfd_ctx_fileget(efile); + event->eventfd = eventfd_ctx_fileget(efile.file); if (IS_ERR(event->eventfd)) { ret = PTR_ERR(event->eventfd); goto out_put_efile; } - cfile = fget(cfd); - if (!cfile) { + cfile = fdget(cfd); + if (!cfile.file) { ret = -EBADF; goto out_put_eventfd; } /* the process need read permission on control file */ /* AV: shouldn't we check that it's been opened for read instead? */ - ret = inode_permission(file_inode(cfile), MAY_READ); + ret = inode_permission(file_inode(cfile.file), MAY_READ); if (ret < 0) goto out_put_cfile; - event->cft = __file_cft(cfile); + event->cft = __file_cft(cfile.file); if (IS_ERR(event->cft)) { ret = PTR_ERR(event->cft); goto out_put_cfile; @@ -4103,7 +4104,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, ret = -EINVAL; event->css = cgroup_css(cgrp, event->cft->ss); - cfile_css = css_from_dir(cfile->f_dentry->d_parent, event->cft->ss); + cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); if (event->css && event->css == cfile_css && css_tryget(event->css)) ret = 0; @@ -4121,25 +4122,25 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, if (ret) goto out_put_css; - efile->f_op->poll(efile, &event->pt); + efile.file->f_op->poll(efile.file, &event->pt); spin_lock(&cgrp->event_list_lock); list_add(&event->list, &cgrp->event_list); spin_unlock(&cgrp->event_list_lock); - fput(cfile); - fput(efile); + fdput(cfile); + fdput(efile); return 0; out_put_css: css_put(event->css); out_put_cfile: - fput(cfile); + fdput(cfile); out_put_eventfd: eventfd_ctx_put(event->eventfd); out_put_efile: - fput(efile); + fdput(efile); out_kfree: kfree(event); |