From 15a9155fe3e8215c02b80df51ec2cac7c0d726ad Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Feb 2011 15:08:54 -0500 Subject: fix race in audit_get_nd() don't rely on pathname resolution ending up twice at the same point... Signed-off-by: Al Viro --- kernel/audit_watch.c | 85 ++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 53 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index d2e3c7866460..20b9fe6907d0 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev) } /* Initialize a parent watch entry. */ -static struct audit_parent *audit_init_parent(struct nameidata *ndp) +static struct audit_parent *audit_init_parent(struct path *path) { - struct inode *inode = ndp->path.dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct audit_parent *parent; int ret; @@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent) } /* Get path information necessary for adding watches. */ -static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) +static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct nameidata *ndparent, *ndwatch; + struct nameidata nd; + struct dentry *d; int err; - ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); - if (unlikely(!ndparent)) - return -ENOMEM; + err = path_lookup(watch->path, LOOKUP_PARENT, &nd); + if (err) + return err; - ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); - if (unlikely(!ndwatch)) { - kfree(ndparent); - return -ENOMEM; + if (nd.last_type != LAST_NORM) { + path_put(&nd.path); + return -EINVAL; } - err = path_lookup(path, LOOKUP_PARENT, ndparent); - if (err) { - kfree(ndparent); - kfree(ndwatch); - return err; + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + if (IS_ERR(d)) { + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + return PTR_ERR(d); } - - err = path_lookup(path, 0, ndwatch); - if (err) { - kfree(ndwatch); - ndwatch = NULL; + if (d->d_inode) { + /* update watch filter fields */ + watch->dev = d->d_inode->i_sb->s_dev; + watch->ino = d->d_inode->i_ino; } + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - *ndp = ndparent; - *ndw = ndwatch; - + *parent = nd.path; + dput(d); return 0; } -/* Release resources used for watch path information. */ -static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) -{ - if (ndp) { - path_put(&ndp->path); - kfree(ndp); - } - if (ndw) { - path_put(&ndw->path); - kfree(ndw); - } -} - /* Associate the given rule with an existing parent. * Caller must hold audit_filter_mutex. */ static void audit_add_to_parent(struct audit_krule *krule, @@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) { struct audit_watch *watch = krule->watch; struct audit_parent *parent; - struct nameidata *ndp = NULL, *ndw = NULL; + struct path parent_path; int h, ret = 0; mutex_unlock(&audit_filter_mutex); /* Avoid calling path_lookup under audit_filter_mutex. */ - ret = audit_get_nd(watch->path, &ndp, &ndw); - if (ret) { - /* caller expects mutex locked */ - mutex_lock(&audit_filter_mutex); - goto error; - } + ret = audit_get_nd(watch, &parent_path); + /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - /* update watch filter fields */ - if (ndw) { - watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev; - watch->ino = ndw->path.dentry->d_inode->i_ino; - } + if (ret) + return ret; /* either find an old parent or attach a new one */ - parent = audit_find_parent(ndp->path.dentry->d_inode); + parent = audit_find_parent(parent_path.dentry->d_inode); if (!parent) { - parent = audit_init_parent(ndp); + parent = audit_init_parent(&parent_path); if (IS_ERR(parent)) { ret = PTR_ERR(parent); goto error; @@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) h = audit_hash_ino((u32)watch->ino); *list = &audit_inode_hash[h]; error: - audit_put_nd(ndp, ndw); /* NULL args OK */ + path_put(&parent_path); return ret; - } void audit_remove_watch_rule(struct audit_krule *krule) -- cgit v1.2.3 From c9c6cac0c2bdbda42e7b804838648d0bc60ddb13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Feb 2011 15:15:47 -0500 Subject: kill path_lookup() all remaining callers pass LOOKUP_PARENT to it, so flags argument can die; renamed to kern_path_parent() Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/syscalls.c | 2 +- fs/namei.c | 7 +++---- fs/ocfs2/refcounttree.c | 2 +- include/linux/namei.h | 2 +- kernel/audit_watch.c | 2 +- net/unix/af_unix.c | 2 +- 6 files changed, 8 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 187a7d32f86a..a3d2ce54ea2e 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags, if (!IS_ERR(tmp)) { struct nameidata nd; - ret = path_lookup(tmp, LOOKUP_PARENT, &nd); + ret = kern_path_parent(tmp, &nd); if (!ret) { nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; ret = spufs_create(&nd, flags, mode, neighbor); diff --git a/fs/namei.c b/fs/namei.c index a4689eb2df28..1d6bc8151553 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1747,10 +1747,9 @@ static int do_path_lookup(int dfd, const char *name, return retval; } -int path_lookup(const char *name, unsigned int flags, - struct nameidata *nd) +int kern_path_parent(const char *name, struct nameidata *nd) { - return do_path_lookup(AT_FDCWD, name, flags, nd); + return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); } int kern_path(const char *name, unsigned int flags, struct path *path) @@ -3586,7 +3585,7 @@ EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(__page_symlink); EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); -EXPORT_SYMBOL(path_lookup); +EXPORT_SYMBOL(kern_path_parent); EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19ebc5aad391..29623da133cc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4379,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path, if (IS_ERR(s)) return PTR_ERR(s); - error = path_lookup(s, LOOKUP_PARENT, nd); + error = kern_path_parent(s, nd); if (error) putname(s); else diff --git a/include/linux/namei.h b/include/linux/namei.h index f276d4fa01fc..58ce3433d4ec 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -72,7 +72,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *); extern int kern_path(const char *, unsigned, struct path *); -extern int path_lookup(const char *, unsigned, struct nameidata *); +extern int kern_path_parent(const char *, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 20b9fe6907d0..e683869365d9 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -359,7 +359,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d; int err; - err = path_lookup(watch->path, LOOKUP_PARENT, &nd); + err = kern_path_parent(watch->path, &nd); if (err) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d286204..d8c04a602cf1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Get the parent directory, calculate the hash for last * component. */ - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); + err = kern_path_parent(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; -- cgit v1.2.3 From 73d049a40fc6269189c4e2ba6792cb5dd054883c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 11 Mar 2011 12:08:24 -0500 Subject: open-style analog of vfs_path_lookup() new function: file_open_root(dentry, mnt, name, flags) opens the file vfs_path_lookup would arrive to. Note that name can be empty; in that case the usual requirement that dentry should be a directory is lifted. open-coded equivalents switched to it, may_open() got down exactly one caller and became static. Signed-off-by: Al Viro --- arch/um/drivers/mconsole_kern.c | 21 ++--------- fs/internal.h | 2 ++ fs/namei.c | 80 ++++++++++++++++++++++++++--------------- fs/nfsctl.c | 21 +++-------- fs/open.c | 14 ++++++++ include/linux/fs.h | 4 +-- kernel/sysctl_binary.c | 19 +--------- 7 files changed, 77 insertions(+), 84 deletions(-) (limited to 'kernel') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 975613b23dcf..c70e047eed72 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req) #if 0 void mconsole_proc(struct mc_request *req) { - struct nameidata nd; struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; struct file *file; - int n, err; + int n; char *ptr = req->request.data, *buf; mm_segment_t old_fs = get_fs(); ptr += strlen("proc"); ptr = skip_spaces(ptr); - err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); - if (err) { - mconsole_reply(req, "Failed to look up file", 1, 0); - goto out; - } - - err = may_open(&nd.path, MAY_READ, O_RDONLY); - if (result) { - mconsole_reply(req, "Failed to open file", 1, 0); - path_put(&nd.path); - goto out; - } - - file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, - current_cred()); - err = PTR_ERR(file); + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); - path_put(&nd.path); goto out; } diff --git a/fs/internal.h b/fs/internal.h index 6fdbdf2c6047..52abc5287f50 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -114,6 +114,8 @@ struct open_flags { }; extern struct file *do_filp_open(int dfd, const char *pathname, const struct open_flags *op, int lookup_flags); +extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, + const char *, const struct open_flags *, int lookup_flags); /* * inode.c diff --git a/fs/namei.c b/fs/namei.c index 8ee7785d5642..abc8d2df121c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1487,11 +1487,13 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->depth = 0; if (flags & LOOKUP_ROOT) { struct inode *inode = nd->root.dentry->d_inode; - if (!inode->i_op->lookup) - return -ENOTDIR; - retval = inode_permission(inode, MAY_EXEC); - if (retval) - return retval; + if (*name) { + if (!inode->i_op->lookup) + return -ENOTDIR; + retval = inode_permission(inode, MAY_EXEC); + if (retval) + return retval; + } nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { @@ -1937,7 +1939,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; } -int may_open(struct path *path, int acc_mode, int flag) +static int may_open(struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; @@ -2250,11 +2252,10 @@ exit: } static struct file *path_openat(int dfd, const char *pathname, - const struct open_flags *op, int flags) + struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; struct file *filp; - struct nameidata nd; struct path path; int count = 0; int error; @@ -2264,27 +2265,27 @@ static struct file *path_openat(int dfd, const char *pathname, return ERR_PTR(-ENFILE); filp->f_flags = op->open_flag; - nd.intent.open.file = filp; - nd.intent.open.flags = open_to_namei_flags(op->open_flag); - nd.intent.open.create_mode = op->mode; + nd->intent.open.file = filp; + nd->intent.open.flags = open_to_namei_flags(op->open_flag); + nd->intent.open.create_mode = op->mode; - error = path_init(dfd, pathname, flags | LOOKUP_PARENT, &nd, &base); + error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) goto out_filp; current->total_link_count = 0; - error = link_path_walk(pathname, &nd); + error = link_path_walk(pathname, nd); if (unlikely(error)) goto out_filp; - filp = do_last(&nd, &path, op, pathname); + filp = do_last(nd, &path, op, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path link = path; struct inode *linki = link.dentry->d_inode; void *cookie; - if (!(nd.flags & LOOKUP_FOLLOW) || count++ == 32) { - path_put_conditional(&path, &nd); - path_put(&nd.path); + if (!(nd->flags & LOOKUP_FOLLOW) || count++ == 32) { + path_put_conditional(&path, nd); + path_put(&nd->path); filp = ERR_PTR(-ELOOP); break; } @@ -2299,23 +2300,23 @@ static struct file *path_openat(int dfd, const char *pathname, * have to putname() it when we are done. Procfs-like symlinks * just set LAST_BIND. */ - nd.flags |= LOOKUP_PARENT; - nd.flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); - error = __do_follow_link(&link, &nd, &cookie); + nd->flags |= LOOKUP_PARENT; + nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); + error = __do_follow_link(&link, nd, &cookie); if (unlikely(error)) filp = ERR_PTR(error); else - filp = do_last(&nd, &path, op, pathname); + filp = do_last(nd, &path, op, pathname); if (!IS_ERR(cookie) && linki->i_op->put_link) - linki->i_op->put_link(link.dentry, &nd, cookie); + linki->i_op->put_link(link.dentry, nd, cookie); path_put(&link); } out: - if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT)) - path_put(&nd.root); + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) + path_put(&nd->root); if (base) fput(base); - release_open_intent(&nd); + release_open_intent(nd); return filp; out_filp: @@ -2326,16 +2327,39 @@ out_filp: struct file *do_filp_open(int dfd, const char *pathname, const struct open_flags *op, int flags) { + struct nameidata nd; struct file *filp; - filp = path_openat(dfd, pathname, op, flags | LOOKUP_RCU); + filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) - filp = path_openat(dfd, pathname, op, flags); + filp = path_openat(dfd, pathname, &nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) - filp = path_openat(dfd, pathname, op, flags | LOOKUP_REVAL); + filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); return filp; } +struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const struct open_flags *op, int flags) +{ + struct nameidata nd; + struct file *file; + + nd.root.mnt = mnt; + nd.root.dentry = dentry; + + flags |= LOOKUP_ROOT; + + if (dentry->d_inode->i_op->follow_link) + return ERR_PTR(-ELOOP); + + file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); + if (unlikely(file == ERR_PTR(-ECHILD))) + file = path_openat(-1, name, &nd, op, flags); + if (unlikely(file == ERR_PTR(-ESTALE))) + file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); + return file; +} + /** * lookup_create - lookup a dentry, creating it if it doesn't exist * @nd: nameidata info diff --git a/fs/nfsctl.c b/fs/nfsctl.c index bf9cbd242ddd..124e8fcb0dd6 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -22,30 +22,17 @@ static struct file *do_open(char *name, int flags) { - struct nameidata nd; struct vfsmount *mnt; - int error; + struct file *file; mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); if (IS_ERR(mnt)) return (struct file *)mnt; - error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); - mntput(mnt); /* drop do_kern_mount reference */ - if (error) - return ERR_PTR(error); - - if (flags == O_RDWR) - error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags); - else - error = may_open(&nd.path, MAY_WRITE, flags); + file = file_open_root(mnt->mnt_root, mnt, name, flags); - if (!error) - return dentry_open(nd.path.dentry, nd.path.mnt, flags, - current_cred()); - - path_put(&nd.path); - return ERR_PTR(error); + mntput(mnt); /* drop do_kern_mount reference */ + return file; } static struct { diff --git a/fs/open.c b/fs/open.c index d05e18c60bae..48afc5c139d2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -959,6 +959,20 @@ struct file *filp_open(const char *filename, int flags, int mode) } EXPORT_SYMBOL(filp_open); +struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, + const char *filename, int flags) +{ + struct open_flags op; + int lookup = build_open_flags(flags, 0, &op); + if (flags & O_CREAT) + return ERR_PTR(-EINVAL); + if (!filename && (flags & O_DIRECTORY)) + if (!dentry->d_inode->i_op->lookup) + return ERR_PTR(-ENOTDIR); + return do_file_open_root(dentry, mnt, filename, &op, lookup); +} +EXPORT_SYMBOL(file_open_root); + long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { struct open_flags op; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9c75714f92c1..bf5c3c896072 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1990,6 +1990,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset, extern long do_sys_open(int dfd, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); +extern struct file *file_open_root(struct dentry *, struct vfsmount *, + const char *, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); @@ -2205,8 +2207,6 @@ extern struct file *create_read_pipe(struct file *f, int flags); extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); -extern int may_open(struct path *, int, int); - extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index b875bedf7c9a..3b8e028b9601 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { const struct bin_table *table = NULL; - struct nameidata nd; struct vfsmount *mnt; struct file *file; ssize_t result; char *pathname; int flags; - int acc_mode; pathname = sysctl_getname(name, nlen, &table); result = PTR_ERR(pathname); @@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen, /* How should the sysctl be accessed? */ if (oldval && oldlen && newval && newlen) { flags = O_RDWR; - acc_mode = MAY_READ | MAY_WRITE; } else if (newval && newlen) { flags = O_WRONLY; - acc_mode = MAY_WRITE; } else if (oldval && oldlen) { flags = O_RDONLY; - acc_mode = MAY_READ; } else { result = 0; goto out_putname; } mnt = current->nsproxy->pid_ns->proc_mnt; - result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); - if (result) - goto out_putname; - - result = may_open(&nd.path, acc_mode, flags); - if (result) - goto out_putpath; - - file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred()); + file = file_open_root(mnt->mnt_root, mnt, pathname, flags); result = PTR_ERR(file); if (IS_ERR(file)) goto out_putname; @@ -1370,10 +1357,6 @@ out_putname: putname(pathname); out: return result; - -out_putpath: - path_put(&nd.path); - goto out_putname; } -- cgit v1.2.3 From 990d6c2d7aee921e3bce22b2d6a750fd552262be Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 29 Jan 2011 18:43:26 +0530 Subject: vfs: Add name to file handle conversion support The syscall also return mount id which can be used to lookup file system specific information such as uuid in /proc//mountinfo Signed-off-by: Aneesh Kumar K.V Signed-off-by: Al Viro --- fs/Kconfig | 2 +- fs/Makefile | 2 + fs/fhandle.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/exportfs.h | 3 ++ include/linux/fs.h | 7 ++++ include/linux/syscalls.h | 5 ++- init/Kconfig | 12 ++++++ kernel/sys_ni.c | 3 ++ 8 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 fs/fhandle.c (limited to 'kernel') diff --git a/fs/Kconfig b/fs/Kconfig index 3db9caa57edc..7cb53aafac1e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -47,7 +47,7 @@ config FS_POSIX_ACL def_bool n config EXPORTFS - tristate + bool config FILE_LOCKING bool "Enable POSIX file locking API" if EXPERT diff --git a/fs/Makefile b/fs/Makefile index a7f7cef0c0c8..ba01202844c5 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o +obj-$(CONFIG_FHANDLE) += fhandle.o + obj-y += quota/ obj-$(CONFIG_PROC_FS) += proc/ diff --git a/fs/fhandle.c b/fs/fhandle.c new file mode 100644 index 000000000000..9f79e743a840 --- /dev/null +++ b/fs/fhandle.c @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static long do_sys_name_to_handle(struct path *path, + struct file_handle __user *ufh, + int __user *mnt_id) +{ + long retval; + struct file_handle f_handle; + int handle_dwords, handle_bytes; + struct file_handle *handle = NULL; + + /* + * We need t make sure wether the file system + * support decoding of the file handle + */ + if (!path->mnt->mnt_sb->s_export_op || + !path->mnt->mnt_sb->s_export_op->fh_to_dentry) + return -EOPNOTSUPP; + + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) + return -EFAULT; + + if (f_handle.handle_bytes > MAX_HANDLE_SZ) + return -EINVAL; + + handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + GFP_KERNEL); + if (!handle) + return -ENOMEM; + + /* convert handle size to multiple of sizeof(u32) */ + handle_dwords = f_handle.handle_bytes >> 2; + + /* we ask for a non connected handle */ + retval = exportfs_encode_fh(path->dentry, + (struct fid *)handle->f_handle, + &handle_dwords, 0); + handle->handle_type = retval; + /* convert handle size to bytes */ + handle_bytes = handle_dwords * sizeof(u32); + handle->handle_bytes = handle_bytes; + if ((handle->handle_bytes > f_handle.handle_bytes) || + (retval == 255) || (retval == -ENOSPC)) { + /* As per old exportfs_encode_fh documentation + * we could return ENOSPC to indicate overflow + * But file system returned 255 always. So handle + * both the values + */ + /* + * set the handle size to zero so we copy only + * non variable part of the file_handle + */ + handle_bytes = 0; + retval = -EOVERFLOW; + } else + retval = 0; + /* copy the mount id */ + if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) || + copy_to_user(ufh, handle, + sizeof(struct file_handle) + handle_bytes)) + retval = -EFAULT; + kfree(handle); + return retval; +} + +/** + * sys_name_to_handle_at: convert name to handle + * @dfd: directory relative to which name is interpreted if not absolute + * @name: name that should be converted to handle. + * @handle: resulting file handle + * @mnt_id: mount id of the file system containing the file + * @flag: flag value to indicate whether to follow symlink or not + * + * @handle->handle_size indicate the space available to store the + * variable part of the file handle in bytes. If there is not + * enough space, the field is updated to return the minimum + * value required. + */ +SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, + struct file_handle __user *, handle, int __user *, mnt_id, + int, flag) +{ + struct path path; + int lookup_flags; + int err; + + if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) + return -EINVAL; + + lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; + if (flag & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + err = user_path_at(dfd, name, lookup_flags, &path); + if (!err) { + err = do_sys_name_to_handle(&path, handle, mnt_id); + path_put(&path); + } + return err; +} diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 65afdfd31b7b..33a42f24b275 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -8,6 +8,9 @@ struct inode; struct super_block; struct vfsmount; +/* limit the handle size to NFSv4 handle size now */ +#define MAX_HANDLE_SZ 128 + /* * The fileid_type identifies how the file within the filesystem is encoded. * In theory this is freely set and parsed by the filesystem, but we try to diff --git a/include/linux/fs.h b/include/linux/fs.h index b7178b05cf3a..3f64630c0e10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -978,6 +978,13 @@ struct file { #endif }; +struct file_handle { + __u32 handle_bytes; + int handle_type; + /* file identifier */ + unsigned char f_handle[0]; +}; + #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 98664db1be47..970112613fb4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -62,6 +62,7 @@ struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; struct perf_event_attr; +struct file_handle; #include #include @@ -832,5 +833,7 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); - +asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, + struct file_handle __user *handle, + int __user *mnt_id, int flag); #endif diff --git a/init/Kconfig b/init/Kconfig index be788c0957d4..e72fa17fe559 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3 for processing it. A preliminary version of these tools is available at . +config FHANDLE + bool "open by fhandle syscalls" + select EXPORTFS + help + If you say Y here, a user level program will be able to map + file names to handle and then later use the handle for + different file system operations. This is useful in implementing + userspace file servers, which now track files using handles instead + of names. The handle would remain the same even if file names + get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) + syscalls. + config TASKSTATS bool "Export task/process statistics through netlink (EXPERIMENTAL)" depends on NET diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index c782fe9924c7..4e013439ac28 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -186,3 +186,6 @@ cond_syscall(sys_perf_event_open); /* fanotify! */ cond_syscall(sys_fanotify_init); cond_syscall(sys_fanotify_mark); + +/* open by handle */ +cond_syscall(sys_name_to_handle_at); -- cgit v1.2.3 From becfd1f37544798cbdfd788f32c827160fab98c1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 29 Jan 2011 18:43:26 +0530 Subject: vfs: Add open by file handle support [AV: duplicate of open() guts removed; file_open_root() used instead] Signed-off-by: Aneesh Kumar K.V Signed-off-by: Al Viro --- fs/compat.c | 13 ++++ fs/exportfs/expfs.c | 2 + fs/fhandle.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 3 + include/linux/syscalls.h | 3 + kernel/sys_ni.c | 2 + 6 files changed, 181 insertions(+) (limited to 'kernel') diff --git a/fs/compat.c b/fs/compat.c index a071775f3bb3..c6d31a3bab88 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -2284,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd, } #endif /* CONFIG_TIMERFD */ + +#ifdef CONFIG_FHANDLE +/* + * Exactly like fs/open.c:sys_open_by_handle_at(), except that it + * doesn't set the O_LARGEFILE flag. + */ +asmlinkage long +compat_sys_open_by_handle_at(int mountdirfd, + struct file_handle __user *handle, int flags) +{ + return do_handle_open(mountdirfd, handle, flags); +} +#endif diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index cfe55731b6dc..b05acb796135 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -374,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, /* * Try to get any dentry for the given file handle from the filesystem. */ + if (!nop || !nop->fh_to_dentry) + return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); if (!result) result = ERR_PTR(-ESTALE); diff --git a/fs/fhandle.c b/fs/fhandle.c index 9f79e743a840..bf93ad2bee07 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include "internal.h" @@ -105,3 +107,159 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, } return err; } + +static struct vfsmount *get_vfsmount_from_fd(int fd) +{ + struct path path; + + if (fd == AT_FDCWD) { + struct fs_struct *fs = current->fs; + spin_lock(&fs->lock); + path = fs->pwd; + mntget(path.mnt); + spin_unlock(&fs->lock); + } else { + int fput_needed; + struct file *file = fget_light(fd, &fput_needed); + if (!file) + return ERR_PTR(-EBADF); + path = file->f_path; + mntget(path.mnt); + fput_light(file, fput_needed); + } + return path.mnt; +} + +static int vfs_dentry_acceptable(void *context, struct dentry *dentry) +{ + return 1; +} + +static int do_handle_to_path(int mountdirfd, struct file_handle *handle, + struct path *path) +{ + int retval = 0; + int handle_dwords; + + path->mnt = get_vfsmount_from_fd(mountdirfd); + if (IS_ERR(path->mnt)) { + retval = PTR_ERR(path->mnt); + goto out_err; + } + /* change the handle size to multiple of sizeof(u32) */ + handle_dwords = handle->handle_bytes >> 2; + path->dentry = exportfs_decode_fh(path->mnt, + (struct fid *)handle->f_handle, + handle_dwords, handle->handle_type, + vfs_dentry_acceptable, NULL); + if (IS_ERR(path->dentry)) { + retval = PTR_ERR(path->dentry); + goto out_mnt; + } + return 0; +out_mnt: + mntput(path->mnt); +out_err: + return retval; +} + +static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, + struct path *path) +{ + int retval = 0; + struct file_handle f_handle; + struct file_handle *handle = NULL; + + /* + * With handle we don't look at the execute bit on the + * the directory. Ideally we would like CAP_DAC_SEARCH. + * But we don't have that + */ + if (!capable(CAP_DAC_READ_SEARCH)) { + retval = -EPERM; + goto out_err; + } + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { + retval = -EFAULT; + goto out_err; + } + if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || + (f_handle.handle_bytes == 0)) { + retval = -EINVAL; + goto out_err; + } + handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + GFP_KERNEL); + if (!handle) { + retval = -ENOMEM; + goto out_err; + } + /* copy the full handle */ + if (copy_from_user(handle, ufh, + sizeof(struct file_handle) + + f_handle.handle_bytes)) { + retval = -EFAULT; + goto out_handle; + } + + retval = do_handle_to_path(mountdirfd, handle, path); + +out_handle: + kfree(handle); +out_err: + return retval; +} + +long do_handle_open(int mountdirfd, + struct file_handle __user *ufh, int open_flag) +{ + long retval = 0; + struct path path; + struct file *file; + int fd; + + retval = handle_to_path(mountdirfd, ufh, &path); + if (retval) + return retval; + + fd = get_unused_fd_flags(open_flag); + if (fd < 0) { + path_put(&path); + return fd; + } + file = file_open_root(path.dentry, path.mnt, "", open_flag); + if (IS_ERR(file)) { + put_unused_fd(fd); + retval = PTR_ERR(file); + } else { + retval = fd; + fsnotify_open(file); + fd_install(fd, file); + } + path_put(&path); + return retval; +} + +/** + * sys_open_by_handle_at: Open the file handle + * @mountdirfd: directory file descriptor + * @handle: file handle to be opened + * @flag: open flags. + * + * @mountdirfd indicate the directory file descriptor + * of the mount point. file handle is decoded relative + * to the vfsmount pointed by the @mountdirfd. @flags + * value is same as the open(2) flags. + */ +SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, + struct file_handle __user *, handle, + int, flags) +{ + long ret; + + if (force_o_largefile()) + flags |= O_LARGEFILE; + + ret = do_handle_open(mountdirfd, handle, flags); + return ret; +} diff --git a/fs/internal.h b/fs/internal.h index 52abc5287f50..f3d15de44b15 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -117,6 +117,9 @@ extern struct file *do_filp_open(int dfd, const char *pathname, extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *, int lookup_flags); +extern long do_handle_open(int mountdirfd, + struct file_handle __user *ufh, int open_flag); + /* * inode.c */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 970112613fb4..2d9b79c0f224 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -836,4 +836,7 @@ asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, int __user *mnt_id, int flag); +asmlinkage long sys_open_by_handle_at(int mountdirfd, + struct file_handle __user *handle, + int flags); #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 4e013439ac28..25cc41cd8f33 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -189,3 +189,5 @@ cond_syscall(sys_fanotify_mark); /* open by handle */ cond_syscall(sys_name_to_handle_at); +cond_syscall(sys_open_by_handle_at); +cond_syscall(compat_sys_open_by_handle_at); -- cgit v1.2.3