From 64964528b24ea390824f0e5ce9d34b8d39b28cde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 00:37:32 -0400 Subject: make proc_ns_operations work with struct ns_common * instead of void * We can do that now. And kill ->inum(), while we are at it - all instances are identical. Signed-off-by: Al Viro --- include/linux/proc_ns.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/proc_ns.h') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 34a1e105bef4..f284959391fd 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -6,18 +6,18 @@ struct pid_namespace; struct nsproxy; +struct ns_common; struct proc_ns_operations { const char *name; int type; - void *(*get)(struct task_struct *task); - void (*put)(void *ns); - int (*install)(struct nsproxy *nsproxy, void *ns); - unsigned int (*inum)(void *ns); + struct ns_common *(*get)(struct task_struct *task); + void (*put)(struct ns_common *ns); + int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); }; struct proc_ns { - void *ns; + struct ns_common *ns; const struct proc_ns_operations *ns_ops; }; -- cgit v1.2.3 From 6344c433a452b1a05d03a61a6a85d89f793bb7b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 00:45:45 -0400 Subject: new helpers: ns_alloc_inum/ns_free_inum take struct ns_common *, for now simply wrappers around proc_{alloc,free}_inum() Signed-off-by: Al Viro --- fs/namespace.c | 4 ++-- include/linux/proc_ns.h | 3 +++ ipc/namespace.c | 6 +++--- kernel/pid_namespace.c | 4 ++-- kernel/user_namespace.c | 4 ++-- kernel/utsname.c | 4 ++-- net/core/net_namespace.c | 4 ++-- 7 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux/proc_ns.h') diff --git a/fs/namespace.c b/fs/namespace.c index b9c16c3f63f5..30738d200866 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2645,7 +2645,7 @@ dput_out: static void free_mnt_ns(struct mnt_namespace *ns) { - proc_free_inum(ns->ns.inum); + 
ns_free_inum(&ns->ns); put_user_ns(ns->user_ns); kfree(ns); } @@ -2667,7 +2667,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); - ret = proc_alloc_inum(&new_ns->ns.inum); + ret = ns_alloc_inum(&new_ns->ns); if (ret) { kfree(new_ns); return ERR_PTR(ret); diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index f284959391fd..f5780ee7f8f7 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -71,4 +71,7 @@ static inline bool proc_ns_inode(struct inode *inode) { return false; } #endif /* CONFIG_PROC_FS */ +#define ns_alloc_inum(ns) proc_alloc_inum(&(ns)->inum) +#define ns_free_inum(ns) proc_free_inum((ns)->inum) + #endif /* _LINUX_PROC_NS_H */ diff --git a/ipc/namespace.c b/ipc/namespace.c index 531029a67fef..bcdd7a5c122a 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -26,7 +26,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (ns == NULL) return ERR_PTR(-ENOMEM); - err = proc_alloc_inum(&ns->ns.inum); + err = ns_alloc_inum(&ns->ns); if (err) { kfree(ns); return ERR_PTR(err); @@ -35,7 +35,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, atomic_set(&ns->count, 1); err = mq_init_ns(ns); if (err) { - proc_free_inum(ns->ns.inum); + ns_free_inum(&ns->ns); kfree(ns); return ERR_PTR(err); } @@ -119,7 +119,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) */ ipcns_notify(IPCNS_REMOVED); put_user_ns(ns->user_ns); - proc_free_inum(ns->ns.inum); + ns_free_inum(&ns->ns); kfree(ns); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 79aabce49a85..5aa9158a84d5 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -105,7 +105,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns if (ns->pid_cachep == NULL) goto out_free_map; - err = proc_alloc_inum(&ns->ns.inum); + err = 
ns_alloc_inum(&ns->ns); if (err) goto out_free_map; @@ -142,7 +142,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns) { int i; - proc_free_inum(ns->ns.inum); + ns_free_inum(&ns->ns); for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); put_user_ns(ns->user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 29cd5ccfc37a..6bf8177768e5 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -86,7 +86,7 @@ int create_user_ns(struct cred *new) if (!ns) return -ENOMEM; - ret = proc_alloc_inum(&ns->ns.inum); + ret = ns_alloc_inum(&ns->ns); if (ret) { kmem_cache_free(user_ns_cachep, ns); return ret; @@ -136,7 +136,7 @@ void free_user_ns(struct user_namespace *ns) #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif - proc_free_inum(ns->ns.inum); + ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); ns = parent; } while (atomic_dec_and_test(&parent->count)); diff --git a/kernel/utsname.c b/kernel/utsname.c index 20697befe466..c2a2b321d88a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -42,7 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, if (!ns) return ERR_PTR(-ENOMEM); - err = proc_alloc_inum(&ns->ns.inum); + err = ns_alloc_inum(&ns->ns); if (err) { kfree(ns); return ERR_PTR(err); @@ -84,7 +84,7 @@ void free_uts_ns(struct kref *kref) ns = container_of(kref, struct uts_namespace, kref); put_user_ns(ns->user_ns); - proc_free_inum(ns->ns.inum); + ns_free_inum(&ns->ns); kfree(ns); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2161f0979fce..da775f53f3fd 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -386,12 +386,12 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_pid); static __net_init int net_ns_net_init(struct net *net) { - return proc_alloc_inum(&net->ns.inum); + return ns_alloc_inum(&net->ns); } static __net_exit void net_ns_net_exit(struct net *net) { - proc_free_inum(net->ns.inum); + 
ns_free_inum(&net->ns); } static struct pernet_operations __net_initdata net_ns_ops = { -- cgit v1.2.3 From f77c80142e1afe6d5c16975ca5d7d1fc324b16f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 03:13:17 -0400 Subject: bury struct proc_ns in fs/proc a) make get_proc_ns() return a pointer to struct ns_common b) mirror ns_ops in dentry->d_fsdata of ns dentries, so that is_mnt_ns_file() could get away with fewer dereferences. That way struct proc_ns becomes invisible outside of fs/proc/*.c Signed-off-by: Al Viro --- fs/namespace.c | 13 ++----------- fs/proc/internal.h | 5 +++++ fs/proc/namespaces.c | 7 ++++--- include/linux/proc_ns.h | 9 ++------- kernel/nsproxy.c | 4 +--- net/core/net_namespace.c | 4 +--- 6 files changed, 15 insertions(+), 27 deletions(-) (limited to 'include/linux/proc_ns.h') diff --git a/fs/namespace.c b/fs/namespace.c index f815218f92d3..9dfb4cac0c41 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1570,16 +1570,7 @@ static bool is_mnt_ns_file(struct dentry *dentry) { /* Is this a proxy for a mount namespace? 
*/ struct inode *inode = dentry->d_inode; - struct proc_ns *ei; - - if (!proc_ns_inode(inode)) - return false; - - ei = get_proc_ns(inode); - if (ei->ns_ops != &mntns_operations) - return false; - - return true; + return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations; } struct mnt_namespace *to_mnt_ns(struct ns_common *ns) @@ -1596,7 +1587,7 @@ static bool mnt_ns_loop(struct dentry *dentry) if (!is_mnt_ns_file(dentry)) return false; - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)->ns); + mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa7a0ee182e1..0fabc48d905f 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -57,6 +57,11 @@ union proc_op { struct task_struct *task); }; +struct proc_ns { + struct ns_common *ns; + const struct proc_ns_operations *ns_ops; +}; + struct proc_inode { struct pid *pid; int fd; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 995e8e98237d..18fc1cf899de 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -45,7 +45,7 @@ static const struct inode_operations ns_inode_operations = { static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; - const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", ns_ops->name, inode->i_ino); @@ -75,6 +75,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, ns_ops->put(ns); return ERR_PTR(-ENOMEM); } + dentry->d_fsdata = (void *)ns_ops; inode = iget_locked(sb, ns->inum); if (!inode) { @@ -286,9 +287,9 @@ out_invalid: return ERR_PTR(-EINVAL); } -struct proc_ns *get_proc_ns(struct inode *inode) +struct ns_common *get_proc_ns(struct inode *inode) { - return &PROC_I(inode)->ns; + return PROC_I(inode)->ns.ns; } bool proc_ns_inode(struct inode *inode) 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index f5780ee7f8f7..2837ff41cfe3 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -16,11 +16,6 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); }; -struct proc_ns { - struct ns_common *ns; - const struct proc_ns_operations *ns_ops; -}; - extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; @@ -44,7 +39,7 @@ enum { extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); extern struct file *proc_ns_fget(int fd); -extern struct proc_ns *get_proc_ns(struct inode *); +extern struct ns_common *get_proc_ns(struct inode *); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); extern bool proc_ns_inode(struct inode *inode); @@ -59,7 +54,7 @@ static inline struct file *proc_ns_fget(int fd) return ERR_PTR(-EINVAL); } -static inline struct proc_ns *get_proc_ns(struct inode *inode) { return NULL; } +static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; } static inline int proc_alloc_inum(unsigned int *inum) { diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 87c37221cb7f..49746c81ad8d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -222,7 +222,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) { struct task_struct *tsk = current; struct nsproxy *new_nsproxy; - struct proc_ns *ei; struct file *file; struct ns_common *ns; int err; @@ -232,8 +231,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) return PTR_ERR(file); err = -EINVAL; - ei = get_proc_ns(file_inode(file)); - ns = ei->ns; + ns = get_proc_ns(file_inode(file)); if (nstype && (ns->ops->type != nstype)) goto out; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 4d4acaf7b498..ce780c722e48 100644 --- 
a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -337,7 +337,6 @@ EXPORT_SYMBOL_GPL(__put_net); struct net *get_net_ns_by_fd(int fd) { - struct proc_ns *ei; struct file *file; struct ns_common *ns; struct net *net; @@ -346,8 +345,7 @@ struct net *get_net_ns_by_fd(int fd) if (IS_ERR(file)) return ERR_CAST(file); - ei = get_proc_ns(file_inode(file)); - ns = ei->ns; + ns = get_proc_ns(file_inode(file)); if (ns->ops == &netns_operations) net = get_net(container_of(ns, struct net, ns)); else -- cgit v1.2.3 From e149ed2b805fefdccf7ccdfc19eca22fdd4514ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 1 Nov 2014 10:57:28 -0400 Subject: take the targets of /proc/*/ns/* symlinks to separate fs New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now. It's not mountable (not even registered, so it's not in /proc/filesystems, etc.). Files on it *are* bindable - we explicitly permit that in do_loopback(). This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well. get_proc_ns() is a macro now (it's simply returning ->i_private; would have been an inline, if not for header ordering headache). proc_ns_inode() is an ex-parrot. The interface used in procfs is ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops). Dentries and inodes are never hashed; a non-counting reference to dentry is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path() if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details of that mechanism. As a result, proc_ns_follow_link() has stopped poking in nd->path.mnt; it does nd_jump_link() on a consistent pair it gets from ns_get_path().
Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/internal.h | 5 ++ fs/namespace.c | 9 ++- fs/nsfs.c | 161 +++++++++++++++++++++++++++++++++++++++++++++ fs/proc/inode.c | 5 -- fs/proc/namespaces.c | 152 ++++-------------------------------------- include/linux/ns_common.h | 1 + include/linux/proc_ns.h | 31 +++++---- include/uapi/linux/magic.h | 1 + init/main.c | 2 + 10 files changed, 208 insertions(+), 161 deletions(-) create mode 100644 fs/nsfs.c (limited to 'include/linux/proc_ns.h') diff --git a/fs/Makefile b/fs/Makefile index 34a1b9dea6dd..34393376eaa2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o splice.o sync.o utimes.o \ - stack.o fs_struct.o statfs.o fs_pin.o + stack.o fs_struct.o statfs.o fs_pin.o nsfs.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o block_dev.o direct-io.o mpage.o diff --git a/fs/internal.h b/fs/internal.h index 757ba2abf21e..e9a61fe67575 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops; */ extern void sb_pin_kill(struct super_block *sb); extern void mnt_pin_kill(struct mount *m); + +/* + * fs/nsfs.c + */ +extern struct dentry_operations ns_dentry_operations; diff --git a/fs/namespace.c b/fs/namespace.c index 9dfb4cac0c41..30df6e7dd807 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1569,8 +1569,8 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static bool is_mnt_ns_file(struct dentry *dentry) { /* Is this a proxy for a mount namespace? 
*/ - struct inode *inode = dentry->d_inode; - return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations; + return dentry->d_op == &ns_dentry_operations && + dentry->d_fsdata == &mntns_operations; } struct mnt_namespace *to_mnt_ns(struct ns_common *ns) @@ -2016,7 +2016,10 @@ static int do_loopback(struct path *path, const char *old_name, if (IS_MNT_UNBINDABLE(old)) goto out2; - if (!check_mnt(parent) || !check_mnt(old)) + if (!check_mnt(parent)) + goto out2; + + if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations) goto out2; if (!recurse && has_locked_children(old, old_path.dentry)) diff --git a/fs/nsfs.c b/fs/nsfs.c new file mode 100644 index 000000000000..af1b24fa899d --- /dev/null +++ b/fs/nsfs.c @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include + +static struct vfsmount *nsfs_mnt; + +static const struct file_operations ns_file_operations = { + .llseek = no_llseek, +}; + +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + const struct proc_ns_operations *ns_ops = dentry->d_fsdata; + + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", + ns_ops->name, inode->i_ino); +} + +static void ns_prune_dentry(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + if (inode) { + struct ns_common *ns = inode->i_private; + atomic_long_set(&ns->stashed, 0); + } +} + +const struct dentry_operations ns_dentry_operations = +{ + .d_prune = ns_prune_dentry, + .d_delete = always_delete_dentry, + .d_dname = ns_dname, +}; + +static void nsfs_evict(struct inode *inode) +{ + struct ns_common *ns = inode->i_private; + clear_inode(inode); + ns->ops->put(ns); +} + +void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct vfsmount *mnt = mntget(nsfs_mnt); + struct qstr qname = { .name = "", }; + struct dentry *dentry; + struct inode *inode; + struct ns_common *ns; + unsigned long d; + 
+again: + ns = ns_ops->get(task); + if (!ns) { + mntput(mnt); + return ERR_PTR(-ENOENT); + } + rcu_read_lock(); + d = atomic_long_read(&ns->stashed); + if (!d) + goto slow; + dentry = (struct dentry *)d; + if (!lockref_get_not_dead(&dentry->d_lockref)) + goto slow; + rcu_read_unlock(); + ns_ops->put(ns); +got_it: + path->mnt = mnt; + path->dentry = dentry; + return NULL; +slow: + rcu_read_unlock(); + inode = new_inode_pseudo(mnt->mnt_sb); + if (!inode) { + ns_ops->put(ns); + mntput(mnt); + return ERR_PTR(-ENOMEM); + } + inode->i_ino = ns->inum; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_flags |= S_IMMUTABLE; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + inode->i_private = ns; + + dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); + if (!dentry) { + iput(inode); + mntput(mnt); + return ERR_PTR(-ENOMEM); + } + d_instantiate(dentry, inode); + dentry->d_fsdata = (void *)ns_ops; + d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); + if (d) { + d_delete(dentry); /* make sure ->d_prune() does nothing */ + dput(dentry); + cpu_relax(); + goto again; + } + goto got_it; +} + +int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct ns_common *ns; + int res = -ENOENT; + ns = ns_ops->get(task); + if (ns) { + res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); + ns_ops->put(ns); + } + return res; +} + +struct file *proc_ns_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &ns_file_operations) + goto out_invalid; + + return file; + +out_invalid: + fput(file); + return ERR_PTR(-EINVAL); +} + +static const struct super_operations nsfs_ops = { + .statfs = simple_statfs, + .evict_inode = nsfs_evict, +}; +static struct dentry *nsfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, 
+ &ns_dentry_operations, NSFS_MAGIC); +} +static struct file_system_type nsfs = { + .name = "nsfs", + .mount = nsfs_mount, + .kill_sb = kill_anon_super, +}; + +void __init nsfs_init(void) +{ + nsfs_mnt = kern_mount(&nsfs); + if (IS_ERR(nsfs_mnt)) + panic("can't set nsfs up\n"); + nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a212996e0987..57a9be9a6668 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -32,7 +32,6 @@ static void proc_evict_inode(struct inode *inode) { struct proc_dir_entry *de; struct ctl_table_header *head; - struct ns_common *ns; truncate_inode_pages_final(&inode->i_data); clear_inode(inode); @@ -49,10 +48,6 @@ static void proc_evict_inode(struct inode *inode) RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); sysctl_head_put(head); } - /* Release any associated namespace */ - ns = PROC_I(inode)->ns.ns; - if (ns && ns->ops) - ns->ops->put(ns); } static struct kmem_cache * proc_inode_cachep; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 18fc1cf899de..aaaac77abad0 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -1,10 +1,6 @@ #include #include -#include #include -#include -#include -#include #include #include #include @@ -34,139 +30,45 @@ static const struct proc_ns_operations *ns_entries[] = { &mntns_operations, }; -static const struct file_operations ns_file_operations = { - .llseek = no_llseek, -}; - -static const struct inode_operations ns_inode_operations = { - .setattr = proc_setattr, -}; - -static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) -{ - struct inode *inode = dentry->d_inode; - const struct proc_ns_operations *ns_ops = dentry->d_fsdata; - - return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", - ns_ops->name, inode->i_ino); -} - -const struct dentry_operations ns_dentry_operations = -{ - .d_delete = always_delete_dentry, - .d_dname = ns_dname, -}; - -static struct dentry *proc_ns_get_dentry(struct super_block *sb, - struct 
task_struct *task, const struct proc_ns_operations *ns_ops) -{ - struct dentry *dentry, *result; - struct inode *inode; - struct proc_inode *ei; - struct qstr qname = { .name = "", }; - struct ns_common *ns; - - ns = ns_ops->get(task); - if (!ns) - return ERR_PTR(-ENOENT); - - dentry = d_alloc_pseudo(sb, &qname); - if (!dentry) { - ns_ops->put(ns); - return ERR_PTR(-ENOMEM); - } - dentry->d_fsdata = (void *)ns_ops; - - inode = iget_locked(sb, ns->inum); - if (!inode) { - dput(dentry); - ns_ops->put(ns); - return ERR_PTR(-ENOMEM); - } - - ei = PROC_I(inode); - if (inode->i_state & I_NEW) { - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &ns_inode_operations; - inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = &ns_file_operations; - ei->ns.ns_ops = ns_ops; - ei->ns.ns = ns; - unlock_new_inode(inode); - } else { - ns_ops->put(ns); - } - - d_set_d_op(dentry, &ns_dentry_operations); - result = d_instantiate_unique(dentry, inode); - if (result) { - dput(dentry); - dentry = result; - } - - return dentry; -} - static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; - struct super_block *sb = inode->i_sb; - struct proc_inode *ei = PROC_I(inode); + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; struct task_struct *task; struct path ns_path; void *error = ERR_PTR(-EACCES); task = get_proc_task(inode); if (!task) - goto out; + return error; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out_put_task; - - ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); - if (IS_ERR(ns_path.dentry)) { - error = ERR_CAST(ns_path.dentry); - goto out_put_task; + if (ptrace_may_access(task, PTRACE_MODE_READ)) { + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) + nd_jump_link(nd, &ns_path); } - - ns_path.mnt = mntget(nd->path.mnt); - nd_jump_link(nd, &ns_path); - error = NULL; - -out_put_task: put_task_struct(task); -out: return error; } static 
int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = dentry->d_inode; - struct proc_inode *ei = PROC_I(inode); - const struct proc_ns_operations *ns_ops = ei->ns.ns_ops; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; struct task_struct *task; - struct ns_common *ns; char name[50]; int res = -EACCES; task = get_proc_task(inode); if (!task) - goto out; - - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out_put_task; + return res; - res = -ENOENT; - ns = ns_ops->get(task); - if (!ns) - goto out_put_task; - - snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum); - res = readlink_copy(buffer, buflen, name); - ns_ops->put(ns); -out_put_task: + if (ptrace_may_access(task, PTRACE_MODE_READ)) { + res = ns_get_name(name, sizeof(name), task, ns_ops); + if (res >= 0) + res = readlink_copy(buffer, buflen, name); + } put_task_struct(task); -out: return res; } @@ -268,31 +170,3 @@ const struct inode_operations proc_ns_dir_inode_operations = { .getattr = pid_getattr, .setattr = proc_setattr, }; - -struct file *proc_ns_fget(int fd) -{ - struct file *file; - - file = fget(fd); - if (!file) - return ERR_PTR(-EBADF); - - if (file->f_op != &ns_file_operations) - goto out_invalid; - - return file; - -out_invalid: - fput(file); - return ERR_PTR(-EINVAL); -} - -struct ns_common *get_proc_ns(struct inode *inode) -{ - return PROC_I(inode)->ns.ns; -} - -bool proc_ns_inode(struct inode *inode) -{ - return inode->i_fop == &ns_file_operations; -} diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index ce23cf4bbe69..85a5c8c16be9 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -4,6 +4,7 @@ struct proc_ns_operations; struct ns_common { + atomic_long_t stashed; const struct proc_ns_operations *ops; unsigned int inum; }; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 2837ff41cfe3..42dfc615dbf8 100644 --- a/include/linux/proc_ns.h +++ 
b/include/linux/proc_ns.h @@ -4,9 +4,11 @@ #ifndef _LINUX_PROC_NS_H #define _LINUX_PROC_NS_H +#include + struct pid_namespace; struct nsproxy; -struct ns_common; +struct path; struct proc_ns_operations { const char *name; @@ -38,35 +40,38 @@ enum { extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); -extern struct file *proc_ns_fget(int fd); -extern struct ns_common *get_proc_ns(struct inode *); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); -extern bool proc_ns_inode(struct inode *inode); #else /* CONFIG_PROC_FS */ static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } static inline void pid_ns_release_proc(struct pid_namespace *ns) {} -static inline struct file *proc_ns_fget(int fd) -{ - return ERR_PTR(-EINVAL); -} - -static inline struct ns_common *get_proc_ns(struct inode *inode) { return NULL; } - static inline int proc_alloc_inum(unsigned int *inum) { *inum = 1; return 0; } static inline void proc_free_inum(unsigned int inum) {} -static inline bool proc_ns_inode(struct inode *inode) { return false; } #endif /* CONFIG_PROC_FS */ -#define ns_alloc_inum(ns) proc_alloc_inum(&(ns)->inum) +static inline int ns_alloc_inum(struct ns_common *ns) +{ + atomic_long_set(&ns->stashed, 0); + return proc_alloc_inum(&ns->inum); +} + #define ns_free_inum(ns) proc_free_inum((ns)->inum) +extern struct file *proc_ns_fget(int fd); +#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) +extern void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops); + +extern int ns_get_name(char *buf, size_t size, struct task_struct *task, + const struct proc_ns_operations *ns_ops); +extern void nsfs_init(void); + #endif /* _LINUX_PROC_NS_H */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 77c60311a6c6..7d664ea85ebd 100644 --- a/include/uapi/linux/magic.h +++ 
b/include/uapi/linux/magic.h @@ -72,5 +72,6 @@ #define MTD_INODE_FS_MAGIC 0x11307854 #define ANON_INODE_FS_MAGIC 0x09041934 #define BTRFS_TEST_MAGIC 0x73727279 +#define NSFS_MAGIC 0x6e736673 #endif /* __LINUX_MAGIC_H__ */ diff --git a/init/main.c b/init/main.c index 800a0daede7e..bcc75057ea87 100644 --- a/init/main.c +++ b/init/main.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include @@ -660,6 +661,7 @@ asmlinkage __visible void __init start_kernel(void) /* rootfs populating might need page-writeback */ page_writeback_init(); proc_root_init(); + nsfs_init(); cgroup_init(); cpuset_init(); taskstats_init_early(); -- cgit v1.2.3