summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2010-03-07 16:41:34 -0800
committerEric W. Biederman <ebiederm@xmission.com>2011-05-10 14:31:44 -0700
commit6b4e306aa3dc94a0545eb9279475b1ab6209a31f (patch)
treeca8c6dec0805076f0b5ba7c547e3cb2004e3aea2 /fs/proc
parent0ee5623f9a6e52df90a78bd21179f8ab370e102e (diff)
downloadlwn-6b4e306aa3dc94a0545eb9279475b1ab6209a31f.tar.gz
lwn-6b4e306aa3dc94a0545eb9279475b1ab6209a31f.zip
ns: proc files for namespace naming policy.
Create files under /proc/<pid>/ns/ to allow controlling the namespaces of a process. This addresses three specific problems that can make namespaces hard to work with. - Namespaces require a dedicated process to pin them in memory. - It is not possible to use a namespace unless you are the child of the original creator. - Namespaces don't have names that userspace can use to talk about them. The namespace files under /proc/<pid>/ns/ can be opened and the file descriptor can be used to talk about a specific namespace, and to keep the specified namespace alive. A namespace can be kept alive by either holding the file descriptor open or bind mounting the file someplace else. aka: mount --bind /proc/self/ns/net /some/filesystem/path mount --bind /proc/self/fd/<N> /some/filesystem/path This allows namespaces to be named with userspace policy. It requires additional support to make use of these filedescriptors and that will be comming in the following patches. Acked-by: Daniel Lezcano <daniel.lezcano@free.fr> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c20
-rw-r--r--fs/proc/inode.c7
-rw-r--r--fs/proc/internal.h18
-rw-r--r--fs/proc/namespaces.c188
5 files changed, 223 insertions, 11 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y += stat.o
proc-y += uptime.o
proc-y += version.o
proc-y += softirqs.o
+proc-y += namespaces.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..dc8bca72b002 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
return allowed;
}
-static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct dentry *dentry, struct iattr *attr)
{
int error;
struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
return 0;
}
-
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
{
struct inode * inode;
struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
return NULL;
}
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
* made this apply to all per process world readable and executable
* directories.
*/
-static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode;
struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
}
-static const struct dentry_operations pid_dentry_operations =
+const struct dentry_operations pid_dentry_operations =
{
.d_revalidate = pid_revalidate,
.d_delete = pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
- struct task_struct *, const void *);
-
/*
* Fill a directory entry.
*
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
* reported by readdir in sync with the inode numbers reported
* by stat.
*/
-static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- char *name, int len,
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ const char *name, int len,
instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+ DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
@@ -3168,6 +3165,7 @@ out_no_task:
static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
+ DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
+ const struct proc_ns_operations *ns_ops;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
+ /* Release any associated namespace */
+ ns_ops = PROC_I(inode)->ns_ops;
+ if (ns_ops && ns_ops->put)
+ ns_ops->put(PROC_I(inode)->ns);
}
static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
+ ei->ns = NULL;
+ ei->ns_ops = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..96245a1b1a7c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
*/
int proc_readdir(struct file *, void *, filldir_t);
struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+
+
+
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+ struct task_struct *, const void *);
+int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+ const char *name, int len,
+ instantiate_t instantiate, struct task_struct *task, const void *ptr);
+int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
+extern const struct dentry_operations pid_dentry_operations;
+int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+int proc_setattr(struct dentry *dentry, struct iattr *attr);
+
+extern const struct inode_operations proc_ns_dir_inode_operations;
+extern const struct file_operations proc_ns_dir_operations;
+
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..6ae9f07d59ee
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,188 @@
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/fs_struct.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
+#include <linux/mnt_namespace.h>
+#include <linux/ipc_namespace.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
+
+
+static const struct proc_ns_operations *ns_entries[] = {
+};
+
+static const struct file_operations ns_file_operations = {
+ .llseek = no_llseek,
+};
+
+static struct dentry *proc_ns_instantiate(struct inode *dir,
+ struct dentry *dentry, struct task_struct *task, const void *ptr)
+{
+ const struct proc_ns_operations *ns_ops = ptr;
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct dentry *error = ERR_PTR(-ENOENT);
+
+ inode = proc_pid_make_inode(dir->i_sb, task);
+ if (!inode)
+ goto out;
+
+ ei = PROC_I(inode);
+ inode->i_mode = S_IFREG|S_IRUSR;
+ inode->i_fop = &ns_file_operations;
+ ei->ns_ops = ns_ops;
+ ei->ns = ns_ops->get(task);
+ if (!ei->ns)
+ goto out_iput;
+
+ dentry->d_op = &pid_dentry_operations;
+ d_add(dentry, inode);
+ /* Close the race of the process dying before we return the dentry */
+ if (pid_revalidate(dentry, NULL))
+ error = NULL;
+out:
+ return error;
+out_iput:
+ iput(inode);
+ goto out;
+}
+
+static int proc_ns_fill_cache(struct file *filp, void *dirent,
+ filldir_t filldir, struct task_struct *task,
+ const struct proc_ns_operations *ops)
+{
+ return proc_fill_cache(filp, dirent, filldir,
+ ops->name, strlen(ops->name),
+ proc_ns_instantiate, task, ops);
+}
+
+static int proc_ns_dir_readdir(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ int i;
+ struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task = get_proc_task(inode);
+ const struct proc_ns_operations **entry, **last;
+ ino_t ino;
+ int ret;
+
+ ret = -ENOENT;
+ if (!task)
+ goto out_no_task;
+
+ ret = -EPERM;
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out;
+
+ ret = 0;
+ i = filp->f_pos;
+ switch (i) {
+ case 0:
+ ino = inode->i_ino;
+ if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ ino = parent_ino(dentry);
+ if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ default:
+ i -= 2;
+ if (i >= ARRAY_SIZE(ns_entries)) {
+ ret = 1;
+ goto out;
+ }
+ entry = ns_entries + i;
+ last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+ while (entry <= last) {
+ if (proc_ns_fill_cache(filp, dirent, filldir,
+ task, *entry) < 0)
+ goto out;
+ filp->f_pos++;
+ entry++;
+ }
+ }
+
+ ret = 1;
+out:
+ put_task_struct(task);
+out_no_task:
+ return ret;
+}
+
+const struct file_operations proc_ns_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_ns_dir_readdir,
+};
+
+static struct dentry *proc_ns_dir_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ struct dentry *error;
+ struct task_struct *task = get_proc_task(dir);
+ const struct proc_ns_operations **entry, **last;
+ unsigned int len = dentry->d_name.len;
+
+ error = ERR_PTR(-ENOENT);
+
+ if (!task)
+ goto out_no_task;
+
+ error = ERR_PTR(-EPERM);
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
+ goto out;
+
+ last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+ for (entry = ns_entries; entry <= last; entry++) {
+ if (strlen((*entry)->name) != len)
+ continue;
+ if (!memcmp(dentry->d_name.name, (*entry)->name, len))
+ break;
+ }
+ if (entry > last)
+ goto out;
+
+ error = proc_ns_instantiate(dir, dentry, task, *entry);
+out:
+ put_task_struct(task);
+out_no_task:
+ return error;
+}
+
+const struct inode_operations proc_ns_dir_inode_operations = {
+ .lookup = proc_ns_dir_lookup,
+ .getattr = pid_getattr,
+ .setattr = proc_setattr,
+};
+
+struct file *proc_ns_fget(int fd)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return ERR_PTR(-EBADF);
+
+ if (file->f_op != &ns_file_operations)
+ goto out_invalid;
+
+ return file;
+
+out_invalid:
+ fput(file);
+ return ERR_PTR(-EINVAL);
+}
+