From 90f8572b0f021fdd1baa68e00a8c30482ee9e5f4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 29 Jun 2015 14:42:03 -0500 Subject: vfs: Commit to never having exectuables on proc and sysfs. Today proc and sysfs do not contain any executable files. Several applications today mount proc or sysfs without noexec and nosuid and then depend on there being no exectuables files on proc or sysfs. Having any executable files show on proc or sysfs would cause a user space visible regression, and most likely security problems. Therefore commit to never allowing executables on proc and sysfs by adding a new flag to mark them as filesystems without executables and enforce that flag. Test the flag where MNT_NOEXEC is tested today, so that the only user visible effect will be that exectuables will be treated as if the execute bit is cleared. The filesystems proc and sysfs do not currently incoporate any executable files so this does not result in any user visible effects. This makes it unnecessary to vet changes to proc and sysfs tightly for adding exectuable files or changes to chattr that would modify existing files, as no matter what the individual file say they will not be treated as exectuable files by the vfs. Not having to vet changes to closely is important as without this we are only one proc_create call (or another goof up in the implementation of notify_change) from having problematic executables on proc. Those mistakes are all too easy to make and would create a situation where there are security issues or the assumptions of some program having to be broken (and cause userspace regressions). Signed-off-by: "Eric W. Biederman" --- kernel/sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 259fda25eb6b..fa2f2f671a5c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1668,8 +1668,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) * overall picture. */ err = -EACCES; - if (!S_ISREG(inode->i_mode) || - exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path)) goto exit; err = inode_permission(inode, MAY_EXEC); -- cgit v1.2.3 From 12c641ab8270f787dfcce08b5f20ce8b65008096 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Aug 2015 17:35:07 -0500 Subject: unshare: Unsharing a thread does not require unsharing a vm In the logic in the initial commit of unshare made creating a new thread group for a process, contingent upon creating a new memory address space for that process. That is wrong. Two separate processes in different thread groups can share a memory address space and clone allows creation of such proceses. This is significant because it was observed that mm_users > 1 does not mean that a process is multi-threaded, as reading /proc/PID/maps temporarily increments mm_users, which allows other processes to (accidentally) interfere with unshare() calls. Correct the check in check_unshare_flags() to test for !thread_group_empty() for CLONE_THREAD, CLONE_SIGHAND, and CLONE_VM. For sighand->count > 1 for CLONE_SIGHAND and CLONE_VM. For !current_is_single_threaded instead of mm_users > 1 for CLONE_VM. By using the correct checks in unshare this removes the possibility of an accidental denial of service attack. Additionally using the correct checks in unshare ensures that only an explicit unshare(CLONE_VM) can possibly trigger the slow path of current_is_single_threaded(). As an explict unshare(CLONE_VM) is pointless it is not expected there are many applications that make that call. Cc: stable@vger.kernel.org Fixes: b2e0d98705e60e45bbb3c0032c48824ad7ae0704 userns: Implement unshare of the user namespace Reported-by: Ricky Zhou Reported-by: Kees Cook Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- kernel/fork.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..d544ae97f999 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1866,13 +1866,21 @@ static int check_unshare_flags(unsigned long unshare_flags) CLONE_NEWUSER|CLONE_NEWPID)) return -EINVAL; /* - * Not implemented, but pretend it works if there is nothing to - * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND - * needs to unshare vm. + * Not implemented, but pretend it works if there is nothing + * to unshare. Note that unsharing the address space or the + * signal handlers also need to unshare the signal queues (aka + * CLONE_THREAD). */ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { - /* FIXME: get_task_mm() increments ->mm_users */ - if (atomic_read(¤t->mm->mm_users) > 1) + if (!thread_group_empty(current)) + return -EINVAL; + } + if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { + if (atomic_read(¤t->sighand->count) > 1) + return -EINVAL; + } + if (unshare_flags & CLONE_VM) { + if (!current_is_single_threaded()) return -EINVAL; } @@ -1940,16 +1948,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; - /* - * If unsharing a thread from a thread group, must also unshare vm. - */ - if (unshare_flags & CLONE_THREAD) - unshare_flags |= CLONE_VM; /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; + /* + * If unsharing a signal handlers, must also unshare the signal queues. + */ + if (unshare_flags & CLONE_SIGHAND) + unshare_flags |= CLONE_THREAD; /* * If unsharing namespace, must also unshare filesystem information. */ -- cgit v1.2.3 From faf00da544045fdc1454f3b9e6d7f65c841de302 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 10 Aug 2015 18:25:44 -0500 Subject: userns,pidns: Force thread group sharing, not signal handler sharing. The code that places signals in signal queues computes the uids, gids, and pids at the time the signals are enqueued. Which means that tasks that share signal queues must be in the same pid and user namespaces. Sharing signal handlers is fine, but bizarre. So make the code in fork and userns_install clearer by only testing for what is functionally necessary. Also update the comment in unshare about unsharing a user namespace to be a little more explicit and make a little more sense. Acked-by: Oleg Nesterov Signed-off-by: "Eric W. Biederman" --- kernel/fork.c | 8 ++++---- kernel/user_namespace.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index d544ae97f999..2c72b8a8ae24 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1273,10 +1273,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* * If the new process will be in a different pid or user namespace - * do not allow it to share a thread group or signal handlers or - * parent with the forking task. + * do not allow it to share a thread group with the forking task. */ - if (clone_flags & CLONE_SIGHAND) { + if (clone_flags & CLONE_THREAD) { if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || (task_active_pid_ns(current) != current->nsproxy->pid_ns_for_children)) @@ -1944,7 +1943,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) int err; /* - * If unsharing a user namespace must also unshare the thread. + * If unsharing a user namespace must also unshare the thread group + * and unshare the filesystem root and working directories. */ if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 4109f8320684..f65a0a06a8c0 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -976,8 +976,8 @@ static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns) if (user_ns == current_user_ns()) return -EINVAL; - /* Threaded processes may not enter a different user namespace */ - if (atomic_read(¤t->mm->mm_users) > 1) + /* Tasks that share a thread group must share a user namespace */ + if (!thread_group_empty(current)) return -EINVAL; if (current->fs->users != 1) -- cgit v1.2.3