From 21e851943e31022731cd5fad386ca8fb552dbe64 Mon Sep 17 00:00:00 2001
From: "Raphael S.Carvalho" <raphael.scarv@gmail.com>
Date: Wed, 27 Feb 2013 15:32:09 -0300
Subject: kernel/nsproxy.c: Improving a snippet of code.

It seems GCC generates a better code in that way, so I changed that statement.
Btw, they have the same semantic, so I'm sending this patch due to performance issues.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Raphael S.Carvalho <raphael.scarv@gmail.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/nsproxy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 364ceab15f0c..d9afd256318f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -148,7 +148,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 	 * means share undolist with parent, so we must forbid using
 	 * it along with CLONE_NEWIPC.
 	 */
-	if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) {
+	if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
+		(CLONE_NEWIPC | CLONE_SYSVSEM)) {
 		err = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3


From e51db73532955dc5eaba4235e62b74b460709d5b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 30 Mar 2013 19:57:41 -0700
Subject: userns: Better restrictions on when proc and sysfs can be mounted

Rely on the fact that another flavor of the filesystem is already
mounted and do not rely on state in the user namespace.

Verify that the mounted filesystem is not covered in any significant
way.  I would love to verify that the previously mounted filesystem
has no mounts on top but there are at least the directories
/proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
for other filesystems to mount on top of.

Refactor the test into a function named fs_fully_visible and call that
function from the mount routines of proc and sysfs.  This makes this
test local to the filesystems involved and the results current of when
the mounts take place, removing a weird threading of the user
namespace, the mount namespace and the filesystems themselves.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c                 | 37 +++++++++++++++++++++++++------------
 fs/proc/root.c                 |  7 +++++--
 fs/sysfs/mount.c               |  3 ++-
 include/linux/fs.h             |  1 +
 include/linux/user_namespace.h |  4 ----
 kernel/user.c                  |  2 --
 kernel/user_namespace.c        |  2 --
 7 files changed, 33 insertions(+), 23 deletions(-)

(limited to 'kernel')

diff --git a/fs/namespace.c b/fs/namespace.c
index 64627f883bf2..877e4277f496 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2867,25 +2867,38 @@ bool current_chrooted(void)
 	return chrooted;
 }
 
-void update_mnt_policy(struct user_namespace *userns)
+bool fs_fully_visible(struct file_system_type *type)
 {
 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
 	struct mount *mnt;
+	bool visible = false;
 
-	down_read(&namespace_sem);
+	if (unlikely(!ns))
+		return false;
+
+	namespace_lock();
 	list_for_each_entry(mnt, &ns->list, mnt_list) {
-		switch (mnt->mnt.mnt_sb->s_magic) {
-		case SYSFS_MAGIC:
-			userns->may_mount_sysfs = true;
-			break;
-		case PROC_SUPER_MAGIC:
-			userns->may_mount_proc = true;
-			break;
+		struct mount *child;
+		if (mnt->mnt.mnt_sb->s_type != type)
+			continue;
+
+		/* This mount is not fully visible if there are any child mounts
+		 * that cover anything except for empty directories.
+		 */
+		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+			struct inode *inode = child->mnt_mountpoint->d_inode;
+			if (!S_ISDIR(inode->i_mode))
+				goto next;
+			if (inode->i_nlink != 2)
+				goto next;
 		}
-		if (userns->may_mount_sysfs && userns->may_mount_proc)
-			break;
+		visible = true;
+		goto found;
+	next:	;
 	}
-	up_read(&namespace_sem);
+found:
+	namespace_unlock();
+	return visible;
 }
 
 static void *mntns_get(struct task_struct *task)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 38bd5d423fcd..45e5fb7da09b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -110,8 +110,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
 		ns = task_active_pid_ns(current);
 		options = data;
 
-		if (!current_user_ns()->may_mount_proc ||
-		    !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+		if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+			return ERR_PTR(-EPERM);
+
+		/* Does the mounter have privilege over the pid namespace? */
+		if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
 			return ERR_PTR(-EPERM);
 	}
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index afd83273e6ce..4a2da3a4b1b1 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -112,7 +112,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	struct super_block *sb;
 	int error;
 
-	if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+	if (!(flags & MS_KERNMOUNT) && !capable(CAP_SYS_ADMIN) &&
+	    !fs_fully_visible(fs_type))
 		return ERR_PTR(-EPERM);
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 981874773e85..3050c620f062 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1897,6 +1897,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
 extern bool our_mnt(struct vfsmount *mnt);
+extern bool fs_fully_visible(struct file_system_type *);
 
 extern int current_umask(void);
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b6b215f13b45..4ce009324933 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,8 +26,6 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
-	bool			may_mount_sysfs;
-	bool			may_mount_proc;
 };
 
 extern struct user_namespace init_user_ns;
@@ -84,6 +82,4 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #endif
 
-void update_mnt_policy(struct user_namespace *userns);
-
 #endif /* _LINUX_USER_H */
diff --git a/kernel/user.c b/kernel/user.c
index 69b4c3d48cde..5bbb91988e69 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
 	.proc_inum = PROC_USER_INIT_INO,
-	.may_mount_sysfs = true,
-	.may_mount_proc = true,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d8c30db06c5b..d58ad1e7a794 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -97,8 +97,6 @@ int create_user_ns(struct cred *new)
 
 	set_cred_user_ns(new, ns);
 
-	update_mnt_policy(ns);
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From a606488513543312805fab2b93070cefe6a3016c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 29 Aug 2013 13:56:50 -0700
Subject: pidns: Fix hang in zap_pid_ns_processes by sending a potentially
 extra wakeup

Serge Hallyn <serge.hallyn@ubuntu.com> writes:

> Since commit af4b8a83add95ef40716401395b44a1b579965f4 it's been
> possible to get into a situation where a pidns reaper is
> <defunct>, reparented to host pid 1, but never reaped.  How to
> reproduce this is documented at
>
> https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1168526
> (and see
> https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1168526/comments/13)
> In short, run repeated starts of a container whose init is
>
> Process.exit(0);
>
> sysrq-t when such a task is playing zombie shows:
>
> [  131.132978] init            x ffff88011fc14580     0  2084   2039 0x00000000
> [  131.132978]  ffff880116e89ea8 0000000000000002 ffff880116e89fd8 0000000000014580
> [  131.132978]  ffff880116e89fd8 0000000000014580 ffff8801172a0000 ffff8801172a0000
> [  131.132978]  ffff8801172a0630 ffff88011729fff0 ffff880116e14650 ffff88011729fff0
> [  131.132978] Call Trace:
> [  131.132978]  [<ffffffff816f6159>] schedule+0x29/0x70
> [  131.132978]  [<ffffffff81064591>] do_exit+0x6e1/0xa40
> [  131.132978]  [<ffffffff81071eae>] ? signal_wake_up_state+0x1e/0x30
> [  131.132978]  [<ffffffff8106496f>] do_group_exit+0x3f/0xa0
> [  131.132978]  [<ffffffff810649e4>] SyS_exit_group+0x14/0x20
> [  131.132978]  [<ffffffff8170102f>] tracesys+0xe1/0xe6
>
> Further debugging showed that every time this happened, zap_pid_ns_processes()
> started with nr_hashed being 3, while we were expecting it to drop to 2.
> Any time it didn't happen, nr_hashed was 1 or 2.  So the reaper was
> waiting for nr_hashed to become 2, but free_pid() only wakes the reaper
> if nr_hashed hits 1.

The issue is that when the task group leader of an init process exits
before other tasks of the init process when the init process finally
exits it will be a secondary task sleeping in zap_pid_ns_processes and
waiting to wake up when the number of hashed pids drops to two.  This
case waits forever as free_pid only sends a wake up when the number of
hashed pids drops to 1.

To correct this the simple strategy of sending a possibly unncessary
wake up when the number of hashed pids drops to 2 is adopted.

Sending one extraneous wake up is relatively harmless, at worst we
waste a little cpu time in the rare case when a pid namespace
appropaches exiting.

We can detect the case when the pid namespace drops to just two pids
hashed race free in free_pid.

Dereferencing pid_ns->child_reaper with the pidmap_lock held is safe
without out the tasklist_lock because it is guaranteed that the
detach_pid will be called on the child_reaper before it is freed and
detach_pid calls __change_pid which calls free_pid which takes the
pidmap_lock.  __change_pid only calls free_pid if this is the
last use of the pid.  For a thread that is not the thread group leader
the threads pid will only ever have one user because a threads pid
is not allowed to be the pid of a process, of a process group or
a session.  For a thread that is a thread group leader all of
the other threads of that process will be reaped before it is allowed
for the thread group leader to be reaped ensuring there will only
be one user of the threads pid as a process pid.  Furthermore
because the thread is the init process of a pid namespace all of the
other processes in the pid namespace will have also been already freed
leading to the fact that the pid will not be used as a session pid or
a process group pid for any other running process.

CC: stable@vger.kernel.org
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Tested-by: Serge Hallyn <serge.hallyn@canonical.com>
Reported-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/pid.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/pid.c b/kernel/pid.c
index 66505c1dfc51..ebe5e80b10f8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -265,6 +265,7 @@ void free_pid(struct pid *pid)
 		struct pid_namespace *ns = upid->ns;
 		hlist_del_rcu(&upid->pid_chain);
 		switch(--ns->nr_hashed) {
+		case 2:
 		case 1:
 			/* When all that is left in the pid namespace
 			 * is the reaper wake up the reaper.  The reaper
-- 
cgit v1.2.3


From dbef0c1c4c5f8ce5d1f5bd8cee092a7afb4ac21b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 9 Mar 2013 16:15:23 -0800
Subject: namespaces: Simplify copy_namespaces so it is clear what is going on.

Remove the test for the impossible case where tsk->nsproxy == NULL.  Fork
will never be called with tsk->nsproxy == NULL.

Only call get_nsproxy when we don't need to generate a new_nsproxy,
and mark the case where we don't generate a new nsproxy as likely.

Remove the code to drop an unnecessarily acquired nsproxy value.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/nsproxy.c | 35 +++++++++++------------------------
 1 file changed, 11 insertions(+), 24 deletions(-)

(limited to 'kernel')

diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index d9afd256318f..a1ed01139276 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -125,22 +125,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 	struct nsproxy *old_ns = tsk->nsproxy;
 	struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
 	struct nsproxy *new_ns;
-	int err = 0;
 
-	if (!old_ns)
+	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+			      CLONE_NEWPID | CLONE_NEWNET)))) {
+		get_nsproxy(old_ns);
 		return 0;
-
-	get_nsproxy(old_ns);
-
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-				CLONE_NEWPID | CLONE_NEWNET)))
-		return 0;
-
-	if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto out;
 	}
 
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/*
 	 * CLONE_NEWIPC must detach from the undolist: after switching
 	 * to a new ipc namespace, the semaphore arrays from the old
@@ -149,22 +143,15 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 	 * it along with CLONE_NEWIPC.
 	 */
 	if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
-		(CLONE_NEWIPC | CLONE_SYSVSEM)) {
-		err = -EINVAL;
-		goto out;
-	}
+		(CLONE_NEWIPC | CLONE_SYSVSEM)) 
+		return -EINVAL;
 
 	new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
-	if (IS_ERR(new_ns)) {
-		err = PTR_ERR(new_ns);
-		goto out;
-	}
+	if (IS_ERR(new_ns))
+		return  PTR_ERR(new_ns);
 
 	tsk->nsproxy = new_ns;
-
-out:
-	put_nsproxy(old_ns);
-	return err;
+	return 0;
 }
 
 void free_nsproxy(struct nsproxy *ns)
-- 
cgit v1.2.3


From 6e556ce209b09528dbf1931cbfd5d323e1345926 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 5 Mar 2013 13:59:48 -0800
Subject: pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD

I goofed when I made unshare(CLONE_NEWPID) only work in a
single-threaded process.  There is no need for that requirement and in
fact I analyzied things right for setns.  The hard requirement
is for tasks that share a VM to all be in the pid namespace and
we properly prevent that in do_fork.

Just to be certain I took a look through do_wait and
forget_original_parent and there are no cases that make it any harder
for children to be in the multiple pid namespaces than it is for
children to be in the same pid namespace.  I also performed a check to
see if there were in uses of task->nsproxy_pid_ns I was not familiar
with, but it is only used when allocating a new pid for a new task,
and in checks to prevent craziness from happening.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/fork.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 66635c80a813..eb45f1d72703 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1817,11 +1817,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	 */
 	if (unshare_flags & CLONE_NEWUSER)
 		unshare_flags |= CLONE_THREAD | CLONE_FS;
-	/*
-	 * If unsharing a pid namespace must also unshare the thread.
-	 */
-	if (unshare_flags & CLONE_NEWPID)
-		unshare_flags |= CLONE_THREAD;
 	/*
 	 * If unsharing a thread from a thread group, must also unshare vm.
 	 */
-- 
cgit v1.2.3


From c7b96acf1456ef127fef461fcfedb54b81fecfbb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Mar 2013 12:49:49 -0700
Subject: userns:  Kill nsown_capable it makes the wrong thing easy

nsown_capable is a special case of ns_capable essentially for just CAP_SETUID and
CAP_SETGID.  For the existing users it doesn't noticably simplify things and
from the suggested patches I have seen it encourages people to do the wrong
thing.  So remove nsown_capable.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c             |  4 ++--
 fs/open.c                  |  2 +-
 include/linux/capability.h |  1 -
 ipc/namespace.c            |  2 +-
 kernel/capability.c        | 12 ------------
 kernel/groups.c            |  2 +-
 kernel/pid_namespace.c     |  2 +-
 kernel/sys.c               | 20 ++++++++++----------
 kernel/uid16.c             |  2 +-
 kernel/utsname.c           |  2 +-
 net/core/net_namespace.c   |  2 +-
 net/core/scm.c             |  4 ++--
 12 files changed, 21 insertions(+), 34 deletions(-)

(limited to 'kernel')

diff --git a/fs/namespace.c b/fs/namespace.c
index 877e4277f496..dc519a1437ee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2929,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
 	struct path root;
 
 	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_CHROOT) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (fs->users != 1)
diff --git a/fs/open.c b/fs/open.c
index 9156cb050d08..1c9d23f7e683 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -443,7 +443,7 @@ retry:
 		goto dput_and_out;
 
 	error = -EPERM;
-	if (!nsown_capable(CAP_SYS_CHROOT))
+	if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
 		goto dput_and_out;
 	error = security_path_chroot(&path);
 	if (error)
diff --git a/include/linux/capability.h b/include/linux/capability.h
index d9a4f7f40f32..a6ee1f9a5018 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -210,7 +210,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool nsown_capable(int cap);
 extern bool inode_capable(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7ee61bf44933..4be6581d3b7f 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -171,7 +171,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
 {
 	struct ipc_namespace *ns = new;
 	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Ditch state from the old ipc namespace */
diff --git a/kernel/capability.c b/kernel/capability.c
index f6c2ce5701e1..6fc1c8af44df 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -432,18 +432,6 @@ bool capable(int cap)
 }
 EXPORT_SYMBOL(capable);
 
-/**
- * nsown_capable - Check superior capability to one's own user_ns
- * @cap: The capability in question
- *
- * Return true if the current task has the given superior capability
- * targeted at its own user namespace.
- */
-bool nsown_capable(int cap)
-{
-	return ns_capable(current_user_ns(), cap);
-}
-
 /**
  * inode_capable - Check superior capability over inode
  * @inode: The inode in question
diff --git a/kernel/groups.c b/kernel/groups.c
index 6b2588dd04ff..90cf1c38c8ea 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 	struct group_info *group_info;
 	int retval;
 
-	if (!nsown_capable(CAP_SETGID))
+	if (!ns_capable(current_user_ns(), CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 6917e8edb48e..ee1f6bb83d67 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -329,7 +329,7 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
 	struct pid_namespace *ancestor, *new = ns;
 
 	if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 771129b299f8..c18ecca575b4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -337,7 +337,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
 	if (rgid != (gid_t) -1) {
 		if (gid_eq(old->gid, krgid) ||
 		    gid_eq(old->egid, krgid) ||
-		    nsown_capable(CAP_SETGID))
+		    ns_capable(old->user_ns, CAP_SETGID))
 			new->gid = krgid;
 		else
 			goto error;
@@ -346,7 +346,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
 		if (gid_eq(old->gid, kegid) ||
 		    gid_eq(old->egid, kegid) ||
 		    gid_eq(old->sgid, kegid) ||
-		    nsown_capable(CAP_SETGID))
+		    ns_capable(old->user_ns, CAP_SETGID))
 			new->egid = kegid;
 		else
 			goto error;
@@ -387,7 +387,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
 	old = current_cred();
 
 	retval = -EPERM;
-	if (nsown_capable(CAP_SETGID))
+	if (ns_capable(old->user_ns, CAP_SETGID))
 		new->gid = new->egid = new->sgid = new->fsgid = kgid;
 	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
 		new->egid = new->fsgid = kgid;
@@ -471,7 +471,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 		new->uid = kruid;
 		if (!uid_eq(old->uid, kruid) &&
 		    !uid_eq(old->euid, kruid) &&
-		    !nsown_capable(CAP_SETUID))
+		    !ns_capable(old->user_ns, CAP_SETUID))
 			goto error;
 	}
 
@@ -480,7 +480,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 		if (!uid_eq(old->uid, keuid) &&
 		    !uid_eq(old->euid, keuid) &&
 		    !uid_eq(old->suid, keuid) &&
-		    !nsown_capable(CAP_SETUID))
+		    !ns_capable(old->user_ns, CAP_SETUID))
 			goto error;
 	}
 
@@ -534,7 +534,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
 	old = current_cred();
 
 	retval = -EPERM;
-	if (nsown_capable(CAP_SETUID)) {
+	if (ns_capable(old->user_ns, CAP_SETUID)) {
 		new->suid = new->uid = kuid;
 		if (!uid_eq(kuid, old->uid)) {
 			retval = set_user(new);
@@ -591,7 +591,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 	old = current_cred();
 
 	retval = -EPERM;
-	if (!nsown_capable(CAP_SETUID)) {
+	if (!ns_capable(old->user_ns, CAP_SETUID)) {
 		if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
 		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
 			goto error;
@@ -673,7 +673,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
 	old = current_cred();
 
 	retval = -EPERM;
-	if (!nsown_capable(CAP_SETGID)) {
+	if (!ns_capable(old->user_ns, CAP_SETGID)) {
 		if (rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
 		    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
 			goto error;
@@ -744,7 +744,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 
 	if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid)  ||
 	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
-	    nsown_capable(CAP_SETUID)) {
+	    ns_capable(old->user_ns, CAP_SETUID)) {
 		if (!uid_eq(kuid, old->fsuid)) {
 			new->fsuid = kuid;
 			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -783,7 +783,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 
 	if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid)  ||
 	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
-	    nsown_capable(CAP_SETGID)) {
+	    ns_capable(old->user_ns, CAP_SETGID)) {
 		if (!gid_eq(kgid, old->fsgid)) {
 			new->fsgid = kgid;
 			goto change_okay;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index f6c83d7ef000..602e5bbbceff 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
 	struct group_info *group_info;
 	int retval;
 
-	if (!nsown_capable(CAP_SETGID))
+	if (!ns_capable(current_user_ns(), CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 2fc8576efaa8..fd393124e507 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -114,7 +114,7 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
 	struct uts_namespace *ns = new;
 
 	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	get_uts_ns(ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f97652036754..81d3a9a08453 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
 	struct net *net = ns;
 
 	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	put_net(nsproxy->net_ns);
diff --git a/net/core/scm.c b/net/core/scm.c
index 03795d0147f2..c346f58d97c2 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds)
 	if ((creds->pid == task_tgid_vnr(current) ||
 	     ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
 	    ((uid_eq(uid, cred->uid)   || uid_eq(uid, cred->euid) ||
-	      uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
+	      uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
 	    ((gid_eq(gid, cred->gid)   || gid_eq(gid, cred->egid) ||
-	      gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
+	      gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
 	       return 0;
 	}
 	return -EPERM;
-- 
cgit v1.2.3