From d07b846f6200454c50d791796edb82660192513d Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 23 Sep 2015 15:16:04 -0500
Subject: fs: Limit file caps to the user namespace of the super block

Capability sets attached to files must be ignored except in the
user namespaces where the mounter is privileged, i.e. s_user_ns
and its descendants. Otherwise a vector exists for gaining
privileges in namespaces where a user is not already privileged.

Add a new helper function, current_in_user_ns(), to test whether a user
namespace is the same as or a descendant of another namespace.
Use this helper to determine whether a file's capability set
should be applied to the caps constructed during exec.

--EWB Replaced in_userns with the simpler current_in_userns.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 security/commoncap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'security')

diff --git a/security/commoncap.c b/security/commoncap.c
index e7fadde737f4..e109e6dac858 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -455,6 +455,8 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
 
 	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
 		return 0;
+	if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns))
+		return 0;
 
 	rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
 	if (rc < 0) {
-- 
cgit v1.2.3


From 380cf5ba6b0a0b307f4afb62b186ca801defb203 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Thu, 23 Jun 2016 16:41:05 -0500
Subject: fs: Treat foreign mounts as nosuid

If a process gets access to a mount from a different user
namespace, that process should not be able to take advantage of
setuid files or selinux entrypoints from that filesystem.  Prevent
this by treating mounts from other mount namespaces and those not
owned by current_user_ns() or an ancestor as nosuid.

This will make it safer to allow more complex filesystems to be
mounted in non-root user namespaces.

This does not remove the need for MNT_LOCK_NOSUID.  The setuid,
setgid, and file capability bits can no longer be abused if code in
a user namespace were to clear nosuid on an untrusted filesystem,
but this patch, by itself, is insufficient to protect the system
from abuse of files that, when execed, would increase MAC privilege.

As a more concrete explanation, any task that can manipulate a
vfsmount associated with a given user namespace already has
capabilities in that namespace and all of its descendents.  If they
can cause a malicious setuid, setgid, or file-caps executable to
appear in that mount, then that executable will only allow them to
elevate privileges in exactly the set of namespaces in which they
are already privileges.

On the other hand, if they can cause a malicious executable to
appear with a dangerous MAC label, running it could change the
caller's security context in a way that should not have been
possible, even inside the namespace in which the task is confined.

As a hardening measure, this would have made CVE-2014-5207 much
more difficult to exploit.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/exec.c                |  2 +-
 fs/namespace.c           | 13 +++++++++++++
 include/linux/mount.h    |  1 +
 security/commoncap.c     |  8 +++++++-
 security/selinux/hooks.c |  2 +-
 5 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'security')

diff --git a/fs/exec.c b/fs/exec.c
index 887c1c955df8..ca239fc86d8d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1411,7 +1411,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
 	bprm->cred->euid = current_euid();
 	bprm->cred->egid = current_egid();
 
-	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+	if (!mnt_may_suid(bprm->file->f_path.mnt))
 		return;
 
 	if (task_no_new_privs(current))
diff --git a/fs/namespace.c b/fs/namespace.c
index 9786a38d1681..aabe8e397fc3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3280,6 +3280,19 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
 	return !mnt_already_visible(ns, mnt, new_mnt_flags);
 }
 
+bool mnt_may_suid(struct vfsmount *mnt)
+{
+	/*
+	 * Foreign mounts (accessed via fchdir or through /proc
+	 * symlinks) are always treated as if they are nosuid.  This
+	 * prevents namespaces from trusting potentially unsafe
+	 * suid/sgid bits, file caps, or security labels that originate
+	 * in other namespaces.
+	 */
+	return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
+	       current_in_userns(mnt->mnt_sb->s_user_ns);
+}
+
 static struct ns_common *mntns_get(struct task_struct *task)
 {
 	struct ns_common *ns = NULL;
diff --git a/include/linux/mount.h b/include/linux/mount.h
index f822c3c11377..54a594d49733 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
 extern struct vfsmount *mnt_clone_internal(struct path *path);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool mnt_may_suid(struct vfsmount *mnt);
 
 struct path;
 extern struct vfsmount *clone_private_mount(struct path *path);
diff --git a/security/commoncap.c b/security/commoncap.c
index e109e6dac858..14540bd78561 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -453,8 +453,14 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
 	if (!file_caps_enabled)
 		return 0;
 
-	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+	if (!mnt_may_suid(bprm->file->f_path.mnt))
 		return 0;
+
+	/*
+	 * This check is redundant with mnt_may_suid() but is kept to make
+	 * explicit that capability bits are limited to s_user_ns and its
+	 * descendants.
+	 */
 	if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns))
 		return 0;
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a86d537eb79b..15541756eb07 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2259,7 +2259,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm,
 			    const struct task_security_struct *new_tsec)
 {
 	int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS);
-	int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID);
+	int nosuid = !mnt_may_suid(bprm->file->f_path.mnt);
 	int rc;
 
 	if (!nnp && !nosuid)
-- 
cgit v1.2.3


From 9f50eda2a9277e0bc51d8ca5dd2ec1d0e73601bc Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 23 Sep 2015 15:16:06 -0500
Subject: Smack: Add support for unprivileged mounts from user namespaces

Security labels from unprivileged mounts cannot be trusted.
Ideally for these mounts we would assign the objects in the
filesystem the same label as the inode for the backing device
passed to mount. Unfortunately it's currently impossible to
determine which inode this is from the LSM mount hooks, so we
settle for the label of the process doing the mount.

This label is assigned to s_root, and also to smk_default to
ensure that new inodes receive this label. The transmute property
is also set on s_root to make this behavior more explicit, even
though it is technically not necessary.

If a filesystem has existing security labels, access to inodes is
permitted if the label is the same as smk_root, otherwise access
is denied. The SMACK64EXEC xattr is completely ignored.

Explicit setting of security labels continues to require
CAP_MAC_ADMIN in init_user_ns.

Altogether, this ensures that filesystem objects are not
accessible to subjects which cannot already access the backing
store, that MAC is not violated for any objects in the fileystem
which are already labeled, and that a user cannot use an
unprivileged mount to gain elevated MAC privileges.

sysfs, tmpfs, and ramfs are already mountable from user
namespaces and support security labels. We can't rule out the
possibility that these filesystems may already be used in mounts
from user namespaces with security lables set from the init
namespace, so failing to trust lables in these filesystems may
introduce regressions. It is safe to trust labels from these
filesystems, since the unprivileged user does not control the
backing store and thus cannot supply security labels, so an
explicit exception is made to trust labels from these
filesystems.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 security/smack/smack.h     |  8 +++++++-
 security/smack/smack_lsm.c | 41 ++++++++++++++++++++++++++++++-----------
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'security')

diff --git a/security/smack/smack.h b/security/smack/smack.h
index 6c91156ae225..26e58f1804b1 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -90,9 +90,15 @@ struct superblock_smack {
 	struct smack_known	*smk_floor;
 	struct smack_known	*smk_hat;
 	struct smack_known	*smk_default;
-	int			smk_initialized;
+	int			smk_flags;
 };
 
+/*
+ * Superblock flags
+ */
+#define SMK_SB_INITIALIZED	0x01
+#define SMK_SB_UNTRUSTED	0x02
+
 struct socket_smack {
 	struct smack_known	*smk_out;	/* outbound label */
 	struct smack_known	*smk_in;	/* inbound label */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 6777295f4b2b..a799b7af11af 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -549,7 +549,7 @@ static int smack_sb_alloc_security(struct super_block *sb)
 	sbsp->smk_floor = &smack_known_floor;
 	sbsp->smk_hat = &smack_known_hat;
 	/*
-	 * smk_initialized will be zero from kzalloc.
+	 * SMK_SB_INITIALIZED will be zero from kzalloc.
 	 */
 	sb->s_security = sbsp;
 
@@ -766,10 +766,10 @@ static int smack_set_mnt_opts(struct super_block *sb,
 	int num_opts = opts->num_mnt_opts;
 	int transmute = 0;
 
-	if (sp->smk_initialized)
+	if (sp->smk_flags & SMK_SB_INITIALIZED)
 		return 0;
 
-	sp->smk_initialized = 1;
+	sp->smk_flags |= SMK_SB_INITIALIZED;
 
 	for (i = 0; i < num_opts; i++) {
 		switch (opts->mnt_opts_flags[i]) {
@@ -821,6 +821,17 @@ static int smack_set_mnt_opts(struct super_block *sb,
 		skp = smk_of_current();
 		sp->smk_root = skp;
 		sp->smk_default = skp;
+		/*
+		 * For a handful of fs types with no user-controlled
+		 * backing store it's okay to trust security labels
+		 * in the filesystem. The rest are untrusted.
+		 */
+		if (sb->s_user_ns != &init_user_ns &&
+		    sb->s_magic != SYSFS_MAGIC && sb->s_magic != TMPFS_MAGIC &&
+		    sb->s_magic != RAMFS_MAGIC) {
+			transmute = 1;
+			sp->smk_flags |= SMK_SB_UNTRUSTED;
+		}
 	}
 
 	/*
@@ -1203,6 +1214,7 @@ static int smack_inode_rename(struct inode *old_inode,
  */
 static int smack_inode_permission(struct inode *inode, int mask)
 {
+	struct superblock_smack *sbsp = inode->i_sb->s_security;
 	struct smk_audit_info ad;
 	int no_block = mask & MAY_NOT_BLOCK;
 	int rc;
@@ -1214,6 +1226,11 @@ static int smack_inode_permission(struct inode *inode, int mask)
 	if (mask == 0)
 		return 0;
 
+	if (sbsp->smk_flags & SMK_SB_UNTRUSTED) {
+		if (smk_of_inode(inode) != sbsp->smk_root)
+			return -EACCES;
+	}
+
 	/* May be droppable after audit */
 	if (no_block)
 		return -ECHILD;
@@ -3529,14 +3546,16 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 			if (rc >= 0)
 				transflag = SMK_INODE_TRANSMUTE;
 		}
-		/*
-		 * Don't let the exec or mmap label be "*" or "@".
-		 */
-		skp = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
-		if (IS_ERR(skp) || skp == &smack_known_star ||
-		    skp == &smack_known_web)
-			skp = NULL;
-		isp->smk_task = skp;
+		if (!(sbsp->smk_flags & SMK_SB_UNTRUSTED)) {
+			/*
+			 * Don't let the exec or mmap label be "*" or "@".
+			 */
+			skp = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
+			if (IS_ERR(skp) || skp == &smack_known_star ||
+			    skp == &smack_known_web)
+				skp = NULL;
+			isp->smk_task = skp;
+		}
 
 		skp = smk_fetch(XATTR_NAME_SMACKMMAP, inode, dp);
 		if (IS_ERR(skp) || skp == &smack_known_star ||
-- 
cgit v1.2.3


From 809c02e091a8272bc8586a5d606565bc900f3467 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Tue, 26 Apr 2016 14:36:22 -0500
Subject: Smack: Handle labels consistently in untrusted mounts

The SMACK64, SMACK64EXEC, and SMACK64MMAP labels are all handled
differently in untrusted mounts. This is confusing and
potentically problematic. Change this to handle them all the same
way that SMACK64 is currently handled; that is, read the label
from disk and check it at use time. For SMACK64 and SMACK64MMAP
access is denied if the label does not match smk_root. To be
consistent with suid, a SMACK64EXEC label which does not match
smk_root will still allow execution of the file but will not run
with the label supplied in the xattr.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 security/smack/smack_lsm.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'security')

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index a799b7af11af..b75634dbf53b 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -919,6 +919,7 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
 	struct inode *inode = file_inode(bprm->file);
 	struct task_smack *bsp = bprm->cred->security;
 	struct inode_smack *isp;
+	struct superblock_smack *sbsp;
 	int rc;
 
 	if (bprm->cred_prepared)
@@ -928,6 +929,11 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm)
 	if (isp->smk_task == NULL || isp->smk_task == bsp->smk_task)
 		return 0;
 
+	sbsp = inode->i_sb->s_security;
+	if ((sbsp->smk_flags & SMK_SB_UNTRUSTED) &&
+	    isp->smk_task != sbsp->smk_root)
+		return 0;
+
 	if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
 		struct task_struct *tracer;
 		rc = 0;
@@ -1725,6 +1731,7 @@ static int smack_mmap_file(struct file *file,
 	struct task_smack *tsp;
 	struct smack_known *okp;
 	struct inode_smack *isp;
+	struct superblock_smack *sbsp;
 	int may;
 	int mmay;
 	int tmay;
@@ -1736,6 +1743,10 @@ static int smack_mmap_file(struct file *file,
 	isp = file_inode(file)->i_security;
 	if (isp->smk_mmap == NULL)
 		return 0;
+	sbsp = file_inode(file)->i_sb->s_security;
+	if (sbsp->smk_flags & SMK_SB_UNTRUSTED &&
+	    isp->smk_mmap != sbsp->smk_root)
+		return -EACCES;
 	mkp = isp->smk_mmap;
 
 	tsp = current_security();
@@ -3546,16 +3557,14 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 			if (rc >= 0)
 				transflag = SMK_INODE_TRANSMUTE;
 		}
-		if (!(sbsp->smk_flags & SMK_SB_UNTRUSTED)) {
-			/*
-			 * Don't let the exec or mmap label be "*" or "@".
-			 */
-			skp = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
-			if (IS_ERR(skp) || skp == &smack_known_star ||
-			    skp == &smack_known_web)
-				skp = NULL;
-			isp->smk_task = skp;
-		}
+		/*
+		 * Don't let the exec or mmap label be "*" or "@".
+		 */
+		skp = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
+		if (IS_ERR(skp) || skp == &smack_known_star ||
+		    skp == &smack_known_web)
+			skp = NULL;
+		isp->smk_task = skp;
 
 		skp = smk_fetch(XATTR_NAME_SMACKMMAP, inode, dp);
 		if (IS_ERR(skp) || skp == &smack_known_star ||
-- 
cgit v1.2.3


From aad82892af261b9903cc11c55be3ecf5f0b0b4f8 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Tue, 26 Apr 2016 14:36:20 -0500
Subject: selinux: Add support for unprivileged mounts from user namespaces

Security labels from unprivileged mounts in user namespaces must
be ignored. Force superblocks from user namespaces whose labeling
behavior is to use xattrs to use mountpoint labeling instead.
For the mountpoint label, default to converting the current task
context into a form suitable for file objects, but also allow the
policy writer to specify a different label through policy
transition rules.

Pieced together from code snippets provided by Stephen Smalley.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 security/selinux/hooks.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 15541756eb07..19be9d39c742 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -830,6 +830,28 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 			goto out;
 		}
 	}
+
+	/*
+	 * If this is a user namespace mount, no contexts are allowed
+	 * on the command line and security labels must be ignored.
+	 */
+	if (sb->s_user_ns != &init_user_ns) {
+		if (context_sid || fscontext_sid || rootcontext_sid ||
+		    defcontext_sid) {
+			rc = -EACCES;
+			goto out;
+		}
+		if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
+			sbsec->behavior = SECURITY_FS_USE_MNTPOINT;
+			rc = security_transition_sid(current_sid(), current_sid(),
+						     SECCLASS_FILE, NULL,
+						     &sbsec->mntpoint_sid);
+			if (rc)
+				goto out;
+		}
+		goto out_set_opts;
+	}
+
 	/* sets the context of the superblock for the fs being mounted. */
 	if (fscontext_sid) {
 		rc = may_context_mount_sb_relabel(fscontext_sid, sbsec, cred);
@@ -898,6 +920,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
 		sbsec->def_sid = defcontext_sid;
 	}
 
+out_set_opts:
 	rc = sb_finish_set_opts(sb);
 out:
 	mutex_unlock(&sbsec->lock);
-- 
cgit v1.2.3


From 0b3c9761d1e405514a551ed24d3ea89aea26ce14 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Thu, 5 Feb 2015 10:44:50 -0600
Subject: evm: Translate user/group ids relative to s_user_ns when computing
 HMAC

The EVM HMAC should be calculated using the on disk user and
group ids, so the k[ug]ids in the inode must be translated
relative to the s_user_ns of the inode's super block.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 security/integrity/evm/evm_crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'security')

diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 30b6b7d0429f..11c1d30bd705 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -151,8 +151,8 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
 	memset(&hmac_misc, 0, sizeof(hmac_misc));
 	hmac_misc.ino = inode->i_ino;
 	hmac_misc.generation = inode->i_generation;
-	hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid);
-	hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
+	hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+	hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
 	hmac_misc.mode = inode->i_mode;
 	crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
 	if (evm_hmac_attrs & EVM_ATTR_FSUUID)
-- 
cgit v1.2.3