summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-12-17 12:31:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-17 12:31:40 -0800
commit87c31b39abcb6fb6bd7d111200c9627a594bf6a9 (patch)
treeab2e5331fea9b823cb92719d0954a9141451c931 /tools
parentf045bbb9fa1bf6f507ad4de12d4e3471d8f672f1 (diff)
parentdb86da7cb76f797a1a8b445166a15cb922c6ff85 (diff)
downloadlwn-87c31b39abcb6fb6bd7d111200c9627a594bf6a9.tar.gz
lwn-87c31b39abcb6fb6bd7d111200c9627a594bf6a9.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace related fixes from Eric Biederman: "As these are bug fixes almost all of thes changes are marked for backporting to stable. The first change (implicitly adding MNT_NODEV on remount) addresses a regression that was created when security issues with unprivileged remount were closed. I go on to update the remount test to make it easy to detect if this issue reoccurs. Then there are a handful of mount and umount related fixes. Then half of the changes deal with the a recently discovered design bug in the permission checks of gid_map. Unix since the beginning has allowed setting group permissions on files to less than the user and other permissions (aka ---rwx---rwx). As the unix permission checks stop as soon as a group matches, and setgroups allows setting groups that can not later be dropped, results in a situtation where it is possible to legitimately use a group to assign fewer privileges to a process. Which means dropping a group can increase a processes privileges. The fix I have adopted is that gid_map is now no longer writable without privilege unless the new file /proc/self/setgroups has been set to permanently disable setgroups. The bulk of user namespace using applications even the applications using applications using user namespaces without privilege remain unaffected by this change. Unfortunately this ix breaks a couple user space applications, that were relying on the problematic behavior (one of which was tools/selftests/mount/unprivileged-remount-test.c). To hopefully prevent needing a regression fix on top of my security fix I rounded folks who work with the container implementations mostly like to be affected and encouraged them to test the changes. > So far nothing broke on my libvirt-lxc test bed. :-) > Tested with openSUSE 13.2 and libvirt 1.2.9. > Tested-by: Richard Weinberger <richard@nod.at> > Tested on Fedora20 with libvirt 1.2.11, works fine. > Tested-by: Chen Hanxiao <chenhanxiao@cn.fujitsu.com> > Ok, thanks - yes, unprivileged lxc is working fine with your kernels. > Just to be sure I was testing the right thing I also tested using > my unprivileged nsexec testcases, and they failed on setgroup/setgid > as now expected, and succeeded there without your patches. > Tested-by: Serge Hallyn <serge.hallyn@ubuntu.com> > I tested this with Sandstorm. It breaks as is and it works if I add > the setgroups thing. > Tested-by: Andy Lutomirski <luto@amacapital.net> # breaks things as designed :(" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: userns: Unbreak the unprivileged remount tests userns; Correct the comment in map_write userns: Allow setting gid_maps without privilege when setgroups is disabled userns: Add a knob to disable setgroups on a per user namespace basis userns: Rename id_map_mutex to userns_state_mutex userns: Only allow the creator of the userns unprivileged mappings userns: Check euid no fsuid when establishing an unprivileged uid mapping userns: Don't allow unprivileged creation of gid mappings userns: Don't allow setgroups until a gid mapping has been setablished userns: Document what the invariant required for safe unprivileged mappings. groups: Consolidate the setgroups permission checks mnt: Clear mnt_expire during pivot_root mnt: Carefully set CL_UNPRIVILEGED in clone_mnt mnt: Move the clear of MNT_LOCKED from copy_tree to it's callers. umount: Do not allow unmounting rootfs. umount: Disallow unprivileged mount force mnt: Update unprivileged remount test mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/mount/unprivileged-remount-test.c204
1 files changed, 166 insertions, 38 deletions
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 1b3ff2fda4d0..517785052f1c 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -6,6 +6,8 @@
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
@@ -32,11 +34,14 @@
# define CLONE_NEWPID 0x20000000
#endif
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
#ifndef MS_RELATIME
-#define MS_RELATIME (1 << 21)
+# define MS_RELATIME (1 << 21)
#endif
#ifndef MS_STRICTATIME
-#define MS_STRICTATIME (1 << 24)
+# define MS_STRICTATIME (1 << 24)
#endif
static void die(char *fmt, ...)
@@ -48,17 +53,14 @@ static void die(char *fmt, ...)
exit(EXIT_FAILURE);
}
-static void write_file(char *filename, char *fmt, ...)
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
{
char buf[4096];
int fd;
ssize_t written;
int buf_len;
- va_list ap;
- va_start(ap, fmt);
buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
if (buf_len < 0) {
die("vsnprintf failed: %s\n",
strerror(errno));
@@ -69,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...)
fd = open(filename, O_WRONLY);
if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
die("open of %s failed: %s\n",
filename, strerror(errno));
}
@@ -87,6 +91,65 @@ static void write_file(char *filename, char *fmt, ...)
}
}
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+
+}
+
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0) {
+ die("statvfs of %s failed: %s\n",
+ path, strerror(errno));
+ }
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+ ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+ ST_SYNCHRONOUS | ST_MANDLOCK)) {
+ die("Unrecognized mount flags\n");
+ }
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= ST_MANDLOCK;
+
+ return mnt_flags;
+}
+
static void create_and_enter_userns(void)
{
uid_t uid;
@@ -100,13 +163,10 @@ static void create_and_enter_userns(void)
strerror(errno));
}
+ maybe_write_file("/proc/self/setgroups", "deny");
write_file("/proc/self/uid_map", "0 %d 1", uid);
write_file("/proc/self/gid_map", "0 %d 1", gid);
- if (setgroups(0, NULL) != 0) {
- die("setgroups failed: %s\n",
- strerror(errno));
- }
if (setgid(0) != 0) {
die ("setgid(0) failed %s\n",
strerror(errno));
@@ -118,7 +178,8 @@ static void create_and_enter_userns(void)
}
static
-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+ int mount_flags, int remount_flags, int invalid_flags)
{
pid_t child;
@@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
strerror(errno));
}
- if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
- die("mount of /tmp failed: %s\n",
- strerror(errno));
+ if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+ die("mount of %s with options '%s' on /tmp failed: %s\n",
+ fstype,
+ mount_options? mount_options : "",
+ strerror(errno));
}
create_and_enter_userns();
@@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
static bool test_unpriv_remount_simple(int mount_flags)
{
- return test_unpriv_remount(mount_flags, mount_flags, 0);
+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
}
static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
{
- return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+ invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+ pid_t child;
+ int ret;
+ const char *orig_path = "/dev";
+ const char *dest_path = "/tmp";
+ int orig_mnt_flags, remount_mnt_flags;
+
+ child = fork();
+ if (child == -1) {
+ die("fork failed: %s\n",
+ strerror(errno));
+ }
+ if (child != 0) { /* parent */
+ pid_t pid;
+ int status;
+ pid = waitpid(child, &status, 0);
+ if (pid == -1) {
+ die("waitpid failed: %s\n",
+ strerror(errno));
+ }
+ if (pid != child) {
+ die("waited for %d got %d\n",
+ child, pid);
+ }
+ if (!WIFEXITED(status)) {
+ die("child did not terminate cleanly\n");
+ }
+ return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ }
+
+ orig_mnt_flags = read_mnt_flags(orig_path);
+
+ create_and_enter_userns();
+ ret = unshare(CLONE_NEWNS);
+ if (ret != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+ if (ret != 0) {
+ die("recursive bind mount of %s onto %s failed: %s\n",
+ orig_path, dest_path, strerror(errno));
+ }
+
+ ret = mount(dest_path, dest_path, "none",
+ MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+ if (ret != 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp failed: %s\n",
+ strerror(errno));
+ }
+
+ remount_mnt_flags = read_mnt_flags(dest_path);
+ if (orig_mnt_flags != remount_mnt_flags) {
+ die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+ dest_path, orig_path);
+ }
+ exit(EXIT_SUCCESS);
}
int main(int argc, char **argv)
{
- if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
+ if (!test_unpriv_remount_simple(MS_RDONLY)) {
die("MS_RDONLY malfunctions\n");
}
- if (!test_unpriv_remount_simple(MS_NODEV)) {
+ if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
die("MS_NODEV malfunctions\n");
}
- if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
+ if (!test_unpriv_remount_simple(MS_NOSUID)) {
die("MS_NOSUID malfunctions\n");
}
- if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
+ if (!test_unpriv_remount_simple(MS_NOEXEC)) {
die("MS_NOEXEC malfunctions\n");
}
- if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
- MS_NOATIME|MS_NODEV))
+ if (!test_unpriv_remount_atime(MS_RELATIME,
+ MS_NOATIME))
{
die("MS_RELATIME malfunctions\n");
}
- if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
- MS_NOATIME|MS_NODEV))
+ if (!test_unpriv_remount_atime(MS_STRICTATIME,
+ MS_NOATIME))
{
die("MS_STRICTATIME malfunctions\n");
}
- if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
- MS_STRICTATIME|MS_NODEV))
+ if (!test_unpriv_remount_atime(MS_NOATIME,
+ MS_STRICTATIME))
{
- die("MS_RELATIME malfunctions\n");
+ die("MS_NOATIME malfunctions\n");
}
- if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
- MS_NOATIME|MS_NODEV))
+ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+ MS_NOATIME))
{
- die("MS_RELATIME malfunctions\n");
+ die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
}
- if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
- MS_NOATIME|MS_NODEV))
+ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+ MS_NOATIME))
{
- die("MS_RELATIME malfunctions\n");
+ die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
}
- if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
- MS_STRICTATIME|MS_NODEV))
+ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+ MS_STRICTATIME))
{
- die("MS_RELATIME malfunctions\n");
+ die("MS_NOATIME|MS_DIRATIME malfunctions\n");
}
- if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
- MS_NOATIME|MS_NODEV))
+ if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
{
die("Default atime malfunctions\n");
}
+ if (!test_priv_mount_unpriv_remount()) {
+ die("Mount flags unexpectedly changed after remount\n");
+ }
return EXIT_SUCCESS;
}