diff options
Diffstat (limited to 'tools/testing/selftests/filesystems')
50 files changed, 7428 insertions, 238 deletions
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 828b66a10c63..64ac0dfa46b7 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts +fclog file_stressor +anon_inode_test +kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 66305fc34c60..85427d7f19b9 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c new file mode 100644 index 000000000000..2c4c50500116 --- /dev/null +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> + +#include "kselftest_harness.h" +#include "wrappers.h" + +TEST(anon_inode_no_chown) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchown(fd_context, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_chmod) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchmod(fd_context, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_exec) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + char *const empty_argv[] = 
{NULL}; + char *const empty_envp[] = {NULL}; + + ASSERT_LT(execveat(fd_context, "", empty_argv, empty_envp, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_open) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_GE(dup2(fd_context, 500), 0); + ASSERT_EQ(close(fd_context), 0); + fd_context = 500; + + ASSERT_LT(open("/proc/self/fd/500", 0), 0); + ASSERT_EQ(errno, ENXIO); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 81db85a5cc16..a1a79a6fef17 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -21,7 +21,7 @@ #include <linux/android/binder.h> #include <linux/android/binderfs.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define DEFAULT_THREADS 4 @@ -65,6 +65,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) "oneway_spam_detection", "extended_error", "freeze_notification", + "transaction_report", }; change_mountns(_metadata); diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c index b1fc9b916ace..aa8d5324f2a6 100644 --- a/tools/testing/selftests/filesystems/devpts_pts.c +++ b/tools/testing/selftests/filesystems/devpts_pts.c @@ -11,7 +11,7 @@ #include <asm/ioctls.h> #include <sys/mount.h> #include <sys/wait.h> -#include "../kselftest.h" +#include "kselftest.h" static bool terminal_dup2(int duplicate, int original) { @@ -119,9 +119,7 @@ static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents) goto do_cleanup; } -#ifdef TIOCGPTPEER slave = ioctl(master, TIOCGPTPEER, O_RDWR | O_NOCTTY | O_CLOEXEC); -#endif if (slave < 0) { if (errno == EINVAL) { fprintf(stderr, "TIOCGPTPEER is not supported. 
" diff --git a/tools/testing/selftests/filesystems/empty_mntns/.gitignore b/tools/testing/selftests/filesystems/empty_mntns/.gitignore new file mode 100644 index 000000000000..99f89d329db2 --- /dev/null +++ b/tools/testing/selftests/filesystems/empty_mntns/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +clone3_empty_mntns_test +empty_mntns_test +overmount_chroot_test diff --git a/tools/testing/selftests/filesystems/empty_mntns/Makefile b/tools/testing/selftests/filesystems/empty_mntns/Makefile new file mode 100644 index 000000000000..22e3fb915e81 --- /dev/null +++ b/tools/testing/selftests/filesystems/empty_mntns/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + +TEST_GEN_PROGS := empty_mntns_test overmount_chroot_test clone3_empty_mntns_test + +include ../../lib.mk + +$(OUTPUT)/empty_mntns_test: ../utils.c +$(OUTPUT)/overmount_chroot_test: ../utils.c +$(OUTPUT)/clone3_empty_mntns_test: ../utils.c diff --git a/tools/testing/selftests/filesystems/empty_mntns/clone3_empty_mntns_test.c b/tools/testing/selftests/filesystems/empty_mntns/clone3_empty_mntns_test.c new file mode 100644 index 000000000000..6370086f886d --- /dev/null +++ b/tools/testing/selftests/filesystems/empty_mntns/clone3_empty_mntns_test.c @@ -0,0 +1,938 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Tests for empty mount namespace creation via clone3() CLONE_EMPTY_MNTNS + * + * These tests exercise the clone3() code path for creating empty mount + * namespaces, which is distinct from the unshare() path tested in + * empty_mntns_test.c. With clone3(), CLONE_EMPTY_MNTNS (0x2000000000ULL) + * is a 64-bit flag that implies CLONE_NEWNS. The implication happens in + * kernel_clone() before copy_process(), unlike unshare() where it goes + * through UNSHARE_EMPTY_MNTNS -> CLONE_EMPTY_MNTNS conversion in + * unshare_nsproxy_namespaces(). 
+ * + * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <linux/mount.h> +#include <linux/stat.h> +#include <stdio.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../utils.h" +#include "../wrappers.h" +#include "clone3/clone3_selftests.h" +#include "empty_mntns.h" +#include "kselftest_harness.h" + +static pid_t clone3_empty_mntns(uint64_t extra_flags) +{ + struct __clone_args args = { + .flags = CLONE_EMPTY_MNTNS | extra_flags, + .exit_signal = SIGCHLD, + }; + + return sys_clone3(&args, sizeof(args)); +} + +static bool clone3_empty_mntns_supported(void) +{ + pid_t pid; + int status; + + pid = fork(); + if (pid < 0) + return false; + + if (pid == 0) { + if (enter_userns()) + _exit(1); + + pid = clone3_empty_mntns(0); + if (pid < 0) + _exit(1); + + if (pid == 0) + _exit(0); + + _exit(wait_for_pid(pid) != 0); + } + + if (waitpid(pid, &status, 0) != pid) + return false; + + if (!WIFEXITED(status)) + return false; + + return WEXITSTATUS(status) == 0; +} + +FIXTURE(clone3_empty_mntns) {}; + +FIXTURE_SETUP(clone3_empty_mntns) +{ + if (!clone3_empty_mntns_supported()) + SKIP(return, "CLONE_EMPTY_MNTNS via clone3 not supported"); +} + +FIXTURE_TEARDOWN(clone3_empty_mntns) {} + +/* + * Basic clone3() with CLONE_EMPTY_MNTNS: child gets empty mount namespace + * with exactly 1 mount and root == cwd. 
+ */ +TEST_F(clone3_empty_mntns, basic) +{ + pid_t pid, inner; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (enter_userns()) + _exit(1); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(2); + + if (inner == 0) { + uint64_t root_id, cwd_id; + + if (count_mounts() != 1) + _exit(3); + + root_id = get_unique_mnt_id("/"); + cwd_id = get_unique_mnt_id("."); + if (root_id == 0 || cwd_id == 0) + _exit(4); + + if (root_id != cwd_id) + _exit(5); + + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * CLONE_EMPTY_MNTNS implies CLONE_NEWNS. Verify that it works without + * explicitly setting CLONE_NEWNS (tests fork.c:2627-2630). + */ +TEST_F(clone3_empty_mntns, implies_newns) +{ + pid_t pid, inner; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ssize_t parent_mounts; + + if (enter_userns()) + _exit(1); + + /* Verify we have mounts in our current namespace. */ + parent_mounts = count_mounts(); + if (parent_mounts < 1) + _exit(2); + + /* Only CLONE_EMPTY_MNTNS, no explicit CLONE_NEWNS. */ + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(3); + + if (inner == 0) { + if (count_mounts() != 1) + _exit(4); + + _exit(0); + } + + /* Parent still has its mounts. */ + if (count_mounts() != parent_mounts) + _exit(5); + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Helper macro: generate a test that clones with CLONE_EMPTY_MNTNS | + * @extra_flags and verifies the child has exactly one mount. 
+ */ +#define TEST_CLONE3_FLAGS(test_name, extra_flags) \ +TEST_F(clone3_empty_mntns, test_name) \ +{ \ + pid_t pid, inner; \ + \ + pid = fork(); \ + ASSERT_GE(pid, 0); \ + \ + if (pid == 0) { \ + if (enter_userns()) \ + _exit(1); \ + \ + inner = clone3_empty_mntns(extra_flags); \ + if (inner < 0) \ + _exit(2); \ + \ + if (inner == 0) { \ + if (count_mounts() != 1) \ + _exit(3); \ + _exit(0); \ + } \ + \ + _exit(wait_for_pid(inner)); \ + } \ + \ + ASSERT_EQ(wait_for_pid(pid), 0); \ +} + +/* Redundant CLONE_NEWNS | CLONE_EMPTY_MNTNS should succeed. */ +TEST_CLONE3_FLAGS(with_explicit_newns, CLONE_NEWNS) + +/* CLONE_EMPTY_MNTNS combined with CLONE_NEWUSER. */ +TEST_CLONE3_FLAGS(with_newuser, CLONE_NEWUSER) + +/* CLONE_EMPTY_MNTNS combined with other namespace flags. */ +TEST_CLONE3_FLAGS(with_other_ns_flags, CLONE_NEWUTS | CLONE_NEWIPC) + +/* + * CLONE_EMPTY_MNTNS combined with CLONE_NEWPID. + */ +TEST_F(clone3_empty_mntns, with_newpid) +{ + pid_t pid, inner; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (enter_userns()) + _exit(1); + + inner = clone3_empty_mntns(CLONE_NEWPID); + if (inner < 0) + _exit(2); + + if (inner == 0) { + if (count_mounts() != 1) + _exit(3); + + /* In a new PID namespace, getpid() returns 1. */ + if (getpid() != 1) + _exit(4); + + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * CLONE_EMPTY_MNTNS | CLONE_FS must fail because the implied CLONE_NEWNS + * and CLONE_FS are mutually exclusive (fork.c:1981). 
+ */ +TEST_F(clone3_empty_mntns, with_clone_fs_fails) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct __clone_args args = { + .flags = CLONE_EMPTY_MNTNS | CLONE_FS, + .exit_signal = SIGCHLD, + }; + pid_t ret; + + if (enter_userns()) + _exit(1); + + ret = sys_clone3(&args, sizeof(args)); + if (ret >= 0) { + if (ret == 0) + _exit(0); + wait_for_pid(ret); + _exit(2); + } + + if (errno != EINVAL) + _exit(3); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * CLONE_EMPTY_MNTNS combined with CLONE_PIDFD returns a valid pidfd. + */ +TEST_F(clone3_empty_mntns, with_pidfd) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct __clone_args args = { + .flags = CLONE_EMPTY_MNTNS | CLONE_PIDFD, + .exit_signal = SIGCHLD, + }; + int pidfd = -1; + pid_t inner; + + if (enter_userns()) + _exit(1); + + args.pidfd = (uintptr_t)&pidfd; + + inner = sys_clone3(&args, sizeof(args)); + if (inner < 0) + _exit(2); + + if (inner == 0) { + if (count_mounts() != 1) + _exit(3); + + _exit(0); + } + + /* Verify we got a valid pidfd. */ + if (pidfd < 0) + _exit(4); + + close(pidfd); + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * clone3 without CAP_SYS_ADMIN must fail with EPERM. + */ +TEST_F(clone3_empty_mntns, eperm_without_caps) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t ret; + + /* Skip if already root. */ + if (getuid() == 0) + _exit(0); + + ret = clone3_empty_mntns(0); + if (ret >= 0) { + if (ret == 0) + _exit(0); + wait_for_pid(ret); + _exit(1); + } + + if (errno != EPERM) + _exit(2); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Parent's mount namespace is unaffected after clone3 with CLONE_EMPTY_MNTNS. 
+ */ +TEST_F(clone3_empty_mntns, parent_unchanged) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ssize_t nr_before, nr_after; + pid_t inner; + + if (enter_userns()) + _exit(1); + + nr_before = count_mounts(); + if (nr_before < 1) + _exit(2); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(3); + + if (inner == 0) + _exit(0); + + if (wait_for_pid(inner) != 0) + _exit(4); + + nr_after = count_mounts(); + if (nr_after != nr_before) + _exit(5); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Parent with many mounts: child still gets exactly 1 mount. + */ +TEST_F(clone3_empty_mntns, many_parent_mounts) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + char tmpdir[] = "/tmp/clone3_mntns_test.XXXXXX"; + pid_t inner; + int i; + + if (enter_userns()) + _exit(1); + + if (unshare(CLONE_NEWNS)) + _exit(2); + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) + _exit(3); + + if (!mkdtemp(tmpdir)) + _exit(4); + + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) + _exit(5); + + for (i = 0; i < 5; i++) { + char subdir[256]; + + snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); + if (mkdir(subdir, 0755) && errno != EEXIST) + _exit(6); + if (mount(subdir, subdir, NULL, MS_BIND, NULL)) + _exit(7); + } + + if (count_mounts() < 5) + _exit(8); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(9); + + if (inner == 0) { + if (count_mounts() != 1) + _exit(10); + + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Verify the child's root mount is nullfs with expected statmount properties. 
+ */ +TEST_F(clone3_empty_mntns, mount_properties) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t inner; + + if (enter_userns()) + _exit(1); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(2); + + if (inner == 0) { + struct statmount *sm; + uint64_t root_id; + + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(3); + + sm = statmount_alloc(root_id, 0, + STATMOUNT_MNT_BASIC | + STATMOUNT_MNT_POINT | + STATMOUNT_FS_TYPE, 0); + if (!sm) + _exit(4); + + /* Root mount point is "/". */ + if (!(sm->mask & STATMOUNT_MNT_POINT)) + _exit(5); + if (strcmp(sm->str + sm->mnt_point, "/") != 0) + _exit(6); + + /* Filesystem type is nullfs. */ + if (!(sm->mask & STATMOUNT_FS_TYPE)) + _exit(7); + if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) + _exit(8); + + /* Root mount is its own parent. */ + if (!(sm->mask & STATMOUNT_MNT_BASIC)) + _exit(9); + if (sm->mnt_parent_id != sm->mnt_id) + _exit(10); + + free(sm); + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Listmount returns only the root mount in the child's empty namespace. + */ +TEST_F(clone3_empty_mntns, listmount_single_entry) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t inner; + + if (enter_userns()) + _exit(1); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(2); + + if (inner == 0) { + uint64_t list[16]; + ssize_t nr_mounts; + uint64_t root_id; + + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0); + if (nr_mounts != 1) + _exit(3); + + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(4); + + if (list[0] != root_id) + _exit(5); + + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Child can mount tmpfs over nullfs root (the primary container use case). + * + * Uses the new mount API (fsopen/fsmount/move_mount) because resolving + * "/" returns the process root directly without following overmounts. 
+ * The mount fd from fsmount lets us fchdir + chroot into the new tmpfs. + */ +TEST_F(clone3_empty_mntns, child_overmount_tmpfs) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t inner; + + if (enter_userns()) + _exit(1); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(2); + + if (inner == 0) { + struct statmount *sm; + uint64_t root_id; + int fd, fsfd, mntfd; + + if (count_mounts() != 1) + _exit(3); + + /* Verify root is nullfs. */ + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(4); + + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); + if (!sm) + _exit(5); + if (!(sm->mask & STATMOUNT_FS_TYPE)) + _exit(6); + if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) + _exit(7); + free(sm); + + /* Create tmpfs via the new mount API. */ + fsfd = sys_fsopen("tmpfs", 0); + if (fsfd < 0) + _exit(8); + + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, + "size", "1M", 0)) { + close(fsfd); + _exit(9); + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, + NULL, NULL, 0)) { + close(fsfd); + _exit(10); + } + + mntfd = sys_fsmount(fsfd, 0, 0); + close(fsfd); + if (mntfd < 0) + _exit(11); + + /* Attach tmpfs to "/". */ + if (sys_move_mount(mntfd, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH)) { + close(mntfd); + _exit(12); + } + + if (count_mounts() != 2) { + close(mntfd); + _exit(13); + } + + /* Enter the tmpfs. */ + if (fchdir(mntfd)) { + close(mntfd); + _exit(14); + } + + if (chroot(".")) { + close(mntfd); + _exit(15); + } + + close(mntfd); + + /* Verify "/" is now tmpfs. */ + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(16); + + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); + if (!sm) + _exit(17); + if (!(sm->mask & STATMOUNT_FS_TYPE)) + _exit(18); + if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) + _exit(19); + free(sm); + + /* Verify tmpfs is writable. 
*/ + fd = open("/testfile", O_CREAT | O_RDWR, 0644); + if (fd < 0) + _exit(20); + + if (write(fd, "test", 4) != 4) { + close(fd); + _exit(21); + } + close(fd); + + if (access("/testfile", F_OK)) + _exit(22); + + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Multiple clone3 calls with CLONE_EMPTY_MNTNS produce children with + * distinct mount namespace root mount IDs. + */ +TEST_F(clone3_empty_mntns, repeated) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int pipe1[2], pipe2[2]; + uint64_t id1 = 0, id2 = 0; + pid_t inner1, inner2; + + if (enter_userns()) + _exit(1); + + if (pipe(pipe1) || pipe(pipe2)) + _exit(2); + + inner1 = clone3_empty_mntns(0); + if (inner1 < 0) + _exit(3); + + if (inner1 == 0) { + uint64_t root_id; + + close(pipe1[0]); + root_id = get_unique_mnt_id("/"); + if (write(pipe1[1], &root_id, sizeof(root_id)) != sizeof(root_id)) + _exit(1); + close(pipe1[1]); + _exit(0); + } + + inner2 = clone3_empty_mntns(0); + if (inner2 < 0) + _exit(4); + + if (inner2 == 0) { + uint64_t root_id; + + close(pipe2[0]); + root_id = get_unique_mnt_id("/"); + if (write(pipe2[1], &root_id, sizeof(root_id)) != sizeof(root_id)) + _exit(1); + close(pipe2[1]); + _exit(0); + } + + close(pipe1[1]); + close(pipe2[1]); + + if (read(pipe1[0], &id1, sizeof(id1)) != sizeof(id1)) + _exit(5); + if (read(pipe2[0], &id2, sizeof(id2)) != sizeof(id2)) + _exit(6); + + close(pipe1[0]); + close(pipe2[0]); + + if (wait_for_pid(inner1) || wait_for_pid(inner2)) + _exit(7); + + /* Each child must have a distinct root mount ID. */ + if (id1 == 0 || id2 == 0) + _exit(8); + if (id1 == id2) + _exit(9); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Verify setns() into a child's empty mount namespace works. 
+ */ +TEST_F(clone3_empty_mntns, setns_into_child_mntns) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int pipe_fd[2]; + pid_t inner; + char c; + + if (enter_userns()) + _exit(1); + + if (pipe(pipe_fd)) + _exit(2); + + inner = clone3_empty_mntns(0); + if (inner < 0) + _exit(3); + + if (inner == 0) { + /* Signal parent we're ready. */ + close(pipe_fd[0]); + if (write(pipe_fd[1], "r", 1) != 1) + _exit(1); + + /* + * Wait for parent to finish. Reading from our + * write end will block until the parent closes + * its read end, giving us an implicit barrier. + */ + if (read(pipe_fd[1], &c, 1) < 0) + ; + close(pipe_fd[1]); + _exit(0); + } + + close(pipe_fd[1]); + + /* Wait for child to be ready. */ + if (read(pipe_fd[0], &c, 1) != 1) + _exit(4); + + /* Open child's mount namespace. */ + { + char path[64]; + int mntns_fd; + + snprintf(path, sizeof(path), "/proc/%d/ns/mnt", inner); + mntns_fd = open(path, O_RDONLY); + if (mntns_fd < 0) + _exit(5); + + if (setns(mntns_fd, CLONE_NEWNS)) + _exit(6); + + close(mntns_fd); + } + + /* Now we should be in the child's empty mntns. */ + if (count_mounts() != 1) + _exit(7); + + close(pipe_fd[0]); + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Tests below do not require CLONE_EMPTY_MNTNS support. + */ + +/* + * Unknown 64-bit flags beyond the known set are rejected. + */ +TEST(unknown_flags_rejected) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct __clone_args args = { + .flags = 0x800000000ULL, + .exit_signal = SIGCHLD, + }; + pid_t ret; + + ret = sys_clone3(&args, sizeof(args)); + if (ret >= 0) { + if (ret == 0) + _exit(0); + wait_for_pid(ret); + _exit(1); + } + + if (errno != EINVAL) + _exit(2); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Regular clone3 with CLONE_NEWNS (without CLONE_EMPTY_MNTNS) still + * copies the full mount tree. 
+ */ +TEST(clone3_newns_full_copy) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct __clone_args args = { + .flags = CLONE_NEWNS, + .exit_signal = SIGCHLD, + }; + ssize_t parent_mounts; + pid_t inner; + + if (enter_userns()) + _exit(1); + + parent_mounts = count_mounts(); + if (parent_mounts < 1) + _exit(2); + + inner = sys_clone3(&args, sizeof(args)); + if (inner < 0) + _exit(3); + + if (inner == 0) { + /* Full copy should have at least as many mounts. */ + if (count_mounts() < parent_mounts) + _exit(1); + + _exit(0); + } + + _exit(wait_for_pid(inner)); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/empty_mntns/empty_mntns.h b/tools/testing/selftests/filesystems/empty_mntns/empty_mntns.h new file mode 100644 index 000000000000..3d9c6b14bbef --- /dev/null +++ b/tools/testing/selftests/filesystems/empty_mntns/empty_mntns.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef EMPTY_MNTNS_H +#define EMPTY_MNTNS_H + +#include <errno.h> +#include <stdlib.h> + +#include "../statmount/statmount.h" + +#ifndef UNSHARE_EMPTY_MNTNS +#define UNSHARE_EMPTY_MNTNS 0x00100000 +#endif + +#ifndef CLONE_EMPTY_MNTNS +#define CLONE_EMPTY_MNTNS (1ULL << 37) +#endif + +static inline ssize_t count_mounts(void) +{ + uint64_t list[4096]; + + return listmount(LSMT_ROOT, 0, 0, list, sizeof(list) / sizeof(list[0]), 0); +} + +#endif /* EMPTY_MNTNS_H */ diff --git a/tools/testing/selftests/filesystems/empty_mntns/empty_mntns_test.c b/tools/testing/selftests/filesystems/empty_mntns/empty_mntns_test.c new file mode 100644 index 000000000000..43e296b97d84 --- /dev/null +++ b/tools/testing/selftests/filesystems/empty_mntns/empty_mntns_test.c @@ -0,0 +1,725 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Tests for empty mount namespace creation via UNSHARE_EMPTY_MNTNS + * + * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + */ + +#define _GNU_SOURCE +#include 
<fcntl.h> +#include <linux/mount.h> +#include <linux/stat.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../utils.h" +#include "../wrappers.h" +#include "empty_mntns.h" +#include "kselftest_harness.h" + +static bool unshare_empty_mntns_supported(void) +{ + pid_t pid; + int status; + + pid = fork(); + if (pid < 0) + return false; + + if (pid == 0) { + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS) && errno == EINVAL) + _exit(1); + _exit(0); + } + + if (waitpid(pid, &status, 0) != pid) + return false; + + if (!WIFEXITED(status)) + return false; + + return WEXITSTATUS(status) == 0; +} + + +FIXTURE(empty_mntns) {}; + +FIXTURE_SETUP(empty_mntns) +{ + if (!unshare_empty_mntns_supported()) + SKIP(return, "UNSHARE_EMPTY_MNTNS not supported"); +} + +FIXTURE_TEARDOWN(empty_mntns) {} + +/* Verify unshare succeeds, produces exactly 1 mount, and root == cwd */ +TEST_F(empty_mntns, basic) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t root_id, cwd_id; + + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(2); + + if (count_mounts() != 1) + _exit(3); + + root_id = get_unique_mnt_id("/"); + cwd_id = get_unique_mnt_id("."); + if (root_id == 0 || cwd_id == 0) + _exit(4); + + if (root_id != cwd_id) + _exit(5); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * UNSHARE_EMPTY_MNTNS combined with CLONE_NEWUSER. + * + * The user namespace must be created first so /proc is still accessible + * for writing uid_map/gid_map. The empty mount namespace is created + * afterwards. 
+ */ +TEST_F(empty_mntns, with_clone_newuser) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uid_t uid = getuid(); + gid_t gid = getgid(); + char map[100]; + + if (unshare(CLONE_NEWUSER)) + _exit(1); + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map)) + _exit(2); + + if (write_file("/proc/self/setgroups", "deny")) + _exit(3); + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map)) + _exit(4); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(5); + + if (count_mounts() != 1) + _exit(6); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* UNSHARE_EMPTY_MNTNS combined with other namespace flags */ +TEST_F(empty_mntns, with_other_ns_flags) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS | CLONE_NEWUTS | CLONE_NEWIPC)) + _exit(2); + + if (count_mounts() != 1) + _exit(3); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* EPERM without proper capabilities */ +TEST_F(empty_mntns, eperm_without_caps) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Skip if already root */ + if (getuid() == 0) + _exit(0); + + if (unshare(UNSHARE_EMPTY_MNTNS) == 0) + _exit(1); + + if (errno != EPERM) + _exit(2); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Many source mounts still result in exactly 1 mount */ +TEST_F(empty_mntns, many_source_mounts) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + char tmpdir[] = "/tmp/empty_mntns_test.XXXXXX"; + int i; + + if (enter_userns()) + _exit(1); + + if (unshare(CLONE_NEWNS)) + _exit(2); + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) + _exit(3); + + if (!mkdtemp(tmpdir)) + _exit(4); + + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) + _exit(5); + + for (i = 0; i < 5; i++) { + char subdir[256]; + + snprintf(subdir, sizeof(subdir), 
"%s/sub%d", tmpdir, i); + if (mkdir(subdir, 0755) && errno != EEXIST) + _exit(6); + if (mount(subdir, subdir, NULL, MS_BIND, NULL)) + _exit(7); + } + + if (count_mounts() < 5) + _exit(8); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(9); + + if (count_mounts() != 1) + _exit(10); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* CWD on a different mount gets reset to root */ +TEST_F(empty_mntns, cwd_reset) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + char tmpdir[] = "/tmp/empty_mntns_cwd.XXXXXX"; + uint64_t root_id, cwd_id; + struct statmount *sm; + + if (enter_userns()) + _exit(1); + + if (unshare(CLONE_NEWNS)) + _exit(2); + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) + _exit(3); + + if (!mkdtemp(tmpdir)) + _exit(4); + + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) + _exit(5); + + if (chdir(tmpdir)) + _exit(6); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(7); + + root_id = get_unique_mnt_id("/"); + cwd_id = get_unique_mnt_id("."); + if (root_id == 0 || cwd_id == 0) + _exit(8); + + if (root_id != cwd_id) + _exit(9); + + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, 0); + if (!sm) + _exit(10); + + if (strcmp(sm->str + sm->mnt_point, "/") != 0) + _exit(11); + + free(sm); + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Verify statmount properties of the root mount */ +TEST_F(empty_mntns, mount_properties) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct statmount *sm; + uint64_t root_id; + + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(2); + + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(3); + + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE, 0); + if (!sm) + _exit(4); + + if (!(sm->mask & STATMOUNT_MNT_POINT)) + _exit(5); + + if (strcmp(sm->str + sm->mnt_point, "/") != 0) + _exit(6); + + if (!(sm->mask & 
STATMOUNT_MNT_BASIC)) + _exit(7); + + if (sm->mnt_id != root_id) + _exit(8); + + free(sm); + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Consecutive UNSHARE_EMPTY_MNTNS calls produce new namespaces */ +TEST_F(empty_mntns, repeated_unshare) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t first_root_id, second_root_id; + + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(2); + + if (count_mounts() != 1) + _exit(3); + + first_root_id = get_unique_mnt_id("/"); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(4); + + if (count_mounts() != 1) + _exit(5); + + second_root_id = get_unique_mnt_id("/"); + + if (first_root_id == second_root_id) + _exit(6); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Root mount's parent is itself */ +TEST_F(empty_mntns, root_is_own_parent) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct statmount sm; + uint64_t root_id; + + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(2); + + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(3); + + if (statmount(root_id, 0, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0) < 0) + _exit(4); + + if (!(sm.mask & STATMOUNT_MNT_BASIC)) + _exit(5); + + if (sm.mnt_parent_id != sm.mnt_id) + _exit(6); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Listmount returns only the root mount */ +TEST_F(empty_mntns, listmount_single_entry) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t list[16]; + ssize_t nr_mounts; + uint64_t root_id; + + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(2); + + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0); + if (nr_mounts != 1) + _exit(3); + + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(4); + + if (list[0] != root_id) + _exit(5); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Mount tmpfs over nullfs 
root to build a writable filesystem from scratch. + * This exercises the intended usage pattern: create an empty mount namespace + * (which has a nullfs root), then mount a real filesystem over it. + * + * Because resolving "/" returns the process root directly (via nd_jump_root) + * without following overmounts, we use the new mount API (fsopen/fsmount) + * to obtain a mount fd, then fchdir + chroot to enter the new filesystem. + */ +TEST_F(empty_mntns, overmount_tmpfs) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct statmount *sm; + uint64_t root_id, cwd_id; + int fd, fsfd, mntfd; + + if (enter_userns()) + _exit(1); + + if (unshare(UNSHARE_EMPTY_MNTNS)) + _exit(2); + + if (count_mounts() != 1) + _exit(3); + + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(4); + + /* Verify root is nullfs */ + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); + if (!sm) + _exit(5); + + if (!(sm->mask & STATMOUNT_FS_TYPE)) + _exit(6); + + if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) + _exit(7); + + free(sm); + + cwd_id = get_unique_mnt_id("."); + if (!cwd_id || root_id != cwd_id) + _exit(8); + + /* + * nullfs root is immutable. open(O_CREAT) returns ENOENT + * because empty_dir_lookup() returns -ENOENT before the + * IS_IMMUTABLE permission check in may_o_create() is reached. + */ + fd = open("/test", O_CREAT | O_RDWR, 0644); + if (fd >= 0) { + close(fd); + _exit(9); + } + if (errno != ENOENT) + _exit(10); + + /* + * Use the new mount API to create tmpfs and get a mount fd. + * We need the fd because after attaching the tmpfs on top of + * "/", path resolution of "/" still returns the process root + * (nullfs) without following the overmount. The mount fd + * lets us fchdir + chroot into the tmpfs. 
+ */ + fsfd = sys_fsopen("tmpfs", 0); + if (fsfd < 0) + _exit(11); + + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "1M", 0)) { + close(fsfd); + _exit(12); + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { + close(fsfd); + _exit(13); + } + + mntfd = sys_fsmount(fsfd, 0, 0); + close(fsfd); + if (mntfd < 0) + _exit(14); + + if (sys_move_mount(mntfd, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH)) { + close(mntfd); + _exit(15); + } + + if (count_mounts() != 2) { + close(mntfd); + _exit(16); + } + + /* Enter the tmpfs via the mount fd */ + if (fchdir(mntfd)) { + close(mntfd); + _exit(17); + } + + if (chroot(".")) { + close(mntfd); + _exit(18); + } + + close(mntfd); + + /* Verify "/" now resolves to tmpfs */ + root_id = get_unique_mnt_id("/"); + if (!root_id) + _exit(19); + + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); + if (!sm) + _exit(20); + + if (!(sm->mask & STATMOUNT_FS_TYPE)) + _exit(21); + + if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) + _exit(22); + + free(sm); + + /* Verify tmpfs is writable */ + fd = open("/testfile", O_CREAT | O_RDWR, 0644); + if (fd < 0) + _exit(23); + + if (write(fd, "test", 4) != 4) { + close(fd); + _exit(24); + } + + close(fd); + + if (access("/testfile", F_OK)) + _exit(25); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* + * Tests below do not require UNSHARE_EMPTY_MNTNS support. 
+ */ + +/* Invalid unshare flags return EINVAL */ +TEST(invalid_flags) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (enter_userns()) + _exit(1); + + if (unshare(0x80000000) == 0) + _exit(2); + + if (errno != EINVAL) + _exit(3); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Regular CLONE_NEWNS still copies the full mount tree */ +TEST(clone_newns_full_copy) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ssize_t nr_mounts_before, nr_mounts_after; + char tmpdir[] = "/tmp/empty_mntns_regr.XXXXXX"; + int i; + + if (enter_userns()) + _exit(1); + + if (unshare(CLONE_NEWNS)) + _exit(2); + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) + _exit(3); + + if (!mkdtemp(tmpdir)) + _exit(4); + + if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) + _exit(5); + + for (i = 0; i < 3; i++) { + char subdir[256]; + + snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); + if (mkdir(subdir, 0755) && errno != EEXIST) + _exit(6); + if (mount(subdir, subdir, NULL, MS_BIND, NULL)) + _exit(7); + } + + nr_mounts_before = count_mounts(); + if (nr_mounts_before < 3) + _exit(8); + + if (unshare(CLONE_NEWNS)) + _exit(9); + + nr_mounts_after = count_mounts(); + if (nr_mounts_after < nr_mounts_before) + _exit(10); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +/* Other namespace unshares are unaffected */ +TEST(other_ns_unaffected) +{ + pid_t pid; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + char hostname[256]; + + if (enter_userns()) + _exit(1); + + if (unshare(CLONE_NEWUTS)) + _exit(2); + + if (sethostname("test-empty-mntns", 16)) + _exit(3); + + if (gethostname(hostname, sizeof(hostname))) + _exit(4); + + if (strcmp(hostname, "test-empty-mntns") != 0) + _exit(5); + + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/empty_mntns/overmount_chroot_test.c 
b/tools/testing/selftests/filesystems/empty_mntns/overmount_chroot_test.c new file mode 100644 index 000000000000..6e21c58258c3 --- /dev/null +++ b/tools/testing/selftests/filesystems/empty_mntns/overmount_chroot_test.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test: rootfs overmounted multiple times with chroot into topmost + * + * This test creates a scenario where: + * 1. A new mount namespace is created with a tmpfs root (via pivot_root) + * 2. A mountpoint is created and overmounted multiple times + * 3. The caller chroots into the topmost mount layer + * + * The test verifies that: + * - Multiple overmounts create separate mount layers + * - Each layer's files are isolated + * - chroot correctly sets the process's root to the topmost layer + * - After chroot, only the topmost layer's files are visible + * + * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <linux/mount.h> +#include <linux/stat.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../utils.h" +#include "empty_mntns.h" +#include "kselftest_harness.h" + +#define NR_OVERMOUNTS 5 + +/* + * Setup a proper root filesystem using pivot_root. + * This ensures we own the root directory in our user namespace. 
+ */
+static int setup_root(void)
+{
+	char new_root[] = "/tmp/overmount_test.XXXXXX";
+	char put_old[256];
+
+	/* Back the future root with a fresh tmpfs on a unique directory. */
+	if (mkdtemp(new_root) == NULL ||
+	    mount("tmpfs", new_root, "tmpfs", 0, "size=10M") != 0)
+		return -1;
+
+	/* pivot_root needs a directory below the new root to park the old one. */
+	snprintf(put_old, sizeof(put_old), "%s/oldroot", new_root);
+	if (mkdir(put_old, 0755) != 0)
+		return -1;
+
+	/* Swap roots, then move the working directory into the new "/". */
+	if (syscall(SYS_pivot_root, new_root, put_old) != 0 || chdir("/") != 0)
+		return -1;
+
+	/* Detach the previous root and remove its mount point directory. */
+	if (umount2("/oldroot", MNT_DETACH) != 0 || rmdir("/oldroot") != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Test scenario:
+ * 1. Enter a user namespace to gain CAP_SYS_ADMIN
+ * 2. Create a new mount namespace
+ * 3. Setup a tmpfs root via pivot_root
+ * 4. Create a mountpoint /newroot and overmount it multiple times
+ * 5. Create a marker file in each layer
+ * 6. Chroot into /newroot (the topmost overmount)
+ * 7. 
Verify we're in the topmost layer (only topmost marker visible)
+ */
+TEST(overmount_chroot)
+{
+	pid_t pid;
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	/*
+	 * All work happens in the child; every failure point has its own
+	 * non-zero exit code and the parent only accepts exit status 0.
+	 */
+	if (pid == 0) {
+		ssize_t nr_mounts;
+		uint64_t mnt_ids[NR_OVERMOUNTS + 1];
+		uint64_t root_id_before, root_id_after;
+		struct statmount *sm;
+		char marker[64];
+		int fd, i;
+
+		/* Step 1: Enter user namespace for privileges */
+		if (enter_userns())
+			_exit(1);
+
+		/* Step 2: Create a new mount namespace */
+		if (unshare(CLONE_NEWNS))
+			_exit(2);
+
+		/* Step 3: Make the mount tree private */
+		if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
+			_exit(3);
+
+		/* Step 4: Setup a proper tmpfs root via pivot_root */
+		if (setup_root())
+			_exit(4);
+
+		/* Create the base mount point for overmounting */
+		if (mkdir("/newroot", 0755))
+			_exit(5);
+
+		/* Mount base tmpfs on /newroot */
+		if (mount("tmpfs", "/newroot", "tmpfs", 0, "size=1M"))
+			_exit(6);
+
+		/* Record base mount ID */
+		mnt_ids[0] = get_unique_mnt_id("/newroot");
+		if (!mnt_ids[0])
+			_exit(7);
+
+		/* Create marker in base layer */
+		fd = open("/newroot/layer_0", O_CREAT | O_RDWR, 0644);
+		if (fd < 0)
+			_exit(8);
+		if (write(fd, "layer_0", 7) != 7) {
+			close(fd);
+			_exit(9);
+		}
+		close(fd);
+
+		/* Step 5: Overmount /newroot multiple times with tmpfs */
+		for (i = 0; i < NR_OVERMOUNTS; i++) {
+			if (mount("tmpfs", "/newroot", "tmpfs", 0, "size=1M"))
+				_exit(10);
+
+			/* Record mount ID for this layer */
+			mnt_ids[i + 1] = get_unique_mnt_id("/newroot");
+			if (!mnt_ids[i + 1])
+				_exit(11);
+
+			/* Create a marker file in each layer */
+			snprintf(marker, sizeof(marker), "/newroot/layer_%d", i + 1);
+			fd = open(marker, O_CREAT | O_RDWR, 0644);
+			if (fd < 0)
+				_exit(12);
+
+			if (write(fd, marker, strlen(marker)) != (ssize_t)strlen(marker)) {
+				close(fd);
+				_exit(13);
+			}
+			close(fd);
+		}
+
+		/* Verify mount count increased */
+		nr_mounts = count_mounts();
+		if (nr_mounts < NR_OVERMOUNTS + 2)
+			_exit(14);
+
+		/* Record root mount ID before chroot */
+		root_id_before = get_unique_mnt_id("/newroot");
+
+		/* Verify this is the topmost layer's mount */
+		if (root_id_before != mnt_ids[NR_OVERMOUNTS])
+			_exit(15);
+
+		/* Step 6: Chroot into /newroot (the topmost overmount) */
+		if (chroot("/newroot"))
+			_exit(16);
+
+		/* Change to root directory within the chroot */
+		if (chdir("/"))
+			_exit(17);
+
+		/* Step 7: Verify we're in the topmost layer */
+		root_id_after = get_unique_mnt_id("/");
+
+		/* The mount ID should be the same as the topmost layer */
+		if (root_id_after != mnt_ids[NR_OVERMOUNTS])
+			_exit(18);
+
+		/* Verify the topmost layer's marker file exists */
+		snprintf(marker, sizeof(marker), "/layer_%d", NR_OVERMOUNTS);
+		if (access(marker, F_OK))
+			_exit(19);
+
+		/* Verify we cannot see markers from lower layers (they're hidden) */
+		for (i = 0; i < NR_OVERMOUNTS; i++) {
+			snprintf(marker, sizeof(marker), "/layer_%d", i);
+			if (access(marker, F_OK) == 0)
+				_exit(20);
+		}
+
+		/* Verify the root mount is tmpfs */
+		sm = statmount_alloc(root_id_after, 0,
+				     STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT |
+				     STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE, 0);
+		if (!sm)
+			_exit(21);
+
+		/*
+		 * sm->fs_type is only checked when STATMOUNT_FS_TYPE is set in
+		 * the returned mask. A missing bit must fail the test rather
+		 * than let the filesystem type go unverified.
+		 */
+		if (!(sm->mask & STATMOUNT_FS_TYPE)) {
+			free(sm);
+			_exit(22);
+		}
+
+		if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) {
+			free(sm);
+			_exit(23);
+		}
+
+		free(sm);
+		_exit(0);
+	}
+
+	ASSERT_EQ(wait_for_pid(pid), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index 65ede506305c..8bc57a2ef966 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -11,7 +11,7 @@
 #include <sys/epoll.h>
 #include <sys/socket.h>
 #include <sys/eventfd.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 struct epoll_mtcontext
 {
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
index 
85acb4e3ef00..1b48f267157d 100644 --- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c +++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c @@ -11,7 +11,7 @@ #include <pthread.h> #include <sys/epoll.h> #include <sys/eventfd.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define EVENTFD_TEST_ITERATIONS 100000UL @@ -50,7 +50,7 @@ TEST(eventfd_check_flag_rdwr) ASSERT_GE(fd, 0); flags = fcntl(fd, F_GETFL); - // since the kernel automatically added O_RDWR. + // The kernel automatically adds the O_RDWR flag. EXPECT_EQ(flags, O_RDWR); close(fd); @@ -85,7 +85,7 @@ TEST(eventfd_check_flag_nonblock) close(fd); } -TEST(eventfd_chek_flag_cloexec_and_nonblock) +TEST(eventfd_check_flag_cloexec_and_nonblock) { int fd, flags; @@ -178,8 +178,7 @@ TEST(eventfd_check_flag_semaphore) // The semaphore could only be obtained from fdinfo. ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n"); if (ret != 0) - ksft_print_msg("eventfd-semaphore check failed, msg: %s\n", - err.msg); + ksft_print_msg("eventfd semaphore flag check failed: %s\n", err.msg); EXPECT_EQ(ret, 0); close(fd); diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c new file mode 100644 index 000000000000..551c4a0f395a --- /dev/null +++ b/tools/testing/selftests/filesystems/fclog.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2025 SUSE LLC. + */ + +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mount.h> + +#include "kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen = (seen); \ + _tmp_seen >= 0 ? 
_tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, ==, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=, seen) + +FIXTURE(ns) +{ + int host_mntns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); +} + +TEST_F(ns, fscontext_log_enodata) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + /* A brand new fscontext has no log entries. */ + char buf[128] = {}; + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. 
*/ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc_after_fsmount) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID); + ASSERT_SUCCESS(mfd); + ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)); + + /* + * The fscontext log should still contain data even after + * FSCONFIG_CMD_CREATE and fsmount(). + */ + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_emsgsize) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + /* + * Attempting to read a message with too small a buffer should not + * result in the message getting consumed. + */ + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0)); + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1)); + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16)); + + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. 
*/ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c index 1136f93a9977..141badd671a9 100644 --- a/tools/testing/selftests/filesystems/file_stressor.c +++ b/tools/testing/selftests/filesystems/file_stressor.c @@ -12,7 +12,7 @@ #include <sys/mount.h> #include <unistd.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #include <linux/types.h> #include <linux/mount.h> @@ -156,7 +156,7 @@ TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2) ssize_t nr_read; /* - * Concurrently read /proc/<pid>/fd/ which rougly does: + * Concurrently read /proc/<pid>/fd/ which roughly does: * * f = fget_task_next(p, &fd); * if (!f) diff --git a/tools/testing/selftests/filesystems/fsmount_ns/.gitignore b/tools/testing/selftests/filesystems/fsmount_ns/.gitignore new file mode 100644 index 000000000000..f1ecf6c6e37b --- /dev/null +++ b/tools/testing/selftests/filesystems/fsmount_ns/.gitignore @@ -0,0 +1 @@ +fsmount_ns_test diff --git a/tools/testing/selftests/filesystems/fsmount_ns/Makefile b/tools/testing/selftests/filesystems/fsmount_ns/Makefile new file mode 100644 index 000000000000..d9647efc0739 --- /dev/null +++ b/tools/testing/selftests/filesystems/fsmount_ns/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := fsmount_ns_test + +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS := -lcap + +include ../../lib.mk + +$(OUTPUT)/fsmount_ns_test: fsmount_ns_test.c ../utils.c + $(CC) $(CFLAGS) -o $@ $^ $(LDLIBS) diff --git a/tools/testing/selftests/filesystems/fsmount_ns/fsmount_ns_test.c b/tools/testing/selftests/filesystems/fsmount_ns/fsmount_ns_test.c new file mode 100644 index 000000000000..b70b3051eed4 --- /dev/null +++ b/tools/testing/selftests/filesystems/fsmount_ns/fsmount_ns_test.c @@ -0,0 +1,1135 @@ +// SPDX-License-Identifier: 
GPL-2.0 +/* + * Copyright (C) 2026 Christian Brauner <brauner@kernel.org> + * + * Test for FSMOUNT_NAMESPACE flag. + * + * Test that fsmount() with FSMOUNT_NAMESPACE creates a new mount + * namespace containing the specified mount. + */ +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/nsfs.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../wrappers.h" +#include "../statmount/statmount.h" +#include "../utils.h" +#include "../../kselftest_harness.h" + +#ifndef FSMOUNT_NAMESPACE +#define FSMOUNT_NAMESPACE 0x00000002 +#endif + +#ifndef FSMOUNT_CLOEXEC +#define FSMOUNT_CLOEXEC 0x00000001 +#endif + +#ifndef FSCONFIG_CMD_CREATE +#define FSCONFIG_CMD_CREATE 6 +#endif + +static int get_mnt_ns_id(int fd, uint64_t *mnt_ns_id) +{ + if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) + return -errno; + return 0; +} + +static int get_mnt_ns_id_from_path(const char *path, uint64_t *mnt_ns_id) +{ + int fd, ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + + ret = get_mnt_ns_id(fd, mnt_ns_id); + close(fd); + return ret; +} + +static void log_mount(struct __test_metadata *_metadata, struct statmount *sm) +{ + const char *fs_type = ""; + const char *mnt_root = ""; + const char *mnt_point = ""; + + if (sm->mask & STATMOUNT_FS_TYPE) + fs_type = sm->str + sm->fs_type; + if (sm->mask & STATMOUNT_MNT_ROOT) + mnt_root = sm->str + sm->mnt_root; + if (sm->mask & STATMOUNT_MNT_POINT) + mnt_point = sm->str + sm->mnt_point; + + TH_LOG(" mnt_id: %llu, parent_id: %llu, fs_type: %s, root: %s, point: %s", + (unsigned long long)sm->mnt_id, + (unsigned long long)sm->mnt_parent_id, + fs_type, mnt_root, mnt_point); +} + +static void dump_mounts(struct __test_metadata *_metadata, uint64_t mnt_ns_id) +{ + uint64_t list[256]; + ssize_t nr_mounts; + + nr_mounts = 
listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0);
+	if (nr_mounts < 0) {
+		TH_LOG("listmount failed: %s", strerror(errno));
+		return;
+	}
+
+	TH_LOG("Mount namespace %llu contains %zd mount(s):",
+	       (unsigned long long)mnt_ns_id, nr_mounts);
+
+	for (ssize_t i = 0; i < nr_mounts; i++) {
+		struct statmount *sm;
+
+		sm = statmount_alloc(list[i], mnt_ns_id,
+				     STATMOUNT_MNT_BASIC |
+				     STATMOUNT_FS_TYPE |
+				     STATMOUNT_MNT_ROOT |
+				     STATMOUNT_MNT_POINT, 0);
+		if (!sm) {
+			TH_LOG(" [%zd] mnt_id %llu: statmount failed: %s",
+			       i, (unsigned long long)list[i], strerror(errno));
+			continue;
+		}
+
+		log_mount(_metadata, sm);
+		free(sm);
+	}
+}
+
+/*
+ * Open a tmpfs filesystem context and run FSCONFIG_CMD_CREATE on it.
+ *
+ * Returns the filesystem context fd on success; the caller owns it and must
+ * close it. Returns a negative errno value if fsopen() or fsconfig() fails,
+ * in which case no fd is left open.
+ */
+static int create_tmpfs_fd(void)
+{
+	int fs_fd, ret;
+
+	fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC);
+	if (fs_fd < 0)
+		return -errno;
+
+	ret = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+	if (ret < 0) {
+		/* Capture errno first: close() is allowed to overwrite it. */
+		ret = -errno;
+		close(fs_fd);
+		return ret;
+	}
+
+	return fs_fd;
+}
+
+FIXTURE(fsmount_ns)
+{
+	int fd;
+	int fs_fd;
+	uint64_t current_ns_id;
+};
+
+FIXTURE_VARIANT(fsmount_ns)
+{
+	const char *fstype;
+	unsigned int flags;
+	bool expect_success;
+	bool expect_different_ns;
+	int min_mounts;
+};
+
+FIXTURE_VARIANT_ADD(fsmount_ns, basic_tmpfs)
+{
+	.fstype = "tmpfs",
+	.flags = FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC,
+	.expect_success = true,
+	.expect_different_ns = true,
+	.min_mounts = 1,
+};
+
+FIXTURE_VARIANT_ADD(fsmount_ns, cloexec_only)
+{
+	.fstype = "tmpfs",
+	.flags = FSMOUNT_CLOEXEC,
+	.expect_success = true,
+	.expect_different_ns = false,
+	.min_mounts = 1,
+};
+
+FIXTURE_VARIANT_ADD(fsmount_ns, namespace_only)
+{
+	.fstype = "tmpfs",
+	.flags = FSMOUNT_NAMESPACE,
+	.expect_success = true,
+	.expect_different_ns = true,
+	.min_mounts = 1,
+};
+
+FIXTURE_SETUP(fsmount_ns)
+{
+	int ret;
+
+	self->fd = -1;
+	self->fs_fd = -1;
+
+	/* Check if fsopen syscall is supported */
+	ret = sys_fsopen("tmpfs", 0);
+	if (ret == -1 && errno == ENOSYS)
+		SKIP(return, "fsopen() syscall not supported");
+	if (ret >= 0)
+		close(ret);
+
+	/* 
Check if statmount/listmount are supported */ + ret = statmount(0, 0, 0, 0, NULL, 0, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "statmount() syscall not supported"); + + /* Get current mount namespace ID for comparison */ + ret = get_mnt_ns_id_from_path("/proc/self/ns/mnt", &self->current_ns_id); + if (ret < 0) + SKIP(return, "Failed to get current mount namespace ID"); +} + +FIXTURE_TEARDOWN(fsmount_ns) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fs_fd >= 0) + close(self->fs_fd); +} + +TEST_F(fsmount_ns, create_namespace) +{ + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, variant->flags, 0); + + if (!variant->expect_success) { + ASSERT_LT(self->fd, 0); + return; + } + + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + if (variant->expect_different_ns) { + /* Verify we can get the namespace ID from the fd */ + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + /* Verify it's a different namespace */ + ASSERT_NE(new_ns_id, self->current_ns_id); + + /* List mounts in the new namespace */ + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 0) { + TH_LOG("%m - listmount failed"); + } + + /* Verify minimum expected mounts */ + ASSERT_GE(nr_mounts, variant->min_mounts); + TH_LOG("Namespace contains %zd mounts", nr_mounts); + } +} + +TEST_F(fsmount_ns, setns_into_namespace) +{ + uint64_t new_ns_id; + pid_t pid; + int status; + int ret; + + /* Only test with FSMOUNT_NAMESPACE flag */ + if (!(variant->flags & FSMOUNT_NAMESPACE)) + SKIP(return, "setns test only for FSMOUNT_NAMESPACE case"); + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, variant->flags, 0); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not 
supported"); + + ASSERT_GE(self->fd, 0); + + /* Get namespace ID and dump all mounts */ + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + dump_mounts(_metadata, new_ns_id); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child: try to enter the namespace */ + if (setns(self->fd, CLONE_NEWNS) < 0) + _exit(1); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + +TEST_F(fsmount_ns, verify_mount_properties) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + /* Only test with basic FSMOUNT_NAMESPACE flags */ + if (variant->flags != (FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC)) + SKIP(return, "mount properties test only for basic case"); + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + /* Get info about the root mount */ + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + TH_LOG("Root mount id: %llu, parent: %llu", + (unsigned long long)sm.mnt_id, + (unsigned long long)sm.mnt_parent_id); +} + +TEST_F(fsmount_ns, verify_tmpfs_type) +{ + struct statmount *sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + const char *fs_type; + int ret; + + /* Only test with basic FSMOUNT_NAMESPACE flags */ + if (variant->flags != (FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC)) + SKIP(return, "fs type test only for basic case"); + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0); + if 
(self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + sm = statmount_alloc(list[0], new_ns_id, STATMOUNT_FS_TYPE, 0); + ASSERT_NE(sm, NULL); + + fs_type = sm->str + sm->fs_type; + ASSERT_STREQ(fs_type, "tmpfs"); + + free(sm); +} + +FIXTURE(fsmount_ns_caps) +{ + bool has_caps; +}; + +FIXTURE_SETUP(fsmount_ns_caps) +{ + int ret; + + /* Check if fsopen syscall is supported */ + ret = sys_fsopen("tmpfs", 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "fsopen() syscall not supported"); + if (ret >= 0) + close(ret); + + self->has_caps = (geteuid() == 0); +} + +FIXTURE_TEARDOWN(fsmount_ns_caps) +{ +} + +TEST_F(fsmount_ns_caps, requires_cap_sys_admin) +{ + pid_t pid; + int status; + int fs_fd; + + /* + * Prepare the configured filesystem fd as root before forking. + * fsopen() requires CAP_SYS_ADMIN in the mount namespace's + * user_ns, which won't be available after enter_userns(). 
+ */ + fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_GE(fs_fd, 0); + + ASSERT_EQ(sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int fd; + + /* Child: drop privileges using utils.h helper */ + if (enter_userns() != 0) + _exit(2); + + /* Drop all caps using utils.h helper */ + if (caps_down() == 0) + _exit(3); + + fd = sys_fsmount(fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0); + close(fs_fd); + + if (fd >= 0) { + close(fd); + /* Should have failed without caps */ + _exit(1); + } + + if (errno == EPERM) + _exit(0); + + /* EINVAL means FSMOUNT_NAMESPACE not supported */ + if (errno == EINVAL) + _exit(6); + + /* Unexpected error */ + _exit(7); + } + + close(fs_fd); + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + /* Expected: EPERM without caps */ + break; + case 1: + ASSERT_FALSE(true) TH_LOG("FSMOUNT_NAMESPACE succeeded without caps"); + break; + case 2: + SKIP(return, "enter_userns failed"); + break; + case 3: + SKIP(return, "caps_down failed"); + break; + case 6: + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} + +FIXTURE(fsmount_ns_userns) +{ + int fd; + int fs_fd; +}; + +FIXTURE_SETUP(fsmount_ns_userns) +{ + int ret; + + self->fd = -1; + self->fs_fd = -1; + + /* Check if fsopen syscall is supported */ + ret = sys_fsopen("tmpfs", 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "fsopen() syscall not supported"); + if (ret >= 0) + close(ret); + + /* Check if statmount/listmount are supported */ + ret = statmount(0, 0, 0, 0, NULL, 0, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "statmount() syscall not supported"); +} + +FIXTURE_TEARDOWN(fsmount_ns_userns) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fs_fd >= 0) + close(self->fs_fd); +} + 
+/*
+ * fsmount(FSMOUNT_NAMESPACE) called after setup_userns() must return an fd
+ * from which a mount namespace ID can be read, and listmount() on that
+ * namespace must report at least one mount.
+ *
+ * The work runs in a forked child that reports its outcome through its exit
+ * code; the parent maps each code to a pass, skip or failure below.
+ */
+TEST_F(fsmount_ns_userns, create_in_userns)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		uint64_t new_ns_id;
+		uint64_t list[256];
+		ssize_t nr_mounts;
+		int fs_fd, fd;
+
+		/* Create new user namespace (also creates mount namespace) */
+		if (setup_userns() != 0)
+			_exit(2);
+
+		/* Now we have CAP_SYS_ADMIN in the user namespace */
+		fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC);
+		if (fs_fd < 0)
+			_exit(3);
+
+		if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
+			close(fs_fd);
+			_exit(4);
+		}
+
+		fd = sys_fsmount(fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0);
+
+		/*
+		 * Inspect errno before any other call: close() is allowed to
+		 * overwrite it. On failure _exit() releases fs_fd.
+		 */
+		if (fd < 0) {
+			if (errno == EINVAL)
+				_exit(6); /* FSMOUNT_NAMESPACE not supported */
+			_exit(1);
+		}
+
+		close(fs_fd);
+
+		/* Verify we can get the namespace ID */
+		if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+			_exit(7);
+
+		/* Verify we can list mounts in the new namespace */
+		nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0);
+		if (nr_mounts < 0)
+			_exit(8);
+
+		/* Should have at least 1 mount (the tmpfs) */
+		if (nr_mounts < 1)
+			_exit(9);
+
+		close(fd);
+		_exit(0);
+	}
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	switch (WEXITSTATUS(status)) {
+	case 0:
+		/* Success */
+		break;
+	case 1:
+		ASSERT_FALSE(true) TH_LOG("fsmount(FSMOUNT_NAMESPACE) failed in userns");
+		break;
+	case 2:
+		SKIP(return, "setup_userns failed");
+		break;
+	case 3:
+		SKIP(return, "fsopen failed in userns");
+		break;
+	case 4:
+		SKIP(return, "fsconfig CMD_CREATE failed in userns");
+		break;
+	case 6:
+		SKIP(return, "FSMOUNT_NAMESPACE not supported");
+		break;
+	case 7:
+		ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+		break;
+	case 8:
+		ASSERT_FALSE(true) TH_LOG("listmount failed in new namespace");
+		break;
+	case 9:
+		ASSERT_FALSE(true) TH_LOG("New namespace has no mounts");
+		break;
+	default:
+		ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+					  WEXITSTATUS(status));
+		break;
+	}
+}
+
+/*
+ * After setup_userns(), create a mount namespace with
+ * fsmount(FSMOUNT_NAMESPACE) and check that a forked grandchild can setns()
+ * into it through the returned fd.
+ *
+ * The child reports its outcome through its exit code; the parent maps each
+ * code to a pass, skip or failure below.
+ */
+TEST_F(fsmount_ns_userns, setns_in_userns)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		uint64_t new_ns_id;
+		int fs_fd, fd;
+		pid_t inner_pid;
+		int inner_status;
+
+		/* Create new user namespace */
+		if (setup_userns() != 0)
+			_exit(2);
+
+		fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC);
+		if (fs_fd < 0)
+			_exit(3);
+
+		if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
+			close(fs_fd);
+			_exit(4);
+		}
+
+		fd = sys_fsmount(fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0);
+
+		/*
+		 * Inspect errno before any other call: close() is allowed to
+		 * overwrite it. On failure _exit() releases fs_fd.
+		 */
+		if (fd < 0) {
+			if (errno == EINVAL)
+				_exit(6);
+			_exit(1);
+		}
+
+		close(fs_fd);
+
+		if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+			_exit(7);
+
+		/* Fork again to test setns into the new namespace */
+		inner_pid = fork();
+		if (inner_pid < 0)
+			_exit(10);
+
+		if (inner_pid == 0) {
+			/* Inner child: enter the new namespace */
+			if (setns(fd, CLONE_NEWNS) < 0)
+				_exit(1);
+			_exit(0);
+		}
+
+		if (waitpid(inner_pid, &inner_status, 0) != inner_pid)
+			_exit(11);
+
+		if (!WIFEXITED(inner_status) || WEXITSTATUS(inner_status) != 0)
+			_exit(12);
+
+		close(fd);
+		_exit(0);
+	}
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	switch (WEXITSTATUS(status)) {
+	case 0:
+		/* Success */
+		break;
+	case 1:
+		ASSERT_FALSE(true) TH_LOG("fsmount or setns failed in userns");
+		break;
+	case 2:
+		SKIP(return, "setup_userns failed");
+		break;
+	case 3:
+		SKIP(return, "fsopen failed in userns");
+		break;
+	case 4:
+		SKIP(return, "fsconfig CMD_CREATE failed in userns");
+		break;
+	case 6:
+		SKIP(return, "FSMOUNT_NAMESPACE not supported");
+		break;
+	case 7:
+		ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+		break;
+	case 10:
+		ASSERT_FALSE(true) TH_LOG("Inner fork failed");
+		break;
+	case 11:
+		ASSERT_FALSE(true) TH_LOG("Inner waitpid failed");
+		break;
+	case 12:
+		ASSERT_FALSE(true) TH_LOG("setns into new namespace failed");
+		break;
+	default:
+		ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+					  WEXITSTATUS(status));
+		break;
+	}
+}
+
+/*
+ * After setup_userns(), enter the mount namespace created by
+ * fsmount(FSMOUNT_NAMESPACE) and check that umount2(MNT_DETACH) on every
+ * mount reported by listmount() fails with EINVAL.
+ *
+ * The child reports its outcome through its exit code; the parent maps each
+ * code to a pass, skip or failure below.
+ */
+TEST_F(fsmount_ns_userns, umount_fails_einval)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		uint64_t new_ns_id;
+		uint64_t list[256];
+		ssize_t nr_mounts;
+		int fs_fd, fd;
+		ssize_t i;
+
+		/* Create new user namespace */
+		if (setup_userns() != 0)
+			_exit(2);
+
+		fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC);
+		if (fs_fd < 0)
+			_exit(3);
+
+		if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
+			close(fs_fd);
+			_exit(4);
+		}
+
+		fd = sys_fsmount(fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0);
+
+		/*
+		 * Inspect errno before any other call: close() is allowed to
+		 * overwrite it. On failure _exit() releases fs_fd.
+		 */
+		if (fd < 0) {
+			if (errno == EINVAL)
+				_exit(6);
+			_exit(1);
+		}
+
+		close(fs_fd);
+
+		if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+			_exit(7);
+
+		/* Get all mounts in the new namespace */
+		nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, LISTMOUNT_REVERSE);
+		if (nr_mounts < 0)
+			_exit(13);
+
+		if (nr_mounts < 1)
+			_exit(14);
+
+		/* Enter the new namespace */
+		if (setns(fd, CLONE_NEWNS) < 0)
+			_exit(8);
+
+		for (i = 0; i < nr_mounts; i++) {
+			struct statmount *sm;
+			const char *mnt_point;
+
+			sm = statmount_alloc(list[i], new_ns_id,
+					     STATMOUNT_MNT_POINT, 0);
+			if (!sm)
+				_exit(15);
+
+			mnt_point = sm->str + sm->mnt_point;
+
+			if (umount2(mnt_point, MNT_DETACH) == 0) {
+				free(sm);
+				_exit(9);
+			}
+
+			/* errno is read before free() so it still belongs to umount2() */
+			if (errno != EINVAL) {
+				/* Wrong error */
+				free(sm);
+				_exit(10);
+			}
+
+			free(sm);
+		}
+
+		close(fd);
+		_exit(0);
+	}
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	switch (WEXITSTATUS(status)) {
+	case 0:
+		break;
+	case 1:
+		ASSERT_FALSE(true) TH_LOG("fsmount(FSMOUNT_NAMESPACE) failed");
+		break;
+	case 2:
+		SKIP(return, "setup_userns failed");
+		break;
+	case 3:
+		SKIP(return, "fsopen failed in userns");
+		break;
+	case 4:
+		SKIP(return, "fsconfig CMD_CREATE failed in userns");
+		break;
+	case 6:
+		SKIP(return, "FSMOUNT_NAMESPACE not supported");
+		break;
+	case 7:
+		ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID");
+		break;
+	case 8:
+		ASSERT_FALSE(true) TH_LOG("setns into new namespace failed");
+		break;
+	case 9:
+		ASSERT_FALSE(true) TH_LOG("umount succeeded but should have failed with EINVAL");
+		break;
+	case 10:
+		ASSERT_FALSE(true) TH_LOG("umount failed with wrong error (expected EINVAL)");
+		break;
+	case 13:
+		ASSERT_FALSE(true) TH_LOG("listmount failed");
+		break;
+	case 14:
+		ASSERT_FALSE(true) TH_LOG("No mounts in new namespace");
+		break;
+	case 15:
+		ASSERT_FALSE(true) TH_LOG("statmount_alloc failed");
+		break;
+	default:
+		ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)",
+					  WEXITSTATUS(status));
+		break;
+	}
+}
+
+TEST_F(fsmount_ns_userns, umount_succeeds)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		uint64_t new_ns_id;
+		uint64_t list[256];
+		ssize_t nr_mounts;
+		int fs_fd, fd;
+		ssize_t i;
+
+		if (unshare(CLONE_NEWNS))
+			_exit(1);
+
+		if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) != 0)
+			_exit(1);
+
+		fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC);
+		if (fs_fd < 0)
+			_exit(3);
+
+		if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
+			close(fs_fd);
+			_exit(4);
+		}
+
+		fd = sys_fsmount(fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, 0);
+		close(fs_fd);
+
+		if (fd < 0) {
+			if (errno == EINVAL)
+				_exit(6);
+			_exit(1);
+		}
+
+		if (get_mnt_ns_id(fd, &new_ns_id) != 0)
+			_exit(7);
+
+		/* Get all mounts in the new namespace */
+		nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, LISTMOUNT_REVERSE);
+		if (nr_mounts < 0)
+			_exit(13);
+
+		if (nr_mounts < 1)
+			_exit(14);
+
+		/* Enter the new namespace */
+		if (setns(fd, CLONE_NEWNS) < 0)
+			_exit(8);
+
+		for (i = 0; i < nr_mounts; i++) {
+			struct statmount *sm;
+			const char *mnt_point;
+
+			sm = statmount_alloc(list[i], new_ns_id,
+					     STATMOUNT_MNT_POINT, 0);
+			if (!sm)
+				_exit(15);
+
+			mnt_point = sm->str + sm->mnt_point;
+
+			if (umount2(mnt_point, MNT_DETACH) != 0) {
+				free(sm);
+				_exit(9);
+			}
+
+			free(sm);
+		}
+
+		close(fd);
+		_exit(0);
+	}
+ + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + break; + case 1: + ASSERT_FALSE(true) TH_LOG("fsmount(FSMOUNT_NAMESPACE) failed or unshare failed"); + break; + case 3: + SKIP(return, "fsopen failed"); + break; + case 4: + SKIP(return, "fsconfig CMD_CREATE failed"); + break; + case 6: + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + break; + case 7: + ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID"); + break; + case 8: + ASSERT_FALSE(true) TH_LOG("setns into new namespace failed"); + break; + case 9: + ASSERT_FALSE(true) TH_LOG("umount failed but should have succeeded"); + break; + case 13: + ASSERT_FALSE(true) TH_LOG("listmount failed"); + break; + case 14: + ASSERT_FALSE(true) TH_LOG("No mounts in new namespace"); + break; + case 15: + ASSERT_FALSE(true) TH_LOG("statmount_alloc failed"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} + +FIXTURE(fsmount_ns_mount_attrs) +{ + int fd; + int fs_fd; +}; + +FIXTURE_SETUP(fsmount_ns_mount_attrs) +{ + int ret; + + self->fd = -1; + self->fs_fd = -1; + + /* Check if fsopen syscall is supported */ + ret = sys_fsopen("tmpfs", 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "fsopen() syscall not supported"); + if (ret >= 0) + close(ret); + + /* Check if statmount/listmount are supported */ + ret = statmount(0, 0, 0, 0, NULL, 0, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "statmount() syscall not supported"); +} + +FIXTURE_TEARDOWN(fsmount_ns_mount_attrs) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fs_fd >= 0) + close(self->fs_fd); +} + +TEST_F(fsmount_ns_mount_attrs, readonly) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, + 
MOUNT_ATTR_RDONLY); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + /* Verify the mount is read-only */ + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_RDONLY); +} + +TEST_F(fsmount_ns_mount_attrs, noexec) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, + MOUNT_ATTR_NOEXEC); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + /* Verify the mount is noexec */ + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_NOEXEC); +} + +TEST_F(fsmount_ns_mount_attrs, nosuid) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, + MOUNT_ATTR_NOSUID); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + /* Verify the mount is 
nosuid */ + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_NOSUID); +} + +TEST_F(fsmount_ns_mount_attrs, noatime) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, + MOUNT_ATTR_NOATIME); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + /* Verify the mount is noatime */ + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_NOATIME); +} + +TEST_F(fsmount_ns_mount_attrs, combined) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fs_fd = create_tmpfs_fd(); + ASSERT_GE(self->fs_fd, 0); + + self->fd = sys_fsmount(self->fs_fd, FSMOUNT_NAMESPACE | FSMOUNT_CLOEXEC, + MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | + MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOATIME); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "FSMOUNT_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + /* Verify all attributes are set */ + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_RDONLY); + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_NOEXEC); + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_NOSUID); + ASSERT_TRUE(sm.mnt_attr & MOUNT_ATTR_NOATIME); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore new file mode 
100644 index 000000000000..3e72e742d08e --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +fuse_mnt +fusectl_test diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile new file mode 100644 index 000000000000..612aad69a93a --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) + +TEST_GEN_PROGS := fusectl_test +TEST_GEN_FILES := fuse_mnt + +include ../../lib.mk + +VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse +endif + +VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS := -lfuse -pthread +endif + +$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS) +$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS) diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c new file mode 100644 index 000000000000..d12b17f30fad --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fusectl test file-system + * Creates a simple FUSE filesystem with a single read-write file (/test) + */ + +#define FUSE_USE_VERSION 26 + +#include <fuse.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +static char *content; +static size_t content_size = 0; +static const char test_path[] = "/test"; + +/* Report "/" as a directory and "/test" as a regular file of content_size bytes. */ +static int test_getattr(const char *path, struct stat *st) +{ + memset(st, 0, sizeof(*st)); + + if (!strcmp(path, "/")) { + st->st_mode = S_IFDIR | 0755; + st->st_nlink = 2; + return 0; + } + + if (!strcmp(path, test_path)) { + st->st_mode = S_IFREG | 0664; + st->st_nlink = 1; + st->st_size = content_size; + return 0; + } + + return -ENOENT; +} + +/* List the single entry "test" (plus "." and "..") in the root directory. */ +static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + if (strcmp(path, "/")) + return -ENOENT; + + filler(buf, ".", NULL, 0); + filler(buf, "..", NULL, 0); + filler(buf, test_path + 1, NULL, 0); + + return 0; +} + +/* Only "/test" can be opened. */ +static int test_open(const char *path, struct fuse_file_info *fi) +{ + if (strcmp(path, test_path)) + return -ENOENT; + + return 0; +} + +/* Copy up to @size bytes starting at @offset; reads at or past EOF return 0. */ +static int test_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (!content || content_size == 0) + return 0; + + if (offset >= content_size) + return 0; + + if (offset + size > content_size) + size = content_size - offset; + + memcpy(buf, content + offset, size); + + return size; +} + +/* + * Write @size bytes from @buf at @offset, growing the backing buffer when the + * write extends past the current end. Writes that start beyond the current + * end are rejected with -EINVAL. Returns the number of bytes written, or + * -ENOMEM if the buffer cannot be grown (the old contents are kept). + */ +static int test_write(const char *path, const char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + size_t new_size; + char *new_content; + + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (offset > content_size) + return -EINVAL; + + new_size = MAX(offset + size, content_size); + + if (new_size > content_size) { + /* Keep the old buffer and size intact if realloc() fails. */ + new_content = realloc(content, new_size); + if (!new_content) + return -ENOMEM; + + content = new_content; + content_size = new_size; + } + + /* A zero-length write to an empty file leaves content NULL. */ + if (size) + memcpy(content + offset, buf, size); + + return size; +} + +/* + * Resize the file to @size bytes. A grown region is zero-filled. Returns 0 + * on success, or -ENOMEM if the buffer cannot be resized (the old contents + * and size are kept). + */ +static int test_truncate(const char *path, off_t size) +{ + char *new_content; + + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (size == 0) { + free(content); + content = NULL; + content_size = 0; + return 0; + } + + /* Keep the old buffer and size intact if realloc() fails. */ + new_content = realloc(content, size); + if (!new_content) + return -ENOMEM; + + content = new_content; + + if (size > content_size) + memset(content + content_size, 0, size - content_size); + + content_size = size; + return 0; +} + +static struct fuse_operations memfd_ops = { + .getattr = test_getattr, + .readdir = test_readdir, + .open = test_open, + .read = test_read, + .write = test_write, + .truncate = test_truncate, +}; + +int main(int argc, char *argv[]) +{ + return fuse_main(argc, argv, &memfd_ops, NULL); +}
diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c new file mode 100644 index 000000000000..0d1d012c35ed --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Chen Linxuan <chenlinxuan@uniontech.com> + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <dirent.h> +#include <sched.h> +#include <linux/limits.h> + +#include "kselftest_harness.h" + +#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections" +#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX" +#define FUSE_DEVICE "/dev/fuse" +#define FUSECTL_TEST_VALUE "1" + +static void write_file(struct __test_metadata *const _metadata, + const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + + ASSERT_GE(fd, 0); + ASSERT_EQ(write(fd, val, len), len); + ASSERT_EQ(close(fd), 0); +} + +FIXTURE(fusectl){ + char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)]; + int connection; +}; + +FIXTURE_SETUP(fusectl) +{ + const char *fuse_mnt_prog = "./fuse_mnt"; + int status, pid; + struct stat statbuf; + uid_t uid = getuid(); + gid_t gid = getgid(); + char buf[32]; + + /* Setup userns */ + ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0); + sprintf(buf, "0 
%d 1", uid); + write_file(_metadata, "/proc/self/uid_map", buf); + write_file(_metadata, "/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file(_metadata, "/proc/self/gid_map", buf); + ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0); + + strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT); + + if (!mkdtemp(self->fuse_mountpoint)) + SKIP(return, + "Failed to create FUSE mountpoint %s", + strerror(errno)); + + if (access(FUSECTL_MOUNTPOINT, F_OK)) + SKIP(return, + "FUSE control filesystem not mounted"); + + pid = fork(); + if (pid < 0) + SKIP(return, + "Failed to fork FUSE daemon process: %s", + strerror(errno)); + + if (pid == 0) { + execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL); + exit(errno); + } + + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + SKIP(return, + "Failed to start FUSE daemon %s", + strerror(WEXITSTATUS(status))); + } + + if (stat(self->fuse_mountpoint, &statbuf)) + SKIP(return, + "Failed to stat FUSE mountpoint %s", + strerror(errno)); + + self->connection = statbuf.st_dev; +} + +FIXTURE_TEARDOWN(fusectl) +{ + umount2(self->fuse_mountpoint, MNT_DETACH); + rmdir(self->fuse_mountpoint); +} + +TEST_F(fusectl, abort) +{ + char path_buf[PATH_MAX]; + int abort_fd, test_fd, ret; + + sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection); + + ASSERT_EQ(0, access(path_buf, F_OK)); + + abort_fd = open(path_buf, O_WRONLY); + ASSERT_GE(abort_fd, 0); + + sprintf(path_buf, "%s/test", self->fuse_mountpoint); + + test_fd = open(path_buf, O_RDWR); + ASSERT_GE(test_fd, 0); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, 0); + + ret = write(test_fd, "test", sizeof("test")); + ASSERT_EQ(ret, sizeof("test")); + + ret = lseek(test_fd, 0, SEEK_SET); + ASSERT_GE(ret, 0); + + ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE)); + ASSERT_GT(ret, 0); + + close(abort_fd); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + 
ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOTCONN); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c new file mode 100644 index 000000000000..84c2b910a60d --- /dev/null +++ b/tools/testing/selftests/filesystems/kernfs_test.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "kselftest_harness.h" +#include "wrappers.h" + +TEST(kernfs_listxattr) +{ + int fd; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_EQ(flistxattr(fd, NULL, 0), 0); + EXPECT_EQ(close(fd), 0); +} + +TEST(kernfs_getxattr) +{ + int fd; + char buf[1]; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0); + ASSERT_EQ(errno, ENODATA); + EXPECT_EQ(close(fd), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore index 82a4846cbc4b..124339ea7845 100644 --- a/tools/testing/selftests/filesystems/mount-notify/.gitignore +++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only /*_test +/*_test_ns diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile index 10be0227b5ae..836a4eb7be06 100644 --- a/tools/testing/selftests/filesystems/mount-notify/Makefile +++ b/tools/testing/selftests/filesystems/mount-notify/Makefile @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := 
mount-notify_test +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + +TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns include ../../lib.mk + +$(OUTPUT)/mount-notify_test: ../utils.c +$(OUTPUT)/mount-notify_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd1..6381af6a40e3 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -2,54 +2,39 @@ // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include <fcntl.h> #include <sched.h> #include <stdio.h> #include <string.h> #include <sys/stat.h> #include <sys/mount.h> -#include <linux/fanotify.h> #include <unistd.h> -#include <sys/fanotify.h> #include <sys/syscall.h> +#include <sys/fanotify.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #include "../statmount/statmount.h" +#include "../utils.h" -#ifndef FAN_MNT_ATTACH -struct fanotify_event_info_mnt { - struct fanotify_event_info_header hdr; - __u64 mnt_id; -}; -#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ -#endif - -#ifndef FAN_MNT_DETACH -#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ -#endif - -#ifndef FAN_REPORT_MNT -#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ -#endif - -#ifndef FAN_MARK_MNTNS -#define FAN_MARK_MNTNS 0x00000110 -#endif - -static uint64_t get_mnt_id(struct __test_metadata *const _metadata, - const char *path) -{ - struct statx sx; +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; - ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0); - ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE)); - return sx.stx_mnt_id; -} 
+static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; -static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +46,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -86,23 +71,37 @@ FIXTURE_SETUP(fanotify) ASSERT_EQ(mkdir("b", 0700), 0); - self->root_id = get_mnt_id(_metadata, "/"); + self->root_id = get_unique_mnt_id("/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. 
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +122,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c new file mode 100644 index 000000000000..320ee25dc8a5 --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> + +#define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/fanotify.h> + +#include "kselftest_harness.h" +#include "../statmount/statmount.h" +#include "../utils.h" + +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; + +static const int mark_types[] = { + FAN_MARK_FILESYSTEM, + FAN_MARK_MOUNT, + FAN_MARK_INODE +}; + +static 
const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + +FIXTURE(fanotify) { + int fan_fd[NUM_FAN_FDS]; + char buf[256]; + unsigned int rem; + void *next; + char root_mntpoint[sizeof(root_mntpoint_templ)]; + int orig_root; + int orig_ns_fd; + int ns_fd; + uint64_t root_id; +}; + +// Build a private tmpfs root containing "a" and "b", then create one +// FAN_REPORT_MNT group per mark_cmds entry. +FIXTURE_SETUP(fanotify) +{ + int i, ret; + + self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->orig_ns_fd, 0); + + ret = setup_userns(); + ASSERT_EQ(ret, 0); + + self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->ns_fd, 0); + + strcpy(self->root_mntpoint, root_mntpoint_templ); + ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); + + self->orig_root = open("/", O_PATH | O_CLOEXEC); + ASSERT_GE(self->orig_root, 0); + + ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); + + ASSERT_EQ(chroot(self->root_mntpoint), 0); + + ASSERT_EQ(chdir("/"), 0); + + ASSERT_EQ(mkdir("a", 0700), 0); + + ASSERT_EQ(mkdir("b", 0700), 0); + + self->root_id = get_unique_mnt_id("/"); + ASSERT_NE(self->root_id, 0); + + for (i = 0; i < NUM_FAN_FDS; i++) { + int fan_fd = fanotify_init(FAN_REPORT_FID, 0); + // Fail here rather than on the first fanotify_mark() below + ASSERT_GE(fan_fd, 0); + // Verify that watching tmpfs mounted inside userns is allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i], + FAN_OPEN, AT_FDCWD, "/"); + ASSERT_EQ(ret, 0); + // ...but watching entire orig root filesystem is not allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, + FAN_OPEN, self->orig_root, "."); + ASSERT_NE(ret, 0); + close(fan_fd); + + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + // Verify that watching mntns where group was created is allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // ...but watching orig mntns is not allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + 
FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->orig_ns_fd, NULL); + ASSERT_NE(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } + + self->rem = 0; +} + +FIXTURE_TEARDOWN(fanotify) +{ + int i; + + ASSERT_EQ(self->rem, 0); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); + + ASSERT_EQ(fchdir(self->orig_root), 0); + + ASSERT_EQ(chroot("."), 0); + + EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); + EXPECT_EQ(chdir(self->root_mntpoint), 0); + EXPECT_EQ(chdir("/"), 0); + EXPECT_EQ(rmdir(self->root_mntpoint), 0); +} + +static uint64_t expect_notify(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t *mask) +{ + struct fanotify_event_metadata *meta; + struct fanotify_event_info_mnt *mnt; + unsigned int thislen; + + if (!self->rem) { + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } + + self->rem = len; + self->next = (void *) self->buf; + } + + meta = self->next; + ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); + + thislen = meta->event_len; + self->rem -= thislen; + self->next += thislen; + + *mask = meta->mask; + thislen -= sizeof(*meta); + + mnt = ((void *) meta) + meta->event_len - thislen; + + ASSERT_EQ(thislen, sizeof(*mnt)); + + return mnt->mnt_id; +} + +static void expect_notify_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + unsigned int n, uint64_t mask[], uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify(_metadata, self, &mask[i]); +} + +static uint64_t 
expect_notify_mask(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t expect_mask) +{ + uint64_t mntid, mask; + + mntid = expect_notify(_metadata, self, &mask); + ASSERT_EQ(expect_mask, mask); + + return mntid; +} + + +// Read @n events that must all carry @mask and store their mount IDs in @mnts. +static void expect_notify_mask_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t mask, unsigned int n, uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify_mask(_metadata, self, mask); +} + +// Assert that @list1 and @list2 hold the same @num mount IDs, in any order. +static void verify_mount_ids(struct __test_metadata *const _metadata, + const uint64_t list1[], const uint64_t list2[], + size_t num) +{ + unsigned int i, j; + + // Check that neither list has any duplicates + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (i != j) { + ASSERT_NE(list1[i], list1[j]); + ASSERT_NE(list2[i], list2[j]); + } + } + } + // Check that all list1 members can be found in list2. Together with + // the above it means that the list1 and list2 represent the same sets. 
+ for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (list1[i] == list2[j]) + break; + } + ASSERT_NE(j, num); + } +} + +static void check_mounted(struct __test_metadata *const _metadata, + const uint64_t mnts[], size_t num) +{ + ssize_t ret; + uint64_t *list; + + list = malloc((num + 1) * sizeof(list[0])); + ASSERT_NE(list, NULL); + + ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); + ASSERT_EQ(ret, num); + + verify_mount_ids(_metadata, mnts, list, num); + + free(list); +} + +static void setup_mount_tree(struct __test_metadata *const _metadata, + int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + } +} + +TEST_F(fanotify, bind) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, move) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + uint64_t move_id; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); + ASSERT_EQ(ret, 0); + + move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); + ASSERT_EQ(move_id, mnts[1]); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, propagate) +{ + const unsigned int log2_num = 4; + const unsigned int num = (1 << 
log2_num); + uint64_t mnts[num]; + + setup_mount_tree(_metadata, log2_num); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); + + mnts[0] = self->root_id; + check_mounted(_metadata, mnts, num); + + // Cleanup + int ret; + uint64_t mnts2[num]; + ret = umount2("/", MNT_DETACH); + ASSERT_EQ(ret, 0); + + ret = mount("", "/", NULL, MS_PRIVATE, NULL); + ASSERT_EQ(ret, 0); + + mnts2[0] = self->root_id; + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); + verify_mount_ids(_metadata, mnts, mnts2, num); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, fsmount) +{ + int ret, fs, mnt; + uint64_t mnts[2] = { self->root_id }; + + fs = fsopen("tmpfs", 0); + ASSERT_GE(fs, 0); + + ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); + ASSERT_EQ(ret, 0); + + mnt = fsmount(fs, 0, 0); + ASSERT_GE(mnt, 0); + + close(fs); + + ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); + ASSERT_EQ(ret, 0); + + close(mnt); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, reparent) +{ + uint64_t mnts[6] = { self->root_id }; + uint64_t dmnts[3]; + uint64_t masks[3]; + unsigned int i; + int ret; + + // Create setup with a[1] -> b[2] propagation + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/a", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/b", NULL, MS_SLAVE, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + // Mount on a[3], which is propagated to b[4] + ret = mount("/", 
"/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); + + check_mounted(_metadata, mnts, 5); + + // Mount on b[5], not propagated + ret = mount("/", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + check_mounted(_metadata, mnts, 6); + + // Umount a[3], which is propagated to b[4], but not b[5] + // This will result in b[5] "falling" on b[2] + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + expect_notify_n(_metadata, self, 3, masks, dmnts); + verify_mount_ids(_metadata, mnts + 3, dmnts, 3); + + for (i = 0; i < 3; i++) { + if (dmnts[i] == mnts[5]) { + ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); + } else { + ASSERT_EQ(masks[i], FAN_MNT_DETACH); + } + } + + mnts[3] = mnts[5]; + check_mounted(_metadata, mnts, 4); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); + verify_mount_ids(_metadata, mnts + 1, dmnts, 3); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, rmdir) +{ + uint64_t mnts[3] = { self->root_id }; + int ret; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/", "/a/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + ret = chdir("/a"); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret == 0) { + chdir("/"); + unshare(CLONE_NEWNS); + mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + umount2("/a", MNT_DETACH); + // This triggers a detach in the other namespace + rmdir("/a"); + exit(0); + } + wait(NULL); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); + check_mounted(_metadata, mnts, 1); + + // Cleanup + ret = chdir("/"); + ASSERT_EQ(ret, 0); +} + 
+TEST_F(fanotify, pivot_root) +{ + uint64_t mnts[3] = { self->root_id }; + uint64_t mnts2[3]; + int ret; + /* + * Put a tmpfs on /a, bind /a onto /a/new, then pivot_root() into + * /a/new with the old root placed at /a/new/old. Two events with + * FAN_MNT_ATTACH | FAN_MNT_DETACH are expected; their ids are + * compared against mnts[0] and mnts[1] by verify_mount_ids(). + */ + ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); + ASSERT_EQ(ret, 0); + + mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + ret = mkdir("/a/new", 0700); + ASSERT_EQ(ret, 0); + + ret = mkdir("/a/old", 0700); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + check_mounted(_metadata, mnts, 3); + + ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); + verify_mount_ids(_metadata, mnts, mnts2, 2); + check_mounted(_metadata, mnts, 3); + + // Cleanup + ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/move_mount/.gitignore b/tools/testing/selftests/filesystems/move_mount/.gitignore new file mode 100644 index 000000000000..c7557db30671 --- /dev/null +++ b/tools/testing/selftests/filesystems/move_mount/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +move_mount_test diff --git a/tools/testing/selftests/filesystems/move_mount/Makefile b/tools/testing/selftests/filesystems/move_mount/Makefile new file mode 100644 index 000000000000..5c5b199b464b --- /dev/null +++ b/tools/testing/selftests/filesystems/move_mount/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + +TEST_GEN_PROGS := move_mount_test + +include ../../lib.mk + +$(OUTPUT)/move_mount_test: ../utils.c diff --git a/tools/testing/selftests/filesystems/move_mount/move_mount_test.c 
b/tools/testing/selftests/filesystems/move_mount/move_mount_test.c new file mode 100644 index 000000000000..f08f94b1f0ec --- /dev/null +++ b/tools/testing/selftests/filesystems/move_mount/move_mount_test.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "../wrappers.h" +#include "../utils.h" +#include "../statmount/statmount.h" +#include "../../kselftest_harness.h" + +#include <linux/stat.h> + +#ifndef MOVE_MOUNT_BENEATH +#define MOVE_MOUNT_BENEATH 0x00000200 +#endif +/* + * Return the unique mount ID of the mount @fd refers to, obtained through + * statx() with STATX_MNT_ID_UNIQUE. Returns 0 if statx() fails or does not + * report STATX_MNT_ID_UNIQUE in stx_mask. + */ +static uint64_t get_unique_mnt_id_fd(int fd) +{ + struct statx sx; + int ret; + + ret = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &sx); + if (ret) + return 0; + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) + return 0; + + return sx.stx_mnt_id; +} + +/* + * Create a locked overmount stack at /mnt_dir for testing MNT_LOCKED + * transfer on non-rootfs mounts. + * + * Mounts tmpfs A at /mnt_dir, overmounts with tmpfs B, then enters a + * new user+mount namespace where both become locked. Returns 0 on + * success, or a non-zero code (1-7) identifying the step that failed. + */ +static int setup_locked_overmount(void) +{ + /* Isolate so mounts don't leak. */ + if (unshare(CLONE_NEWNS)) + return 1; + if (mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL)) + return 2; + + /* + * Create mounts while still in the initial user namespace so + * they become locked after the subsequent user namespace + * unshare. The rmdir() return value is not checked. + */ + rmdir("/mnt_dir"); + if (mkdir("/mnt_dir", 0755)) + return 3; + + /* Mount tmpfs A */ + if (mount("tmpfs", "/mnt_dir", "tmpfs", 0, NULL)) + return 4; + + /* Overmount with tmpfs B */ + if (mount("tmpfs", "/mnt_dir", "tmpfs", 0, NULL)) + return 5; + + /* + * Create user+mount namespace. 
Mounts A and B become locked + * because they might be covering something that is not supposed + * to be revealed. + */ + if (setup_userns()) + return 6; + + /* Sanity check: B must be locked */ + if (!umount2("/mnt_dir", MNT_DETACH) || errno != EINVAL) + return 7; + + return 0; +} + +/* + * Create a detached tmpfs mount and return its fd, or -1 on failure. + * The filesystem context fd is closed on every path before returning. + */ +static int create_detached_tmpfs(void) +{ + int fs_fd, mnt_fd; + + fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC); + if (fs_fd < 0) + return -1; + + if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { + close(fs_fd); + return -1; + } + + mnt_fd = sys_fsmount(fs_fd, FSMOUNT_CLOEXEC, 0); + close(fs_fd); + return mnt_fd; +} +/* orig_root_id: unique mount ID of "/" recorded in FIXTURE_SETUP. */ +FIXTURE(move_mount) { + uint64_t orig_root_id; +}; +/* + * Each test runs in its own mount namespace with "/" made recursively + * private; the unique mount ID of "/" is stored for later comparisons. + */ +FIXTURE_SETUP(move_mount) +{ + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0); + + self->orig_root_id = get_unique_mnt_id("/"); + ASSERT_NE(self->orig_root_id, 0); +} +/* Empty: no explicit cleanup is done here. */ +FIXTURE_TEARDOWN(move_mount) +{ +} + +/* + * Test successful MOVE_MOUNT_BENEATH on the rootfs. + * Mount a clone beneath /, fchdir to the clone, chroot to switch root, + * then detach the old root. 
+ */ +TEST_F(move_mount, beneath_rootfs_success) +{ + int fd_tree, ret; + uint64_t clone_id, root_id; + + fd_tree = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(fd_tree, 0); + + clone_id = get_unique_mnt_id_fd(fd_tree); + ASSERT_NE(clone_id, 0); + ASSERT_NE(clone_id, self->orig_root_id); + + ASSERT_EQ(fchdir(fd_tree), 0); + + ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(fd_tree); + + /* Switch root to the clone */ + ASSERT_EQ(chroot("."), 0); + + /* Verify "/" is now the clone */ + root_id = get_unique_mnt_id("/"); + ASSERT_NE(root_id, 0); + ASSERT_EQ(root_id, clone_id); + + /* Detach old root */ + ASSERT_EQ(umount2(".", MNT_DETACH), 0); +} + +/* + * Test that after MOVE_MOUNT_BENEATH on the rootfs the old root is + * stacked on top of the clone. Verify via statmount that the old + * root's parent is the clone. + */ +TEST_F(move_mount, beneath_rootfs_old_root_stacked) +{ + int fd_tree, ret; + uint64_t clone_id; + struct statmount sm; + + fd_tree = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(fd_tree, 0); + + clone_id = get_unique_mnt_id_fd(fd_tree); + ASSERT_NE(clone_id, 0); + ASSERT_NE(clone_id, self->orig_root_id); + + ASSERT_EQ(fchdir(fd_tree), 0); + + ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(fd_tree); + + ASSERT_EQ(chroot("."), 0); + + /* Old root's parent should now be the clone */ + ASSERT_EQ(statmount(self->orig_root_id, 0, 0, + STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0), 0); + ASSERT_EQ(sm.mnt_parent_id, clone_id); + + ASSERT_EQ(umount2(".", MNT_DETACH), 0); +} + +/* + * Test that MOVE_MOUNT_BENEATH on rootfs fails when chroot'd into a + * subdirectory of the same mount. The caller's fs->root.dentry doesn't + * match mnt->mnt_root so the kernel rejects it. 
+ */ +TEST_F(move_mount, beneath_rootfs_in_chroot_fail) +{ + int fd_tree, ret; + uint64_t chroot_id, clone_id; + + rmdir("/chroot_dir"); + ASSERT_EQ(mkdir("/chroot_dir", 0755), 0); + + chroot_id = get_unique_mnt_id("/chroot_dir"); + ASSERT_NE(chroot_id, 0); + ASSERT_EQ(self->orig_root_id, chroot_id); + + ASSERT_EQ(chdir("/chroot_dir"), 0); + ASSERT_EQ(chroot("."), 0); + + fd_tree = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(fd_tree, 0); + + clone_id = get_unique_mnt_id_fd(fd_tree); + ASSERT_NE(clone_id, 0); + ASSERT_NE(clone_id, chroot_id); + + ASSERT_EQ(fchdir(fd_tree), 0); + + /* + * Should fail: fs->root.dentry (/chroot_dir) doesn't match + * the mount's mnt_root (/). + */ + ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + close(fd_tree); +} + +/* + * Test that MOVE_MOUNT_BENEATH on rootfs succeeds when chroot'd into a + * separate tmpfs mount. The caller's root dentry matches the mount's + * mnt_root since it's a dedicated mount. 
+ */ +TEST_F(move_mount, beneath_rootfs_in_chroot_success) +{ + int fd_tree, ret; + uint64_t chroot_id, clone_id, root_id; + struct statmount sm; + + rmdir("/chroot_dir"); + ASSERT_EQ(mkdir("/chroot_dir", 0755), 0); + ASSERT_EQ(mount("tmpfs", "/chroot_dir", "tmpfs", 0, NULL), 0); + + chroot_id = get_unique_mnt_id("/chroot_dir"); + ASSERT_NE(chroot_id, 0); + + ASSERT_EQ(chdir("/chroot_dir"), 0); + ASSERT_EQ(chroot("."), 0); + + ASSERT_EQ(get_unique_mnt_id("/"), chroot_id); + + fd_tree = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(fd_tree, 0); + + clone_id = get_unique_mnt_id_fd(fd_tree); + ASSERT_NE(clone_id, 0); + ASSERT_NE(clone_id, chroot_id); + + ASSERT_EQ(fchdir(fd_tree), 0); + + ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(fd_tree); + + ASSERT_EQ(chroot("."), 0); + + root_id = get_unique_mnt_id("/"); + ASSERT_NE(root_id, 0); + ASSERT_EQ(root_id, clone_id); + + ASSERT_EQ(statmount(chroot_id, 0, 0, + STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0), 0); + ASSERT_EQ(sm.mnt_parent_id, clone_id); + + ASSERT_EQ(umount2(".", MNT_DETACH), 0); +} + +/* + * Test MNT_LOCKED transfer when mounting beneath rootfs in a user+mount + * namespace. After mount-beneath the new root gets MNT_LOCKED and the + * old root has MNT_LOCKED cleared so it can be unmounted. 
+ */ +TEST_F(move_mount, beneath_rootfs_locked_transfer) +{ + int fd_tree, ret; + uint64_t clone_id, root_id; + + ASSERT_EQ(setup_userns(), 0); + + ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0); + + fd_tree = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | + AT_RECURSIVE); + ASSERT_GE(fd_tree, 0); + + clone_id = get_unique_mnt_id_fd(fd_tree); + ASSERT_NE(clone_id, 0); + + ASSERT_EQ(fchdir(fd_tree), 0); + + ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH | + MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(fd_tree); + + ASSERT_EQ(chroot("."), 0); + + root_id = get_unique_mnt_id("/"); + ASSERT_EQ(root_id, clone_id); + + /* + * The old root should be unmountable (MNT_LOCKED was + * transferred to the clone). If MNT_LOCKED wasn't + * cleared, this would fail with EINVAL. + */ + ASSERT_EQ(umount2(".", MNT_DETACH), 0); + + /* Verify "/" is still the clone after detaching old root */ + root_id = get_unique_mnt_id("/"); + ASSERT_EQ(root_id, clone_id); +} + +/* + * Test containment invariant: after mount-beneath rootfs in a user+mount + * namespace, the new root must be MNT_LOCKED. The lock transfer from the + * old root preserves containment -- the process cannot unmount the new root + * to escape the namespace. 
+ */ +TEST_F(move_mount, beneath_rootfs_locked_containment) +{ + int fd_tree, ret; + uint64_t clone_id, root_id; + + ASSERT_EQ(setup_userns(), 0); + + ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0); + + /* Sanity: rootfs must be locked in the new userns */ + ASSERT_EQ(umount2("/", MNT_DETACH), -1); + ASSERT_EQ(errno, EINVAL); + + fd_tree = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | + AT_RECURSIVE); + ASSERT_GE(fd_tree, 0); + + clone_id = get_unique_mnt_id_fd(fd_tree); + ASSERT_NE(clone_id, 0); + + ASSERT_EQ(fchdir(fd_tree), 0); + + ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", + MOVE_MOUNT_F_EMPTY_PATH | + MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(fd_tree); + + ASSERT_EQ(chroot("."), 0); + + root_id = get_unique_mnt_id("/"); + ASSERT_EQ(root_id, clone_id); + + /* Detach old root (MNT_LOCKED was cleared from it) */ + ASSERT_EQ(umount2(".", MNT_DETACH), 0); + + /* Verify "/" is still the clone after detaching old root */ + root_id = get_unique_mnt_id("/"); + ASSERT_EQ(root_id, clone_id); + + /* + * The new root must be locked (MNT_LOCKED was transferred + * from the old root). Attempting to unmount it must fail + * with EINVAL, preserving the containment invariant. + */ + ASSERT_EQ(umount2("/", MNT_DETACH), -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Test MNT_LOCKED transfer when mounting beneath a non-rootfs locked mount. + * Mounts created before unshare(CLONE_NEWUSER | CLONE_NEWNS) become locked + * in the new namespace. Mount-beneath transfers the lock from the displaced + * mount to the new mount, so the displaced mount can be unmounted. 
+ */ +TEST_F(move_mount, beneath_non_rootfs_locked_transfer) +{ + int mnt_fd, ret; + uint64_t mnt_new_id, mnt_visible_id; + + ASSERT_EQ(setup_locked_overmount(), 0); + + mnt_fd = create_detached_tmpfs(); + ASSERT_GE(mnt_fd, 0); + + mnt_new_id = get_unique_mnt_id_fd(mnt_fd); + ASSERT_NE(mnt_new_id, 0); + + /* Move mount beneath B (which is locked) */ + ret = sys_move_mount(mnt_fd, "", AT_FDCWD, "/mnt_dir", + MOVE_MOUNT_F_EMPTY_PATH | + MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(mnt_fd); + + /* + * B should now be unmountable (MNT_LOCKED was transferred + * to the new mount beneath it). If MNT_LOCKED wasn't + * cleared from B, this would fail with EINVAL. + */ + ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), 0); + + /* Verify the new mount is now visible */ + mnt_visible_id = get_unique_mnt_id("/mnt_dir"); + ASSERT_EQ(mnt_visible_id, mnt_new_id); +} + +/* + * Test MNT_LOCKED containment when mounting beneath a non-rootfs mount + * that was locked during unshare(CLONE_NEWUSER | CLONE_NEWNS). + * Mounts created before unshare become locked in the new namespace. + * Mount-beneath transfers the lock, preserving containment: the new + * mount cannot be unmounted, but the displaced mount can. + */ +TEST_F(move_mount, beneath_non_rootfs_locked_containment) +{ + int mnt_fd, ret; + uint64_t mnt_new_id, mnt_visible_id; + + ASSERT_EQ(setup_locked_overmount(), 0); + + mnt_fd = create_detached_tmpfs(); + ASSERT_GE(mnt_fd, 0); + + mnt_new_id = get_unique_mnt_id_fd(mnt_fd); + ASSERT_NE(mnt_new_id, 0); + + /* + * Move new tmpfs beneath B at /mnt_dir. + * Stack becomes: A -> new -> B + * Lock transfers from B to new. + */ + ret = sys_move_mount(mnt_fd, "", AT_FDCWD, "/mnt_dir", + MOVE_MOUNT_F_EMPTY_PATH | + MOVE_MOUNT_BENEATH); + ASSERT_EQ(ret, 0); + + close(mnt_fd); + + /* + * B lost MNT_LOCKED -- unmounting it must succeed. + * This reveals the new mount at /mnt_dir. 
+ */ + ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), 0); + + /* Verify the new mount is now visible */ + mnt_visible_id = get_unique_mnt_id("/mnt_dir"); + ASSERT_EQ(mnt_visible_id, mnt_new_id); + + /* + * The new mount gained MNT_LOCKED -- unmounting it must + * fail with EINVAL, preserving the containment invariant. + */ + ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), -1); + ASSERT_EQ(errno, EINVAL); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c index a3d8015897e9..e19ff8168baf 100644 --- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c +++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c @@ -12,7 +12,7 @@ #include <sys/mount.h> #include <unistd.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define MNT_NS_COUNT 11 #define MNT_NS_LAST_INDEX 10 @@ -37,17 +37,20 @@ FIXTURE(iterate_mount_namespaces) { __u64 mnt_ns_id[MNT_NS_COUNT]; }; +static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) { + if (mnt_ns_id[i] == info->mnt_ns_id) + return true; + } + return false; +} + FIXTURE_SETUP(iterate_mount_namespaces) { for (int i = 0; i < MNT_NS_COUNT; i++) self->fd_mnt_ns[i] = -EBADF; - /* - * Creating a new user namespace let's us guarantee that we only see - * mount namespaces that we did actually create. 
- */ - ASSERT_EQ(unshare(CLONE_NEWUSER), 0); - for (int i = 0; i < MNT_NS_COUNT; i++) { struct mnt_ns_info info = {}; @@ -75,13 +78,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_forward) fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC); ASSERT_GE(fd_mnt_ns_cur, 0); - for (;; count++) { + for (;;) { struct mnt_ns_info info = {}; int fd_mnt_ns_next; fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); if (fd_mnt_ns_next < 0 && errno == ENOENT) break; + if (mntns_in_list(self->mnt_ns_id, &info)) + count++; ASSERT_GE(fd_mnt_ns_next, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_next; @@ -96,13 +101,15 @@ TEST_F(iterate_mount_namespaces, iterate_all_backwards) fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC); ASSERT_GE(fd_mnt_ns_cur, 0); - for (;; count++) { + for (;;) { struct mnt_ns_info info = {}; int fd_mnt_ns_prev; fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); if (fd_mnt_ns_prev < 0 && errno == ENOENT) break; + if (mntns_in_list(self->mnt_ns_id, &info)) + count++; ASSERT_GE(fd_mnt_ns_prev, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_prev; @@ -125,7 +132,6 @@ TEST_F(iterate_mount_namespaces, iterate_forward) ASSERT_GE(fd_mnt_ns_next, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_next; - ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); } } @@ -144,7 +150,6 @@ TEST_F(iterate_mount_namespaces, iterate_backward) ASSERT_GE(fd_mnt_ns_prev, 0); ASSERT_EQ(close(fd_mnt_ns_cur), 0); fd_mnt_ns_cur = fd_mnt_ns_prev; - ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); } } diff --git a/tools/testing/selftests/filesystems/open_tree_ns/.gitignore b/tools/testing/selftests/filesystems/open_tree_ns/.gitignore new file mode 100644 index 000000000000..fb12b93fbcaa --- /dev/null +++ b/tools/testing/selftests/filesystems/open_tree_ns/.gitignore @@ -0,0 +1 @@ +open_tree_ns_test diff --git a/tools/testing/selftests/filesystems/open_tree_ns/Makefile 
b/tools/testing/selftests/filesystems/open_tree_ns/Makefile new file mode 100644 index 000000000000..4976ed1d7d4a --- /dev/null +++ b/tools/testing/selftests/filesystems/open_tree_ns/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := open_tree_ns_test + +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS := -lcap + +include ../../lib.mk + +$(OUTPUT)/open_tree_ns_test: open_tree_ns_test.c ../utils.c + $(CC) $(CFLAGS) -o $@ $^ $(LDLIBS) diff --git a/tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c b/tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c new file mode 100644 index 000000000000..82f3c8c02c9a --- /dev/null +++ b/tools/testing/selftests/filesystems/open_tree_ns/open_tree_ns_test.c @@ -0,0 +1,1007 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2026 Christian Brauner <brauner@kernel.org> + * + * Test for OPEN_TREE_NAMESPACE flag. + * + * Test that open_tree() with OPEN_TREE_NAMESPACE creates a new mount + * namespace containing the specified mount tree. 
+ */ +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/nsfs.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../wrappers.h" +#include "../statmount/statmount.h" +#include "../utils.h" +#include "../../kselftest_harness.h" + +#ifndef OPEN_TREE_NAMESPACE +#define OPEN_TREE_NAMESPACE (1 << 1) +#endif +/* + * Fetch the mount namespace ID for @fd with the NS_GET_MNTNS_ID ioctl. + * Returns 0 on success or -errno on failure. + */ +static int get_mnt_ns_id(int fd, uint64_t *mnt_ns_id) +{ + if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) + return -errno; + return 0; +} +/* + * Open @path read-only, fetch its mount namespace ID via get_mnt_ns_id() + * and close the fd again. Returns 0 on success or -errno on failure. + */ +static int get_mnt_ns_id_from_path(const char *path, uint64_t *mnt_ns_id) +{ + int fd, ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + + ret = get_mnt_ns_id(fd, mnt_ns_id); + close(fd); + return ret; +} +/* + * Log mnt_id, mnt_parent_id, fs type, root and mount point of @sm. String + * fields whose STATMOUNT_* bit is not set in sm->mask are logged as "". + */ +static void log_mount(struct __test_metadata *_metadata, struct statmount *sm) +{ + const char *fs_type = ""; + const char *mnt_root = ""; + const char *mnt_point = ""; + + if (sm->mask & STATMOUNT_FS_TYPE) + fs_type = sm->str + sm->fs_type; + if (sm->mask & STATMOUNT_MNT_ROOT) + mnt_root = sm->str + sm->mnt_root; + if (sm->mask & STATMOUNT_MNT_POINT) + mnt_point = sm->str + sm->mnt_point; + + TH_LOG(" mnt_id: %llu, parent_id: %llu, fs_type: %s, root: %s, point: %s", + (unsigned long long)sm->mnt_id, + (unsigned long long)sm->mnt_parent_id, + fs_type, mnt_root, mnt_point); +} +/* + * Log every mount that listmount() returns for @mnt_ns_id (the buffer holds + * at most 256 ids). Mounts for which statmount_alloc() fails are reported + * and skipped. + */ +static void dump_mounts(struct __test_metadata *_metadata, uint64_t mnt_ns_id) +{ + uint64_t list[256]; + ssize_t nr_mounts; + + nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0); + if (nr_mounts < 0) { + TH_LOG("listmount failed: %s", strerror(errno)); + return; + } + + TH_LOG("Mount namespace %llu contains %zd mount(s):", + (unsigned long long)mnt_ns_id, nr_mounts); + + for (ssize_t i = 0; i < nr_mounts; i++) { + struct statmount *sm; + + sm = statmount_alloc(list[i], mnt_ns_id, + STATMOUNT_MNT_BASIC | + 
STATMOUNT_FS_TYPE | + STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT, 0); + if (!sm) { + TH_LOG(" [%zd] mnt_id %llu: statmount failed: %s", + i, (unsigned long long)list[i], strerror(errno)); + continue; + } + + log_mount(_metadata, sm); + free(sm); + } +} +/* + * fd: open_tree() result of the running test, -1 until one is stored. + * current_ns_id: mount namespace ID read from /proc/self/ns/mnt in setup. + */ +FIXTURE(open_tree_ns) +{ + int fd; + uint64_t current_ns_id; +}; +/* + * path, flags: arguments for open_tree(). + * expect_success: false if the call is expected to fail. + * expect_different_ns: require a namespace ID other than the caller's. + * min_mounts: lower bound for the number of mounts listmount() reports. + */ +FIXTURE_VARIANT(open_tree_ns) +{ + const char *path; + unsigned int flags; + bool expect_success; + bool expect_different_ns; + int min_mounts; +}; + +FIXTURE_VARIANT_ADD(open_tree_ns, basic_root) +{ + .path = "/", + .flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC, + .expect_success = true, + .expect_different_ns = true, + /* + * The empty rootfs is hidden from listmount()/mountinfo, + * so we only see the bind mount on top of it. + */ + .min_mounts = 1, +}; + +FIXTURE_VARIANT_ADD(open_tree_ns, recursive_root) +{ + .path = "/", + .flags = OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC, + .expect_success = true, + .expect_different_ns = true, + .min_mounts = 1, +}; + +FIXTURE_VARIANT_ADD(open_tree_ns, subdir_tmp) +{ + .path = "/tmp", + .flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC, + .expect_success = true, + .expect_different_ns = true, + .min_mounts = 1, +}; + +FIXTURE_VARIANT_ADD(open_tree_ns, subdir_proc) +{ + .path = "/proc", + .flags = OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC, + .expect_success = true, + .expect_different_ns = true, + .min_mounts = 1, +}; + +FIXTURE_VARIANT_ADD(open_tree_ns, recursive_tmp) +{ + .path = "/tmp", + .flags = OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC, + .expect_success = true, + .expect_different_ns = true, + .min_mounts = 1, +}; + +FIXTURE_VARIANT_ADD(open_tree_ns, recursive_run) +{ + .path = "/run", + .flags = OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC, + .expect_success = true, + .expect_different_ns = true, + .min_mounts = 1, +}; +/* AT_RECURSIVE without OPEN_TREE_NAMESPACE; expected to be rejected. */ +FIXTURE_VARIANT_ADD(open_tree_ns, invalid_recursive_alone) +{ + .path = "/", + .flags = AT_RECURSIVE | OPEN_TREE_CLOEXEC, + .expect_success = 
false, + .expect_different_ns = false, + .min_mounts = 0, +}; +/* + * Skip unless open_tree() and statmount() exist and the caller's mount + * namespace ID can be read from /proc/self/ns/mnt. + */ +FIXTURE_SETUP(open_tree_ns) +{ + int ret; + + self->fd = -1; + + /* Check if open_tree syscall is supported */ + ret = sys_open_tree(-1, NULL, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "open_tree() syscall not supported"); + + /* Check if statmount is supported (listmount is not probed separately) */ + ret = statmount(0, 0, 0, 0, NULL, 0, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "statmount() syscall not supported"); + + /* Get current mount namespace ID for comparison */ + ret = get_mnt_ns_id_from_path("/proc/self/ns/mnt", &self->current_ns_id); + if (ret < 0) + SKIP(return, "Failed to get current mount namespace ID"); +} + +FIXTURE_TEARDOWN(open_tree_ns) +{ + if (self->fd >= 0) + close(self->fd); +} +/* + * Call open_tree() with the variant's path and flags. Variants with + * expect_success == false must fail with EINVAL. Otherwise EINVAL skips the + * test, and on success the fd must yield a mount namespace ID (different from + * the caller's when requested) listing at least variant->min_mounts mounts. + */ +TEST_F(open_tree_ns, create_namespace) +{ + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + self->fd = sys_open_tree(AT_FDCWD, variant->path, variant->flags); + + if (!variant->expect_success) { + ASSERT_LT(self->fd, 0); + ASSERT_EQ(errno, EINVAL); + return; + } + + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + /* Verify we can get the namespace ID */ + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + /* Verify it's a different namespace */ + if (variant->expect_different_ns) + ASSERT_NE(new_ns_id, self->current_ns_id); + + /* List mounts in the new namespace */ + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 0) { + TH_LOG("%m - listmount failed"); + } + + /* Verify minimum expected mounts */ + ASSERT_GE(nr_mounts, variant->min_mounts); + TH_LOG("Namespace contains %zd mounts", nr_mounts); +} +/* + * Create the namespace described by the variant, log its mounts and check + * that a forked child can setns() into it. + */ +TEST_F(open_tree_ns, setns_into_namespace) +{ + uint64_t new_ns_id; + pid_t pid; + int status; + int ret; + + /* Skip variants whose flags do not include OPEN_TREE_NAMESPACE */ + if (!(variant->flags & OPEN_TREE_NAMESPACE)) + SKIP(return, "setns test only for basic / case"); + + 
self->fd = sys_open_tree(AT_FDCWD, variant->path, variant->flags); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + /* Get namespace ID and dump all mounts */ + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + dump_mounts(_metadata, new_ns_id); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child: try to enter the namespace */ + if (setns(self->fd, CLONE_NEWNS) < 0) + _exit(1); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} +/* + * Runs only for the variant with path "/" and flags equal to + * OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC: statmount() the first mount that + * listmount() returns for the new namespace and check that its mnt_id + * differs from its mnt_parent_id. + */ +TEST_F(open_tree_ns, verify_mount_properties) +{ + struct statmount sm; + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int ret; + + /* Only test with basic flags on root */ + if (variant->flags != (OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC) || + strcmp(variant->path, "/") != 0) + SKIP(return, "mount properties test only for basic / case"); + + self->fd = sys_open_tree(AT_FDCWD, "/", OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC); + if (self->fd < 0 && errno == EINVAL) + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + + ASSERT_GE(self->fd, 0); + + ret = get_mnt_ns_id(self->fd, &new_ns_id); + ASSERT_EQ(ret, 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 1); + + /* Get info about the root mount (the bind mount, rootfs is hidden) */ + ret = statmount(list[0], new_ns_id, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0); + ASSERT_EQ(ret, 0); + + ASSERT_NE(sm.mnt_id, sm.mnt_parent_id); + + TH_LOG("Root mount id: %llu, parent: %llu", + (unsigned long long)sm.mnt_id, + (unsigned long long)sm.mnt_parent_id); +} +/* has_caps: whether the effective uid was 0 when the fixture was set up. */ +FIXTURE(open_tree_ns_caps) +{ + bool has_caps; +}; + +FIXTURE_SETUP(open_tree_ns_caps) +{ + int ret; + + /* Check if open_tree syscall is supported */ + ret = sys_open_tree(-1, NULL, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "open_tree() syscall not supported"); + + 
self->has_caps = (geteuid() == 0); +} + +FIXTURE_TEARDOWN(open_tree_ns_caps) +{ +} +/* + * In a forked child, call enter_userns() and caps_down(), then expect + * open_tree(OPEN_TREE_NAMESPACE) to fail with EPERM. Child exit codes: + * 0 EPERM as expected, 1 open_tree() succeeded, 2 enter_userns() failed, + * 3 caps_down() failed, 4 EINVAL (flag unsupported), 5 any other errno + * (handled by the default branch of the switch). + */ +TEST_F(open_tree_ns_caps, requires_cap_sys_admin) +{ + pid_t pid; + int status; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int fd; + + /* Child: drop privileges using utils.h helper */ + if (enter_userns() != 0) + _exit(2); + + /* Drop all caps using utils.h helper; a return value of 0 is treated as failure */ + if (caps_down() == 0) + _exit(3); + + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC); + if (fd >= 0) { + close(fd); + /* Should have failed without caps */ + _exit(1); + } + + if (errno == EPERM) + _exit(0); + + /* EINVAL means OPEN_TREE_NAMESPACE not supported */ + if (errno == EINVAL) + _exit(4); + + /* Unexpected error */ + _exit(5); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + /* Expected: EPERM without caps */ + break; + case 1: + ASSERT_FALSE(true) TH_LOG("OPEN_TREE_NAMESPACE succeeded without caps"); + break; + case 2: + SKIP(return, "setup_userns failed"); + break; + case 3: + SKIP(return, "caps_down failed"); + break; + case 4: + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} +/* fd: initialised to -1 in setup and closed in teardown if it is >= 0. */ +FIXTURE(open_tree_ns_userns) +{ + int fd; +}; + +FIXTURE_SETUP(open_tree_ns_userns) +{ + int ret; + + self->fd = -1; + + /* Check if open_tree syscall is supported */ + ret = sys_open_tree(-1, NULL, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "open_tree() syscall not supported"); + + /* Check if statmount is supported (listmount is not probed separately) */ + ret = statmount(0, 0, 0, 0, NULL, 0, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "statmount() syscall not supported"); +} + +FIXTURE_TEARDOWN(open_tree_ns_userns) +{ + if (self->fd >= 0) + close(self->fd); +} + +TEST_F(open_tree_ns_userns, create_in_userns) +{ + pid_t pid; + int status; + + pid = fork(); + ASSERT_GE(pid, 
0); + /* + * Child: the exit code names the step that failed; the switch + * after waitpid() turns it into a test failure or a skip. + */ + if (pid == 0) { + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int fd; + + /* Create new user namespace (also creates mount namespace) */ + if (enter_userns() != 0) + _exit(2); + + /* Now we have CAP_SYS_ADMIN in the user namespace */ + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC); + if (fd < 0) { + if (errno == EINVAL) + _exit(4); /* OPEN_TREE_NAMESPACE not supported */ + _exit(1); + } + + /* Verify we can get the namespace ID */ + if (get_mnt_ns_id(fd, &new_ns_id) != 0) + _exit(5); + + /* Verify we can list mounts in the new namespace */ + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + if (nr_mounts < 0) + _exit(6); + + /* Should have at least 1 mount */ + if (nr_mounts < 1) + _exit(7); + + close(fd); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + /* Success */ + break; + case 1: + ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE) failed in userns"); + break; + case 2: + SKIP(return, "setup_userns failed"); + break; + case 4: + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + break; + case 5: + ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID"); + break; + case 6: + ASSERT_FALSE(true) TH_LOG("listmount failed in new namespace"); + break; + case 7: + ASSERT_FALSE(true) TH_LOG("New namespace has no mounts"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} +/* + * In a forked child that called enter_userns(), create a namespace with + * open_tree(OPEN_TREE_NAMESPACE) and check that a second-level child can + * setns() into it. The first-level child's exit code is decoded by the + * switch in the parent. + */ +TEST_F(open_tree_ns_userns, setns_in_userns) +{ + pid_t pid; + int status; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t new_ns_id; + int fd; + pid_t inner_pid; + int inner_status; + + /* Create new user namespace */ + if (enter_userns() != 0) + _exit(2); + + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC); + if (fd < 0) { + if (errno == EINVAL) + _exit(4); + _exit(1); + } + + if 
(get_mnt_ns_id(fd, &new_ns_id) != 0) + _exit(5); + + /* Fork again to test setns into the new namespace */ + inner_pid = fork(); + if (inner_pid < 0) + _exit(8); + + if (inner_pid == 0) { + /* Inner child: enter the new namespace */ + if (setns(fd, CLONE_NEWNS) < 0) + _exit(1); + _exit(0); + } + + if (waitpid(inner_pid, &inner_status, 0) != inner_pid) + _exit(9); + /* Any inner-child failure, including a failed setns(), becomes exit code 10 */ + if (!WIFEXITED(inner_status) || WEXITSTATUS(inner_status) != 0) + _exit(10); + + close(fd); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + /* Success */ + break; + case 1: + ASSERT_FALSE(true) TH_LOG("open_tree or setns failed in userns"); + break; + case 2: + SKIP(return, "setup_userns failed"); + break; + case 4: + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + break; + case 5: + ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID"); + break; + case 8: + ASSERT_FALSE(true) TH_LOG("Inner fork failed"); + break; + case 9: + ASSERT_FALSE(true) TH_LOG("Inner waitpid failed"); + break; + case 10: + ASSERT_FALSE(true) TH_LOG("setns into new namespace failed"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} +/* + * Same flow as the non-recursive case, but open_tree() is called with + * OPEN_TREE_NAMESPACE | AT_RECURSIVE inside the forked child after + * enter_userns(); the new namespace must list at least one mount. + */ +TEST_F(open_tree_ns_userns, recursive_in_userns) +{ + pid_t pid; + int status; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int fd; + + /* Create new user namespace */ + if (enter_userns() != 0) + _exit(2); + + /* Test recursive flag in userns */ + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC); + if (fd < 0) { + if (errno == EINVAL) + _exit(4); + _exit(1); + } + + if (get_mnt_ns_id(fd, &new_ns_id) != 0) + _exit(5); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + if (nr_mounts < 0) + _exit(6); + + /* Recursive should copy submounts too; only nr_mounts >= 1 is enforced here */ + if (nr_mounts < 1) + _exit(7); 
+ + close(fd); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + /* Success */ + break; + case 1: + ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE|AT_RECURSIVE) failed in userns"); + break; + case 2: + SKIP(return, "setup_userns failed"); + break; + case 4: + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + break; + case 5: + ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID"); + break; + case 6: + ASSERT_FALSE(true) TH_LOG("listmount failed in new namespace"); + break; + case 7: + ASSERT_FALSE(true) TH_LOG("New namespace has no mounts"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} + +TEST_F(open_tree_ns_userns, umount_fails_einval) +{ + pid_t pid; + int status; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int fd; + ssize_t i; + + /* Create new user namespace */ + if (enter_userns() != 0) + _exit(2); + + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC); + if (fd < 0) { + if (errno == EINVAL) + _exit(4); + _exit(1); + } + + if (get_mnt_ns_id(fd, &new_ns_id) != 0) + _exit(5); + + /* Get all mounts in the new namespace */ + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, LISTMOUNT_REVERSE); + if (nr_mounts < 0) + _exit(9); + + if (nr_mounts < 1) + _exit(10); + + /* Enter the new namespace */ + if (setns(fd, CLONE_NEWNS) < 0) + _exit(6); + + for (i = 0; i < nr_mounts; i++) { + struct statmount *sm; + const char *mnt_point; + + sm = statmount_alloc(list[i], new_ns_id, + STATMOUNT_MNT_POINT, 0); + if (!sm) + _exit(11); + + mnt_point = sm->str + sm->mnt_point; + + TH_LOG("Trying to umount %s", mnt_point); + if (umount2(mnt_point, MNT_DETACH) == 0) { + free(sm); + _exit(7); + } + + if (errno != EINVAL) { + /* Wrong error */ + free(sm); + 
_exit(8); + } + + free(sm); + } + + close(fd); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + break; + case 1: + ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE) failed"); + break; + case 2: + SKIP(return, "setup_userns failed"); + break; + case 4: + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + break; + case 5: + ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID"); + break; + case 6: + ASSERT_FALSE(true) TH_LOG("setns into new namespace failed"); + break; + case 7: + ASSERT_FALSE(true) TH_LOG("umount succeeded but should have failed with EINVAL"); + break; + case 8: + ASSERT_FALSE(true) TH_LOG("umount failed with wrong error (expected EINVAL)"); + break; + case 9: + ASSERT_FALSE(true) TH_LOG("listmount failed"); + break; + case 10: + ASSERT_FALSE(true) TH_LOG("No mounts in new namespace"); + break; + case 11: + ASSERT_FALSE(true) TH_LOG("statmount_alloc failed"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} + +TEST_F(open_tree_ns_userns, umount_succeeds) +{ + pid_t pid; + int status; + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int fd; + ssize_t i; + + if (unshare(CLONE_NEWNS)) + _exit(1); + + if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) != 0) + _exit(1); + + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC); + if (fd < 0) { + if (errno == EINVAL) + _exit(4); + _exit(1); + } + + if (get_mnt_ns_id(fd, &new_ns_id) != 0) + _exit(5); + + /* Get all mounts in the new namespace */ + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, LISTMOUNT_REVERSE); + if (nr_mounts < 0) + _exit(9); + + if (nr_mounts < 1) + _exit(10); + + /* Enter the new namespace */ + if (setns(fd, CLONE_NEWNS) < 0) + _exit(6); + + for (i = 0; i < nr_mounts; 
i++) { + struct statmount *sm; + const char *mnt_point; + + sm = statmount_alloc(list[i], new_ns_id, + STATMOUNT_MNT_POINT, 0); + if (!sm) + _exit(11); + + mnt_point = sm->str + sm->mnt_point; + + TH_LOG("Trying to umount %s", mnt_point); + if (umount2(mnt_point, MNT_DETACH) != 0) { + free(sm); + _exit(7); + } + + free(sm); + } + + close(fd); + _exit(0); + } + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_TRUE(WIFEXITED(status)); + + switch (WEXITSTATUS(status)) { + case 0: + break; + case 1: + ASSERT_FALSE(true) TH_LOG("open_tree(OPEN_TREE_NAMESPACE) failed"); + break; + case 2: + SKIP(return, "setup_userns failed"); + break; + case 4: + SKIP(return, "OPEN_TREE_NAMESPACE not supported"); + break; + case 5: + ASSERT_FALSE(true) TH_LOG("Failed to get mount namespace ID"); + break; + case 6: + ASSERT_FALSE(true) TH_LOG("setns into new namespace failed"); + break; + case 7: + ASSERT_FALSE(true) TH_LOG("umount failed but should have succeeded"); + break; + case 9: + ASSERT_FALSE(true) TH_LOG("listmount failed"); + break; + case 10: + ASSERT_FALSE(true) TH_LOG("No mounts in new namespace"); + break; + case 11: + ASSERT_FALSE(true) TH_LOG("statmount_alloc failed"); + break; + default: + ASSERT_FALSE(true) TH_LOG("Unexpected error in child (exit %d)", + WEXITSTATUS(status)); + break; + } +} + +FIXTURE(open_tree_ns_unbindable) +{ + char tmpdir[PATH_MAX]; + bool mounted; +}; + +FIXTURE_SETUP(open_tree_ns_unbindable) +{ + int ret; + + self->mounted = false; + + /* Check if open_tree syscall is supported */ + ret = sys_open_tree(-1, NULL, 0); + if (ret == -1 && errno == ENOSYS) + SKIP(return, "open_tree() syscall not supported"); + + /* Create a temporary directory for the test mount */ + snprintf(self->tmpdir, sizeof(self->tmpdir), + "/tmp/open_tree_ns_test.XXXXXX"); + ASSERT_NE(mkdtemp(self->tmpdir), NULL); + + /* Mount tmpfs there */ + ret = mount("tmpfs", self->tmpdir, "tmpfs", 0, NULL); + if (ret < 0) { + rmdir(self->tmpdir); + SKIP(return, "Failed to mount 
tmpfs"); + } + self->mounted = true; + + ret = mount(NULL, self->tmpdir, NULL, MS_UNBINDABLE, NULL); + if (ret < 0) { + rmdir(self->tmpdir); + SKIP(return, "Failed to make tmpfs unbindable"); + } +} + +FIXTURE_TEARDOWN(open_tree_ns_unbindable) +{ + if (self->mounted) + umount2(self->tmpdir, MNT_DETACH); + rmdir(self->tmpdir); +} + +TEST_F(open_tree_ns_unbindable, fails_on_unbindable) +{ + int fd; + + fd = sys_open_tree(AT_FDCWD, self->tmpdir, + OPEN_TREE_NAMESPACE | OPEN_TREE_CLOEXEC); + ASSERT_LT(fd, 0); +} + +TEST_F(open_tree_ns_unbindable, recursive_skips_on_unbindable) +{ + uint64_t new_ns_id; + uint64_t list[256]; + ssize_t nr_mounts; + int fd; + ssize_t i; + bool found_unbindable = false; + + fd = sys_open_tree(AT_FDCWD, "/", + OPEN_TREE_NAMESPACE | AT_RECURSIVE | OPEN_TREE_CLOEXEC); + ASSERT_GT(fd, 0); + + ASSERT_EQ(get_mnt_ns_id(fd, &new_ns_id), 0); + + nr_mounts = listmount(LSMT_ROOT, new_ns_id, 0, list, 256, 0); + ASSERT_GE(nr_mounts, 0) { + TH_LOG("listmount failed: %m"); + } + + /* + * Iterate through all mounts in the new namespace and verify + * the unbindable tmpfs mount was silently dropped. 
+ */ + for (i = 0; i < nr_mounts; i++) { + struct statmount *sm; + const char *mnt_point; + + sm = statmount_alloc(list[i], new_ns_id, STATMOUNT_MNT_POINT, 0); + ASSERT_NE(sm, NULL) { + TH_LOG("statmount_alloc failed for mnt_id %llu", + (unsigned long long)list[i]); + } + + mnt_point = sm->str + sm->mnt_point; + + if (strcmp(mnt_point, self->tmpdir) == 0) { + TH_LOG("Found unbindable mount at %s (should have been dropped)", + mnt_point); + found_unbindable = true; + } + + free(sm); + } + + ASSERT_FALSE(found_unbindable) { + TH_LOG("Unbindable mount at %s was not dropped", self->tmpdir); + } + + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile index 6c661232b3b5..d3ad4a77db9b 100644 --- a/tools/testing/selftests/filesystems/overlayfs/Makefile +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall CFLAGS += $(KHDR_INCLUDES) LDLIBS += -lcap -LOCAL_HDRS += wrappers.h log.h +LOCAL_HDRS += ../wrappers.h log.h TEST_GEN_PROGS := dev_in_maps TEST_GEN_PROGS += set_layers_via_fds diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index 3b796264223f..8924cea6aa4b 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -15,9 +15,9 @@ #include <sched.h> #include <fcntl.h> -#include "../../kselftest.h" +#include "kselftest.h" #include "log.h" -#include "wrappers.h" +#include "../wrappers.h" static long get_file_dev_and_inode(void *addr, struct statx *stx) { diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c index 5074e64e74a8..3c0b93183348 100644 --- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c +++ 
b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -12,11 +12,11 @@ #include <sys/mount.h> #include <unistd.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #include "../../pidfd/pidfd.h" #include "log.h" #include "../utils.h" -#include "wrappers.h" +#include "../wrappers.h" FIXTURE(set_layers_via_fds) { int pidfd; diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 14ee91a41650..8e354fe99b44 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk + +$(OUTPUT)/statmount_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c index 15f0834f7557..8bc82f38c42f 100644 --- a/tools/testing/selftests/filesystems/statmount/listmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c @@ -11,7 +11,7 @@ #include <unistd.h> #include "statmount.h" -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #ifndef LISTMOUNT_REVERSE #define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index a7a5289ddae9..675f7cc00076 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -3,23 +3,68 @@ #ifndef __STATMOUNT_H #define __STATMOUNT_H +#include <errno.h> #include <stdint.h> +#include <stdlib.h> #include <linux/mount.h> #include <asm/unistd.h> -static inline int 
statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, - struct statmount *buf, size_t bufsize, +#define STATMOUNT_BUFSIZE (1 << 15) + +#ifndef __NR_statmount + #if defined __alpha__ + #define __NR_statmount 567 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_statmount 4457 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_statmount 6457 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_statmount 5457 + #endif + #else + #define __NR_statmount 457 + #endif +#endif + +#ifndef __NR_listmount + #if defined __alpha__ + #define __NR_listmount 568 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_listmount 4458 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_listmount 6458 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_listmount 5458 + #endif + #else + #define __NR_listmount 458 + #endif +#endif + +static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint32_t fd, + uint64_t mask, struct statmount *buf, size_t bufsize, unsigned int flags) { struct mnt_id_req req = { .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, .param = mask, }; - if (mnt_ns_id) { + if (flags & STATMOUNT_BY_FD) { req.size = MNT_ID_REQ_SIZE_VER1; - req.mnt_ns_id = mnt_ns_id; + req.mnt_fd = fd; + } else { + req.mnt_id = mnt_id; + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } } return syscall(__NR_statmount, &req, buf, bufsize, flags); @@ -43,4 +88,51 @@ static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, return syscall(__NR_listmount, &req, list, num, flags); } +static inline struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mnt_ns_id, + uint64_t mask, unsigned int flags) +{ + struct statmount *buf; + size_t bufsize = STATMOUNT_BUFSIZE; + int ret; + + for (;;) { + buf = malloc(bufsize); + if (!buf) + return NULL; + + ret = statmount(mnt_id, mnt_ns_id, 0, mask, buf, 
bufsize, flags); + if (ret == 0) + return buf; + + free(buf); + if (errno != EOVERFLOW) + return NULL; + + bufsize <<= 1; + } +} + +static inline struct statmount *statmount_alloc_by_fd(int fd, uint64_t mask) +{ + struct statmount *buf; + size_t bufsize = STATMOUNT_BUFSIZE; + int ret; + + for (;;) { + buf = malloc(bufsize); + if (!buf) + return NULL; + + ret = statmount(0, 0, fd, mask, buf, bufsize, STATMOUNT_BY_FD); + if (ret == 0) + return buf; + + free(buf); + if (errno != EOVERFLOW) + return NULL; + + bufsize <<= 1; + } +} + #endif /* __STATMOUNT_H */ diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 46d289611ce8..8dc018d47a93 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -13,7 +13,7 @@ #include <linux/stat.h> #include "statmount.h" -#include "../../kselftest.h" +#include "kselftest.h" static const char *const known_fs[] = { "9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs", @@ -26,43 +26,12 @@ static const char *const known_fs[] = { "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem", "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", - "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", - "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", - "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", - "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", - "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", - "zonefs", NULL }; - -static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) -{ - size_t bufsize = 1 << 15; - struct statmount *buf = NULL, *tmp = alloca(bufsize); - int tofree = 0; - int ret; - 
- for (;;) { - ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags); - if (ret != -1) - break; - if (tofree) - free(tmp); - if (errno != EOVERFLOW) - return NULL; - bufsize <<= 1; - tofree = 1; - tmp = malloc(bufsize); - if (!tmp) - return NULL; - } - buf = malloc(tmp->size); - if (buf) - memcpy(buf, tmp, tmp->size); - if (tofree) - free(tmp); - - return buf; -} + "ocfs2_dlmfs", "omfs", "openpromfs", "overlay", "pipefs", "proc", + "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", + "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", + "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", + "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf", + "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; static void write_file(const char *path, const char *val) { @@ -238,7 +207,7 @@ static void test_statmount_zero_mask(void) struct statmount sm; int ret; - ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, 0, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount zero mask: %s\n", strerror(errno)); @@ -264,7 +233,7 @@ static void test_statmount_mnt_basic(void) int ret; uint64_t mask = STATMOUNT_MNT_BASIC; - ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount mnt basic: %s\n", strerror(errno)); @@ -324,7 +293,7 @@ static void test_statmount_sb_basic(void) struct statx sx; struct statfs sf; - ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount sb basic: %s\n", strerror(errno)); @@ -376,7 +345,7 @@ static void test_statmount_mnt_point(void) { struct statmount *sm; - sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0); + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_POINT, 0); if (!sm) { ksft_test_result_fail("statmount mount point: 
%s\n", strerror(errno)); @@ -406,7 +375,7 @@ static void test_statmount_mnt_root(void) assert(last_dir); last_dir++; - sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0); + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT, 0); if (!sm) { ksft_test_result_fail("statmount mount root: %s\n", strerror(errno)); @@ -439,7 +408,7 @@ static void test_statmount_fs_type(void) const char *fs_type; const char *const *s; - sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0); + sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); if (!sm) { ksft_test_result_fail("statmount fs type: %s\n", strerror(errno)); @@ -468,7 +437,7 @@ static void test_statmount_mnt_opts(void) char *line = NULL; size_t len = 0; - sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, + sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, 0); if (!sm) { ksft_test_result_fail("statmount mnt opts: %s\n", @@ -558,7 +527,7 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) uint32_t start, i; int ret; - sm = statmount_alloc(root_id, mask, 0); + sm = statmount_alloc(root_id, 0, mask, 0); if (!sm) { ksft_test_result_fail("statmount %s: %s\n", name, strerror(errno)); @@ -587,14 +556,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) exactsize = sm->size; shortsize = sizeof(*sm) + i; - ret = statmount(root_id, 0, mask, sm, exactsize, 0); + ret = statmount(root_id, 0, 0, mask, sm, exactsize, 0); if (ret == -1) { ksft_test_result_fail("statmount exact size: %s\n", strerror(errno)); goto out; } errno = 0; - ret = statmount(root_id, 0, mask, sm, shortsize, 0); + ret = statmount(root_id, 0, 0, mask, sm, shortsize, 0); if (ret != -1 || errno != EOVERFLOW) { ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", strerror(errno)); @@ -659,6 +628,226 @@ static void test_listmount_tree(void) ksft_test_result_pass("listmount tree\n"); } +static void test_statmount_by_fd(void) +{ + struct statmount *sm = 
NULL; + char tmpdir[] = "/statmount.fd.XXXXXX"; + const char root[] = "/test"; + char subdir[PATH_MAX], tmproot[PATH_MAX]; + int fd; + + if (!mkdtemp(tmpdir)) { + ksft_perror("mkdtemp"); + return; + } + + if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) { + ksft_perror("mount"); + rmdir(tmpdir); + return; + } + + snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root); + snprintf(tmproot, PATH_MAX, "%s/%s", tmpdir, "chroot"); + + if (mkdir(subdir, 0755)) { + ksft_perror("mkdir"); + goto err_tmpdir; + } + + if (mount(subdir, subdir, NULL, MS_BIND, 0)) { + ksft_perror("mount"); + goto err_subdir; + } + + if (mkdir(tmproot, 0755)) { + ksft_perror("mkdir"); + goto err_subdir; + } + + fd = open(subdir, O_PATH); + if (fd < 0) { + ksft_perror("open"); + goto err_tmproot; + } + + if (chroot(tmproot)) { + ksft_perror("chroot"); + goto err_fd; + } + + sm = statmount_alloc_by_fd(fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT); + if (!sm) { + ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno)); + goto err_chroot; + } + + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto err_chroot; + } + + if (sm->mask & STATMOUNT_MNT_POINT) { + ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in statmount\n"); + goto err_chroot; + } + + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n"); + goto err_chroot; + } + + if (strcmp(root, sm->str + sm->mnt_root) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_root," + "statmount mnt_root: %s != %s\n", + sm->str + sm->mnt_root, root); + goto err_chroot; + } + + if (chroot(".")) { + ksft_perror("chroot"); + goto out; + } + + free(sm); + sm = statmount_alloc_by_fd(fd, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT); + if (!sm) { + ksft_test_result_fail("statmount by fd failed: %s\n", strerror(errno)); + goto err_fd; + } + + if (sm->size < sizeof(*sm)) { + 
ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto out; + } + + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("STATMOUNT_MNT_POINT not set in statmount\n"); + goto out; + } + + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in statmount\n"); + goto out; + } + + if (strcmp(subdir, sm->str + sm->mnt_point) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_point," + "statmount mnt_point: %s != %s\n", sm->str + sm->mnt_point, subdir); + goto out; + } + + if (strcmp(root, sm->str + sm->mnt_root) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_root," + "statmount mnt_root: %s != %s\n", sm->str + sm->mnt_root, root); + goto out; + } + + ksft_test_result_pass("statmount by fd\n"); + goto out; +err_chroot: + chroot("."); +out: + free(sm); +err_fd: + close(fd); +err_tmproot: + rmdir(tmproot); +err_subdir: + umount2(subdir, MNT_DETACH); + rmdir(subdir); +err_tmpdir: + umount2(tmpdir, MNT_DETACH); + rmdir(tmpdir); +} + +static void test_statmount_by_fd_unmounted(void) +{ + const char root[] = "/test.unmounted"; + char tmpdir[] = "/statmount.fd.XXXXXX"; + char subdir[PATH_MAX]; + int fd; + struct statmount *sm = NULL; + + if (!mkdtemp(tmpdir)) { + ksft_perror("mkdtemp"); + return; + } + + if (mount("statmount.test", tmpdir, "tmpfs", 0, NULL)) { + ksft_perror("mount"); + rmdir(tmpdir); + return; + } + + snprintf(subdir, PATH_MAX, "%s%s", tmpdir, root); + + if (mkdir(subdir, 0755)) { + ksft_perror("mkdir"); + goto err_tmpdir; + } + + if (mount(subdir, subdir, 0, MS_BIND, NULL)) { + ksft_perror("mount"); + goto err_subdir; + } + + fd = open(subdir, O_PATH); + if (fd < 0) { + ksft_perror("open"); + goto err_subdir; + } + + if (umount2(tmpdir, MNT_DETACH)) { + ksft_perror("umount2"); + goto err_fd; + } + + sm = statmount_alloc_by_fd(fd, STATMOUNT_MNT_POINT | STATMOUNT_MNT_ROOT); + if (!sm) { + ksft_test_result_fail("statmount by fd unmounted: 
%s\n", + strerror(errno)); + goto err_sm; + } + + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto err_sm; + } + + if (sm->mask & STATMOUNT_MNT_POINT) { + ksft_test_result_fail("STATMOUNT_MNT_POINT unexpectedly set in mask\n"); + goto err_sm; + } + + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("STATMOUNT_MNT_ROOT not set in mask\n"); + goto err_sm; + } + + if (strcmp(sm->str + sm->mnt_root, root) != 0) { + ksft_test_result_fail("statmount returned incorrect mnt_root," + "statmount mnt_root: %s != %s\n", + sm->str + sm->mnt_root, root); + goto err_sm; + } + + ksft_test_result_pass("statmount by fd on unmounted mount\n"); +err_sm: + free(sm); +err_fd: + close(fd); +err_subdir: + umount2(subdir, MNT_DETACH); + rmdir(subdir); +err_tmpdir: + umount2(tmpdir, MNT_DETACH); + rmdir(tmpdir); +} + #define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t)) int main(void) @@ -670,14 +859,14 @@ int main(void) ksft_print_header(); - ret = statmount(0, 0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); setup_namespace(); - ksft_set_plan(15); + ksft_set_plan(17); test_listmount_empty_root(); test_statmount_zero_mask(); test_statmount_mnt_basic(); @@ -694,6 +883,8 @@ int main(void) test_statmount_string(all_mask, str_off(fs_type), "fs type & all"); test_listmount_tree(); + test_statmount_by_fd_unmounted(); + test_statmount_by_fd(); if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index 70cb0c8b21cf..e500905e4c07 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -14,7 +14,8 @@ #include <linux/stat.h> #include 
"statmount.h" -#include "../../kselftest.h" +#include "../utils.h" +#include "kselftest.h" #define NSID_PASS 0 #define NSID_FAIL 1 @@ -33,31 +34,6 @@ static void handle_result(int ret, const char *testname) ksft_test_result_skip("%s\n", testname); } -static inline int wait_for_pid(pid_t pid) -{ - int status, ret; - -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == EINTR) - goto again; - - ksft_print_msg("waitpid returned -1, errno=%d\n", errno); - return -1; - } - - if (!WIFEXITED(status)) { - ksft_print_msg( - "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", - WIFSIGNALED(status), WTERMSIG(status)); - return -1; - } - - ret = WEXITSTATUS(status); - return ret; -} - static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) { int fd = open(mnt_ns, O_RDONLY); @@ -78,132 +54,147 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) return NSID_PASS; } -static int get_mnt_id(const char *path, uint64_t *mnt_id) +static int setup_namespace(void) { - struct statx sx; - int ret; - - ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); - if (ret == -1) { - ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, - strerror(errno)); - return NSID_ERROR; - } - - if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { - ksft_print_msg("no unique mount ID available for %s\n", path); + if (setup_userns() != 0) return NSID_ERROR; - } - *mnt_id = sx.stx_mnt_id; return NSID_PASS; } -static int write_file(const char *path, const char *val) +static int _test_statmount_mnt_ns_id(void) { - int fd = open(path, O_WRONLY); - size_t len = strlen(val); + struct statmount sm; + uint64_t mnt_ns_id; + uint64_t root_id; int ret; - if (fd == -1) { - ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); - return NSID_ERROR; - } + ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id); + if (ret != NSID_PASS) + return ret; - ret = write(fd, val, len); - if (ret == -1) { - ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); 
+ root_id = get_unique_mnt_id("/"); + if (!root_id) return NSID_ERROR; - } - if (ret != len) { - ksft_print_msg("short write to %s\n", path); - return NSID_ERROR; - } - ret = close(fd); + ret = statmount(root_id, 0, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret == -1) { - ksft_print_msg("closing %s\n", path); + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); return NSID_ERROR; } - return NSID_PASS; -} - -static int setup_namespace(void) -{ - int ret; - char buf[32]; - uid_t uid = getuid(); - gid_t gid = getgid(); - - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); - if (ret == -1) - ksft_exit_fail_msg("unsharing mountns and userns: %s\n", - strerror(errno)); - - sprintf(buf, "0 %d 1", uid); - ret = write_file("/proc/self/uid_map", buf); - if (ret != NSID_PASS) - return ret; - ret = write_file("/proc/self/setgroups", "deny"); - if (ret != NSID_PASS) - return ret; - sprintf(buf, "0 %d 1", gid); - ret = write_file("/proc/self/gid_map", buf); - if (ret != NSID_PASS) - return ret; + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + return NSID_FAIL; + } + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } - ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); - if (ret == -1) { - ksft_print_msg("making mount tree private: %s\n", - strerror(errno)); - return NSID_ERROR; + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; } return NSID_PASS; } -static int _test_statmount_mnt_ns_id(void) +static int _test_statmount_mnt_ns_id_by_fd(void) { struct statmount sm; uint64_t mnt_ns_id; - uint64_t root_id; - int ret; + int ret, fd, mounted = 1, status = NSID_ERROR; + char mnt[] = "/statmount.fd.XXXXXX"; ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id); if (ret != NSID_PASS) return ret; - ret 
= get_mnt_id("/", &root_id); - if (ret != NSID_PASS) - return ret; + if (!mkdtemp(mnt)) { + ksft_print_msg("statmount by fd mnt ns id mkdtemp: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (mount(mnt, mnt, NULL, MS_BIND, 0)) { + ksft_print_msg("statmount by fd mnt ns id mount: %s\n", strerror(errno)); + status = NSID_ERROR; + goto err; + } + + fd = open(mnt, O_PATH); + if (fd < 0) { + ksft_print_msg("statmount by fd mnt ns id open: %s\n", strerror(errno)); + goto err; + } - ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); + ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD); if (ret == -1) { - ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); - return NSID_ERROR; + ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno)); + status = NSID_ERROR; + goto out; } if (sm.size != sizeof(sm)) { ksft_print_msg("unexpected size: %u != %u\n", sm.size, (uint32_t)sizeof(sm)); - return NSID_FAIL; + status = NSID_FAIL; + goto out; } if (sm.mask != STATMOUNT_MNT_NS_ID) { ksft_print_msg("statmount mnt ns id unavailable\n"); - return NSID_SKIP; + status = NSID_SKIP; + goto out; } if (sm.mnt_ns_id != mnt_ns_id) { ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n", (unsigned long long)sm.mnt_ns_id, (unsigned long long)mnt_ns_id); - return NSID_FAIL; + status = NSID_FAIL; + goto out; } - return NSID_PASS; + mounted = 0; + if (umount2(mnt, MNT_DETACH)) { + ksft_print_msg("statmount by fd mnt ns id umount2: %s\n", strerror(errno)); + goto out; + } + + ret = statmount(0, 0, fd, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), STATMOUNT_BY_FD); + if (ret == -1) { + ksft_print_msg("statmount mnt ns id statmount: %s\n", strerror(errno)); + status = NSID_ERROR; + goto out; + } + + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + status = NSID_FAIL; + goto out; + } + + if (sm.mask == STATMOUNT_MNT_NS_ID) { + ksft_print_msg("unexpected 
STATMOUNT_MNT_NS_ID in mask\n"); + status = NSID_FAIL; + goto out; + } + + status = NSID_PASS; +out: + close(fd); + if (mounted) + umount2(mnt, MNT_DETACH); +err: + rmdir(mnt); + return status; } + static void test_statmount_mnt_ns_id(void) { pid_t pid; @@ -224,6 +215,9 @@ static void test_statmount_mnt_ns_id(void) if (ret != NSID_PASS) exit(ret); ret = _test_statmount_mnt_ns_id(); + if (ret != NSID_PASS) + exit(ret); + ret = _test_statmount_mnt_ns_id_by_fd(); exit(ret); } @@ -255,7 +249,7 @@ static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts) for (int i = 0; i < nr_mounts; i++) { struct statmount sm; - ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm, + ret = statmount(list[i], mnt_ns_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret < 0) { ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); @@ -351,7 +345,7 @@ int main(void) int ret; ksft_print_header(); - ret = statmount(0, 0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c index e553c89c5b19..d73d7d8171db 100644 --- a/tools/testing/selftests/filesystems/utils.c +++ b/tools/testing/selftests/filesystems/utils.c @@ -18,7 +18,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> +#include <sys/mount.h> +#include "kselftest.h" +#include "wrappers.h" #include "utils.h" #define MAX_USERNS_LEVEL 32 @@ -155,7 +158,7 @@ static int get_userns_fd_cb(void *data) _exit(0); } -static int wait_for_pid(pid_t pid) +int wait_for_pid(pid_t pid) { int status, ret; @@ -447,6 +450,97 @@ out_close: return fret; } +int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return -1; + } + + ret = 
write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return -1; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + return -1; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return -1; + } + + return 0; +} + +int setup_userns(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + if (ret) { + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + return ret; + } + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret) { + ksft_print_msg("making mount tree private: %s\n", strerror(errno)); + return ret; + } + + return 0; +} + +int enter_userns(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWUSER); + if (ret) + return ret; + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret) + return ret; + + return 0; +} + /* caps_down - lower all effective caps */ int caps_down(void) { @@ -499,3 +593,23 @@ out: cap_free(caps); return fret; } + +uint64_t get_unique_mnt_id(const char *path) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return 0; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", 
path); + return 0; + } + + return sx.stx_mnt_id; +} diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h index 7f1df2a3e94c..d03085cef5cb 100644 --- a/tools/testing/selftests/filesystems/utils.h +++ b/tools/testing/selftests/filesystems/utils.h @@ -27,6 +27,8 @@ extern int caps_down(void); extern int cap_down(cap_value_t down); extern bool switch_ids(uid_t uid, gid_t gid); +extern int setup_userns(void); +extern int enter_userns(void); static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) { @@ -42,4 +44,8 @@ static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) return true; } +extern int wait_for_pid(pid_t pid); +extern int write_file(const char *path, const char *val); +extern uint64_t get_unique_mnt_id(const char *path); + #endif /* __IDMAP_UTILS_H */ diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h index c38bc48e0cfa..420ae4f908cf 100644 --- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h +++ b/tools/testing/selftests/filesystems/wrappers.h @@ -9,6 +9,10 @@ #include <linux/mount.h> #include <sys/syscall.h> +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + static inline int sys_fsopen(const char *fsname, unsigned int flags) { return syscall(__NR_fsopen, fsname, flags); @@ -36,6 +40,28 @@ static inline int sys_mount(const char *src, const char *tgt, const char *fst, #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ #endif +#ifndef MOVE_MOUNT_T_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 
6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #else + #define __NR_move_mount 429 + #endif +#endif + static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) @@ -53,7 +79,25 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname, #endif #ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #else + #define __NR_open_tree 428 + #endif #endif static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) diff --git a/tools/testing/selftests/filesystems/xattr/.gitignore b/tools/testing/selftests/filesystems/xattr/.gitignore new file mode 100644 index 000000000000..092d14094c0f --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/.gitignore @@ -0,0 +1,3 @@ +xattr_socket_test +xattr_sockfs_test +xattr_socket_types_test diff --git a/tools/testing/selftests/filesystems/xattr/Makefile b/tools/testing/selftests/filesystems/xattr/Makefile new file mode 100644 index 000000000000..95364ffb10e9 --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += $(KHDR_INCLUDES) +TEST_GEN_PROGS := xattr_socket_test xattr_sockfs_test xattr_socket_types_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c new file mode 100644 index 000000000000..fac0a4c6bc05 --- /dev/null +++ 
b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> +/* + * Test extended attributes on path-based Unix domain sockets. + * + * Path-based Unix domain sockets are bound to a filesystem path and their + * inodes live on the underlying filesystem (e.g. tmpfs). These tests verify + * that user.* and trusted.* xattr operations work correctly on them using + * path-based syscalls (setxattr, getxattr, etc.). + * + * Covers SOCK_STREAM, SOCK_DGRAM, and SOCK_SEQPACKET socket types. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define TEST_XATTR_NAME "user.testattr" +#define TEST_XATTR_VALUE "testvalue" +#define TEST_XATTR_VALUE2 "newvalue" + +/* + * Fixture for path-based Unix domain socket tests. + * Creates a SOCK_STREAM socket bound to a path in /tmp (typically tmpfs). 
+ */ +FIXTURE(xattr_socket) +{ + char socket_path[PATH_MAX]; + int sockfd; +}; + +FIXTURE_VARIANT(xattr_socket) +{ + int sock_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(xattr_socket, stream) { + .sock_type = SOCK_STREAM, + .name = "stream", +}; + +FIXTURE_VARIANT_ADD(xattr_socket, dgram) { + .sock_type = SOCK_DGRAM, + .name = "dgram", +}; + +FIXTURE_VARIANT_ADD(xattr_socket, seqpacket) { + .sock_type = SOCK_SEQPACKET, + .name = "seqpacket", +}; + +FIXTURE_SETUP(xattr_socket) +{ + struct sockaddr_un addr; + int ret; + + self->sockfd = -1; + + snprintf(self->socket_path, sizeof(self->socket_path), + "/tmp/xattr_socket_test_%s.%d", variant->name, getpid()); + unlink(self->socket_path); + + self->sockfd = socket(AF_UNIX, variant->sock_type, 0); + ASSERT_GE(self->sockfd, 0) { + TH_LOG("Failed to create socket: %s", strerror(errno)); + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1); + + ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0) { + TH_LOG("Failed to bind socket to %s: %s", + self->socket_path, strerror(errno)); + } +} + +FIXTURE_TEARDOWN(xattr_socket) +{ + if (self->sockfd >= 0) + close(self->sockfd); + unlink(self->socket_path); +} + +TEST_F(xattr_socket, set_user_xattr) +{ + int ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s (errno=%d)", strerror(errno), errno); + } +} + +TEST_F(xattr_socket, get_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) { + TH_LOG("getxattr returned %zd, 
expected %zu: %s", + ret, strlen(TEST_XATTR_VALUE), strerror(errno)); + } + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_F(xattr_socket, list_user_xattr) +{ + char list[1024]; + ssize_t ret; + bool found = false; + char *ptr; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + memset(list, 0, sizeof(list)); + ret = listxattr(self->socket_path, list, sizeof(list)); + ASSERT_GT(ret, 0) { + TH_LOG("listxattr failed: %s", strerror(errno)); + } + + for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { + if (strcmp(ptr, TEST_XATTR_NAME) == 0) { + found = true; + break; + } + } + ASSERT_TRUE(found) { + TH_LOG("xattr %s not found in list", TEST_XATTR_NAME); + } +} + +TEST_F(xattr_socket, remove_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + ret = removexattr(self->socket_path, TEST_XATTR_NAME); + ASSERT_EQ(ret, 0) { + TH_LOG("removexattr failed: %s", strerror(errno)); + } + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA) { + TH_LOG("Expected ENODATA, got %s", strerror(errno)); + } +} + +/* + * Test that xattrs persist across socket close and reopen. + * The xattr is on the filesystem inode, not the socket fd. 
+ */ +TEST_F(xattr_socket, xattr_persistence) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr failed: %s", strerror(errno)); + } + + close(self->sockfd); + self->sockfd = -1; + + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) { + TH_LOG("getxattr after close failed: %s", strerror(errno)); + } + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_F(xattr_socket, update_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE2); +} + +TEST_F(xattr_socket, xattr_create_flag) +{ + int ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), XATTR_CREATE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EEXIST); +} + +TEST_F(xattr_socket, xattr_replace_flag) +{ + int ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), XATTR_REPLACE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_socket, multiple_xattrs) +{ + char buf[256]; + ssize_t ret; + int i; + char name[64], value[64]; + const int num_xattrs = 5; + + for (i = 0; i < num_xattrs; i++) { + snprintf(name, sizeof(name), "user.test%d", i); + snprintf(value, sizeof(value), "value%d", i); + ret = 
setxattr(self->socket_path, name, value, strlen(value), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr %s failed: %s", name, strerror(errno)); + } + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(name, sizeof(name), "user.test%d", i); + snprintf(value, sizeof(value), "value%d", i); + memset(buf, 0, sizeof(buf)); + ret = getxattr(self->socket_path, name, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(value)); + ASSERT_STREQ(buf, value); + } +} + +TEST_F(xattr_socket, xattr_empty_value) +{ + char buf[256]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, "", 0, 0); + ASSERT_EQ(ret, 0); + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, 0); +} + +TEST_F(xattr_socket, xattr_get_size) +{ + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); +} + +TEST_F(xattr_socket, xattr_buffer_too_small) +{ + char buf[2]; + ssize_t ret; + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ERANGE); +} + +TEST_F(xattr_socket, xattr_nonexistent) +{ + char buf[256]; + ssize_t ret; + + ret = getxattr(self->socket_path, "user.nonexistent", buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_socket, remove_nonexistent_xattr) +{ + int ret; + + ret = removexattr(self->socket_path, "user.nonexistent"); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_socket, large_xattr_value) +{ + char large_value[4096]; + char read_buf[4096]; + ssize_t ret; + + memset(large_value, 'A', sizeof(large_value)); + + ret = setxattr(self->socket_path, TEST_XATTR_NAME, + large_value, sizeof(large_value), 
0); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr with large value failed: %s", strerror(errno)); + } + + memset(read_buf, 0, sizeof(read_buf)); + ret = getxattr(self->socket_path, TEST_XATTR_NAME, + read_buf, sizeof(read_buf)); + ASSERT_EQ(ret, (ssize_t)sizeof(large_value)); + ASSERT_EQ(memcmp(large_value, read_buf, sizeof(large_value)), 0); +} + +/* + * Test lsetxattr/lgetxattr (don't follow symlinks). + * Socket files aren't symlinks, so this should work the same. + */ +TEST_F(xattr_socket, lsetxattr_lgetxattr) +{ + char buf[256]; + ssize_t ret; + + ret = lsetxattr(self->socket_path, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("lsetxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = lgetxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +/* + * Fixture for trusted.* xattr tests. + * These require CAP_SYS_ADMIN. 
+ */ +FIXTURE(xattr_socket_trusted) +{ + char socket_path[PATH_MAX]; + int sockfd; +}; + +FIXTURE_VARIANT(xattr_socket_trusted) +{ + int sock_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(xattr_socket_trusted, stream) { + .sock_type = SOCK_STREAM, + .name = "stream", +}; + +FIXTURE_VARIANT_ADD(xattr_socket_trusted, dgram) { + .sock_type = SOCK_DGRAM, + .name = "dgram", +}; + +FIXTURE_VARIANT_ADD(xattr_socket_trusted, seqpacket) { + .sock_type = SOCK_SEQPACKET, + .name = "seqpacket", +}; + +FIXTURE_SETUP(xattr_socket_trusted) +{ + struct sockaddr_un addr; + int ret; + + self->sockfd = -1; + + snprintf(self->socket_path, sizeof(self->socket_path), + "/tmp/xattr_socket_trusted_%s.%d", variant->name, getpid()); + unlink(self->socket_path); + + self->sockfd = socket(AF_UNIX, variant->sock_type, 0); + ASSERT_GE(self->sockfd, 0); + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1); + + ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(xattr_socket_trusted) +{ + if (self->sockfd >= 0) + close(self->sockfd); + unlink(self->socket_path); +} + +TEST_F(xattr_socket_trusted, set_trusted_xattr) +{ + char buf[256]; + ssize_t len; + int ret; + + ret = setxattr(self->socket_path, "trusted.testattr", + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + if (ret == -1 && errno == EPERM) + SKIP(return, "Need CAP_SYS_ADMIN for trusted.* xattrs"); + ASSERT_EQ(ret, 0) { + TH_LOG("setxattr trusted.testattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + len = getxattr(self->socket_path, "trusted.testattr", + buf, sizeof(buf)); + ASSERT_EQ(len, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_F(xattr_socket_trusted, get_trusted_xattr_unprivileged) +{ + char buf[256]; + ssize_t ret; + + ret = getxattr(self->socket_path, "trusted.testattr", buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + 
ASSERT_TRUE(errno == ENODATA || errno == EPERM) { + TH_LOG("Expected ENODATA or EPERM, got %s", strerror(errno)); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c new file mode 100644 index 000000000000..bfabe91b2ed1 --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> +/* + * Test user.* xattrs on various socket families. + * + * All socket types use sockfs for their inodes, so user.* xattrs should + * work on any socket regardless of address family. This tests AF_INET, + * AF_INET6, AF_NETLINK, AF_PACKET, and abstract namespace AF_UNIX sockets. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/xattr.h> +#include <linux/netlink.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define TEST_XATTR_NAME "user.testattr" +#define TEST_XATTR_VALUE "testvalue" + +FIXTURE(xattr_socket_types) +{ + int sockfd; +}; + +FIXTURE_VARIANT(xattr_socket_types) +{ + int family; + int type; + int protocol; +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, inet) { + .family = AF_INET, + .type = SOCK_STREAM, + .protocol = 0, +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, inet6) { + .family = AF_INET6, + .type = SOCK_STREAM, + .protocol = 0, +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, netlink) { + .family = AF_NETLINK, + .type = SOCK_RAW, + .protocol = NETLINK_USERSOCK, +}; + +FIXTURE_VARIANT_ADD(xattr_socket_types, packet) { + .family = AF_PACKET, + .type = SOCK_DGRAM, + .protocol = 0, +}; + +FIXTURE_SETUP(xattr_socket_types) +{ + self->sockfd = socket(variant->family, variant->type, + variant->protocol); + if (self->sockfd < 0 
&& + (errno == EAFNOSUPPORT || errno == EPERM || errno == EACCES)) + SKIP(return, "socket(%d, %d, %d) not available: %s", + variant->family, variant->type, variant->protocol, + strerror(errno)); + ASSERT_GE(self->sockfd, 0) { + TH_LOG("Failed to create socket(%d, %d, %d): %s", + variant->family, variant->type, variant->protocol, + strerror(errno)); + } +} + +FIXTURE_TEARDOWN(xattr_socket_types) +{ + if (self->sockfd >= 0) + close(self->sockfd); +} + +TEST_F(xattr_socket_types, set_get_list_remove) +{ + char buf[256], list[4096], *ptr; + ssize_t ret; + bool found; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); + + memset(list, 0, sizeof(list)); + ret = flistxattr(self->sockfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + found = false; + for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { + if (strcmp(ptr, TEST_XATTR_NAME) == 0) + found = true; + } + ASSERT_TRUE(found); + + ret = fremovexattr(self->sockfd, TEST_XATTR_NAME); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +/* + * Test abstract namespace AF_UNIX socket. + * Abstract sockets don't have a filesystem path; their inodes live in + * sockfs so user.* xattrs should work via fsetxattr/fgetxattr. 
+ */ +FIXTURE(xattr_abstract) +{ + int sockfd; +}; + +FIXTURE_SETUP(xattr_abstract) +{ + struct sockaddr_un addr; + char name[64]; + int ret, len; + + self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(self->sockfd, 0); + + len = snprintf(name, sizeof(name), "xattr_test_abstract_%d", getpid()); + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + addr.sun_path[0] = '\0'; + memcpy(&addr.sun_path[1], name, len); + + ret = bind(self->sockfd, (struct sockaddr *)&addr, + offsetof(struct sockaddr_un, sun_path) + 1 + len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(xattr_abstract) +{ + if (self->sockfd >= 0) + close(self->sockfd); +} + +TEST_F(xattr_abstract, set_get) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr on abstract socket failed: %s", + strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c new file mode 100644 index 000000000000..b4824b01a86d --- /dev/null +++ b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> +/* + * Test extended attributes on sockfs sockets. + * + * Sockets created via socket() have their inodes in sockfs, which supports + * user.* xattrs with per-inode limits: up to 128 xattrs and 128KB total + * value size. These tests verify xattr operations via fsetxattr/fgetxattr/ + * flistxattr/fremovexattr on the socket fd, as well as limit enforcement. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define TEST_XATTR_NAME "user.testattr" +#define TEST_XATTR_VALUE "testvalue" +#define TEST_XATTR_VALUE2 "newvalue" + +/* Per-inode limits for user.* xattrs on sockfs (from include/linux/xattr.h) */ +#define SIMPLE_XATTR_MAX_NR 128 +#define SIMPLE_XATTR_MAX_SIZE (128 << 10) /* 128 KB */ + +#ifndef XATTR_SIZE_MAX +#define XATTR_SIZE_MAX 65536 +#endif + +/* + * Fixture for sockfs socket xattr tests. + * Creates an AF_UNIX socket (lives in sockfs, not bound to any path). + */ +FIXTURE(xattr_sockfs) +{ + int sockfd; +}; + +FIXTURE_SETUP(xattr_sockfs) +{ + self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(self->sockfd, 0) { + TH_LOG("Failed to create socket: %s", strerror(errno)); + } +} + +FIXTURE_TEARDOWN(xattr_sockfs) +{ + if (self->sockfd >= 0) + close(self->sockfd); +} + +TEST_F(xattr_sockfs, set_get_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr failed: %s", strerror(errno)); + } + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) { + TH_LOG("fgetxattr returned %zd: %s", ret, strerror(errno)); + } + ASSERT_STREQ(buf, TEST_XATTR_VALUE); +} + +/* + * Test listing xattrs on a sockfs socket. + * Should include user.* xattrs and system.sockprotoname. 
+ */ +TEST_F(xattr_sockfs, list_user_xattr) +{ + char list[4096]; + ssize_t ret; + char *ptr; + bool found_user = false; + bool found_proto = false; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr failed: %s", strerror(errno)); + } + + memset(list, 0, sizeof(list)); + ret = flistxattr(self->sockfd, list, sizeof(list)); + ASSERT_GT(ret, 0) { + TH_LOG("flistxattr failed: %s", strerror(errno)); + } + + for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { + if (strcmp(ptr, TEST_XATTR_NAME) == 0) + found_user = true; + if (strcmp(ptr, "system.sockprotoname") == 0) + found_proto = true; + } + ASSERT_TRUE(found_user) { + TH_LOG("user xattr not found in list"); + } + ASSERT_TRUE(found_proto) { + TH_LOG("system.sockprotoname not found in list"); + } +} + +TEST_F(xattr_sockfs, remove_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fremovexattr(self->sockfd, TEST_XATTR_NAME); + ASSERT_EQ(ret, 0) { + TH_LOG("fremovexattr failed: %s", strerror(errno)); + } + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_sockfs, update_user_xattr) +{ + char buf[256]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE2); +} + +TEST_F(xattr_sockfs, xattr_create_flag) +{ + int ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + 
ASSERT_EQ(ret, 0); + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), + XATTR_CREATE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EEXIST); +} + +TEST_F(xattr_sockfs, xattr_replace_flag) +{ + int ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), + XATTR_REPLACE); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_sockfs, get_nonexistent) +{ + char buf[256]; + ssize_t ret; + + ret = fgetxattr(self->sockfd, "user.nonexistent", buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); +} + +TEST_F(xattr_sockfs, empty_value) +{ + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, "", 0, 0); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, 0); +} + +TEST_F(xattr_sockfs, get_size) +{ + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); +} + +TEST_F(xattr_sockfs, buffer_too_small) +{ + char buf[2]; + ssize_t ret; + + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ERANGE); +} + +/* + * Test maximum number of user.* xattrs per socket. + * The kernel enforces SIMPLE_XATTR_MAX_NR (128), so the 129th should + * fail with ENOSPC. 
+ */ +TEST_F(xattr_sockfs, max_nr_xattrs) +{ + char name[32]; + int i, ret; + + for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) { + snprintf(name, sizeof(name), "user.test%03d", i); + ret = fsetxattr(self->sockfd, name, "v", 1, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("fsetxattr %s failed at i=%d: %s", + name, i, strerror(errno)); + } + } + + ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOSPC) { + TH_LOG("Expected ENOSPC for xattr %d, got %s", + SIMPLE_XATTR_MAX_NR + 1, strerror(errno)); + } +} + +/* + * Test maximum total value size for user.* xattrs. + * The kernel enforces SIMPLE_XATTR_MAX_SIZE (128KB). Individual xattr + * values are limited to XATTR_SIZE_MAX (64KB) by the VFS, so we need + * at least two xattrs to hit the total limit. + */ +TEST_F(xattr_sockfs, max_xattr_size) +{ + char *value; + int ret; + + value = malloc(XATTR_SIZE_MAX); + ASSERT_NE(value, NULL); + memset(value, 'A', XATTR_SIZE_MAX); + + /* First 64KB xattr - total = 64KB */ + ret = fsetxattr(self->sockfd, "user.big1", value, XATTR_SIZE_MAX, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("first large xattr failed: %s", strerror(errno)); + } + + /* Second 64KB xattr - total = 128KB (exactly at limit) */ + ret = fsetxattr(self->sockfd, "user.big2", value, XATTR_SIZE_MAX, 0); + free(value); + ASSERT_EQ(ret, 0) { + TH_LOG("second large xattr failed: %s", strerror(errno)); + } + + /* Third xattr with 1 byte - total > 128KB, should fail */ + ret = fsetxattr(self->sockfd, "user.big3", "v", 1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOSPC) { + TH_LOG("Expected ENOSPC when exceeding size limit, got %s", + strerror(errno)); + } +} + +/* + * Test that removing an xattr frees limit space, allowing re-addition. 
+ */ +TEST_F(xattr_sockfs, limit_remove_readd) +{ + char name[32]; + int i, ret; + + /* Fill up to the maximum count */ + for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) { + snprintf(name, sizeof(name), "user.test%03d", i); + ret = fsetxattr(self->sockfd, name, "v", 1, 0); + ASSERT_EQ(ret, 0); + } + + /* Verify we're at the limit */ + ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOSPC); + + /* Remove one xattr */ + ret = fremovexattr(self->sockfd, "user.test000"); + ASSERT_EQ(ret, 0); + + /* Now we should be able to add one more */ + ret = fsetxattr(self->sockfd, "user.newattr", "v", 1, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("re-add after remove failed: %s", strerror(errno)); + } +} + +/* + * Test that two different sockets have independent xattr limits. + */ +TEST_F(xattr_sockfs, limits_per_inode) +{ + char buf[256]; + int sock2; + ssize_t ret; + + sock2 = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(sock2, 0); + + /* Set xattr on first socket */ + ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, + TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); + ASSERT_EQ(ret, 0); + + /* First socket's xattr should not be visible on second socket */ + ret = fgetxattr(sock2, TEST_XATTR_NAME, NULL, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); + + /* Second socket should independently accept xattrs */ + ret = fsetxattr(sock2, TEST_XATTR_NAME, + TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); + ASSERT_EQ(ret, 0); + + /* Verify each socket has its own value */ + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(sock2, TEST_XATTR_NAME, buf, sizeof(buf)); + ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); + ASSERT_STREQ(buf, TEST_XATTR_VALUE2); + + close(sock2); +} + +TEST_HARNESS_MAIN |
