diff options
Diffstat (limited to 'tools/testing/selftests/pidfd')
18 files changed, 2192 insertions, 175 deletions
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index bf92481f925c..4cd8ec7fd349 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -8,3 +8,8 @@ pidfd_getfd_test pidfd_setns_test pidfd_file_handle_test pidfd_bind_mount +pidfd_info_test +pidfd_exec_helper +pidfd_xattr_test +pidfd_setattr_test +pidfd_autoreap_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 301343a11b62..4211f91e9af8 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ - pidfd_file_handle_test pidfd_bind_mount + pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \ + pidfd_xattr_test pidfd_setattr_test pidfd_autoreap_test + +TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper include ../lib.mk diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index 6133524710f7..cf7cc0ce0248 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,6 +4,5 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_TIME_NS=y -CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_CGROUPS=y CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 0b96ac4b8ce5..5a4e78c10f43 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -12,12 +12,26 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/ioctl.h> #include <sys/types.h> #include <sys/wait.h> -#include "../kselftest.h" +/* + * Remove the userspace definitions of the following preprocessor symbols + * to avoid duplicate-definition warnings from the subsequent in-kernel + * definitions. + */ +#undef SCHED_NORMAL +#undef SCHED_FLAG_KEEP_ALL +#undef SCHED_FLAG_UTIL_CLAMP + +#include "kselftest.h" #include "../clone3/clone3_selftests.h" +#ifndef FD_PIDFS_ROOT +#define FD_PIDFS_ROOT -10002 +#endif + #ifndef P_PIDFD #define P_PIDFD 3 #endif @@ -31,25 +45,164 @@ #endif #ifndef __NR_pidfd_open -#define __NR_pidfd_open -1 +#define __NR_pidfd_open 434 #endif #ifndef __NR_pidfd_send_signal -#define __NR_pidfd_send_signal -1 +#define __NR_pidfd_send_signal 424 #endif #ifndef __NR_clone3 -#define __NR_clone3 -1 +#define __NR_clone3 435 #endif #ifndef __NR_pidfd_getfd -#define __NR_pidfd_getfd -1 +#define __NR_pidfd_getfd 438 #endif #ifndef PIDFD_NONBLOCK #define PIDFD_NONBLOCK O_NONBLOCK #endif +#ifndef PIDFD_SELF_THREAD +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#endif + +#ifndef PIDFD_SELF_THREAD_GROUP +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ +#endif + +#ifndef PIDFD_SELF +#define PIDFD_SELF PIDFD_SELF_THREAD +#endif + +#ifndef PIDFD_SELF_PROCESS +#define PIDFD_SELF_PROCESS PIDFD_SELF_THREAD_GROUP +#endif + +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_CGROUP_NAMESPACE +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#endif + +#ifndef PIDFD_GET_IPC_NAMESPACE +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#endif + +#ifndef PIDFD_GET_NET_NAMESPACE +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#endif + +#ifndef PIDFD_GET_PID_NAMESPACE +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#endif + +#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#endif + +#ifndef PIDFD_GET_TIME_NAMESPACE +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#endif + +#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#endif + +#ifndef PIDFD_GET_USER_NAMESPACE +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#endif + +#ifndef PIDFD_GET_UTS_NAMESPACE +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#endif + +#ifndef PIDFD_INFO_PID +#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */ +#endif + +#ifndef PIDFD_INFO_CREDS +#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ +#endif + +#ifndef PIDFD_INFO_CGROUPID +#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ +#endif + +#ifndef PIDFD_INFO_EXIT +#define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */ +#endif + +#ifndef PIDFD_INFO_COREDUMP +#define PIDFD_INFO_COREDUMP (1UL << 4) +#endif + +#ifndef PIDFD_INFO_SUPPORTED_MASK +#define PIDFD_INFO_SUPPORTED_MASK (1UL << 5) +#endif + +#ifndef PIDFD_INFO_COREDUMP_SIGNAL +#define PIDFD_INFO_COREDUMP_SIGNAL (1UL << 6) +#endif + +#ifndef PIDFD_INFO_COREDUMP_CODE +#define PIDFD_INFO_COREDUMP_CODE (1UL << 7) +#endif + +#ifndef PIDFD_COREDUMPED +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */ +#endif + +#ifndef PIDFD_COREDUMP_SKIP +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */ +#endif + +#ifndef PIDFD_COREDUMP_USER +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ +#endif + +#ifndef PIDFD_COREDUMP_ROOT +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ +#endif + +#ifndef PIDFD_THREAD +#define PIDFD_THREAD O_EXCL +#endif + +struct pidfd_info { + __u64 mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __s32 exit_code; + struct { + __u32 coredump_mask; + __u32 coredump_signal; + __u32 coredump_code; + }; + __u64 supported_mask; +}; + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. @@ -152,4 +305,11 @@ static inline ssize_t write_nointr(int fd, const void *buf, size_t count) return ret; } +static inline int sys_execveat(int dirfd, const char *pathname, + char *const argv[], char *const envp[], + int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_autoreap_test.c b/tools/testing/selftests/pidfd/pidfd_autoreap_test.c new file mode 100644 index 000000000000..1c586482f25b --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_autoreap_test.c @@ -0,0 +1,900 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2026 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/ioctl.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "pidfd.h" +#include "kselftest_harness.h" + +#ifndef CLONE_AUTOREAP +#define CLONE_AUTOREAP (1ULL << 34) +#endif + +#ifndef CLONE_NNP +#define CLONE_NNP (1ULL << 35) +#endif + +#ifndef CLONE_PIDFD_AUTOKILL +#define CLONE_PIDFD_AUTOKILL (1ULL << 36) +#endif + +#ifndef _LINUX_CAPABILITY_VERSION_3 +#define _LINUX_CAPABILITY_VERSION_3 0x20080522 +#endif + +struct cap_header { + __u32 version; + int pid; +}; + +struct cap_data { + __u32 effective; + __u32 permitted; + __u32 inheritable; +}; + +static int drop_all_caps(void) +{ + struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 }; + struct cap_data data[2] = {}; + + return syscall(__NR_capset, &hdr, data); +} + +static pid_t create_autoreap_child(int *pidfd) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_AUTOREAP, + .exit_signal = 0, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, sizeof(args)); +} + +/* + * Test that CLONE_AUTOREAP works without CLONE_PIDFD (fire-and-forget). + */ +TEST(autoreap_without_pidfd) +{ + struct __clone_args args = { + .flags = CLONE_AUTOREAP, + .exit_signal = 0, + }; + pid_t pid; + int ret; + + pid = sys_clone3(&args, sizeof(args)); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_AUTOREAP not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) + _exit(0); + + /* + * Give the child a moment to exit and be autoreaped. + * Then verify no zombie remains. + */ + usleep(200000); + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); +} + +/* + * Test that CLONE_AUTOREAP with a non-zero exit_signal fails. + */ +TEST(autoreap_rejects_exit_signal) +{ + struct __clone_args args = { + .flags = CLONE_AUTOREAP, + .exit_signal = SIGCHLD, + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Test that CLONE_AUTOREAP with CLONE_PARENT fails. + */ +TEST(autoreap_rejects_parent) +{ + struct __clone_args args = { + .flags = CLONE_AUTOREAP | CLONE_PARENT, + .exit_signal = 0, + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Test that CLONE_AUTOREAP with CLONE_THREAD fails. + */ +TEST(autoreap_rejects_thread) +{ + struct __clone_args args = { + .flags = CLONE_AUTOREAP | CLONE_THREAD | + CLONE_SIGHAND | CLONE_VM, + .exit_signal = 0, + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Basic test: create an autoreap child, let it exit, verify: + * - pidfd becomes readable (poll returns POLLIN) + * - PIDFD_GET_INFO returns the correct exit code + * - waitpid() returns -1/ECHILD (no zombie) + */ +TEST(autoreap_basic) +{ + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; + int pidfd = -1, ret; + struct pollfd pfd; + pid_t pid; + + pid = create_autoreap_child(&pidfd); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_AUTOREAP not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) + _exit(42); + + ASSERT_GE(pidfd, 0); + + /* Wait for the child to exit via pidfd poll. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + ASSERT_TRUE(pfd.revents & POLLIN); + + /* Verify exit info via PIDFD_GET_INFO. */ + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); + ASSERT_EQ(ret, 0); + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); + /* + * exit_code is in waitpid format: for _exit(42), + * WIFEXITED is true and WEXITSTATUS is 42. + */ + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 42); + + /* Verify no zombie: waitpid should fail with ECHILD. */ + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); + + close(pidfd); +} + +/* + * Test that an autoreap child killed by a signal reports + * the correct exit info. + */ +TEST(autoreap_signaled) +{ + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; + int pidfd = -1, ret; + struct pollfd pfd; + pid_t pid; + + pid = create_autoreap_child(&pidfd); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_AUTOREAP not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pause(); + _exit(1); + } + + ASSERT_GE(pidfd, 0); + + /* Kill the child. */ + ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); + ASSERT_EQ(ret, 0); + + /* Wait for exit via pidfd. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + ASSERT_TRUE(pfd.revents & POLLIN); + + /* Verify signal info. */ + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); + ASSERT_EQ(ret, 0); + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); + + /* No zombie. */ + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); + + close(pidfd); +} + +/* + * Test autoreap survives reparenting: middle process creates an + * autoreap grandchild, then exits. The grandchild gets reparented + * to us (the grandparent, which is a subreaper). When the grandchild + * exits, it should still be autoreaped - no zombie under us. + */ +TEST(autoreap_reparent) +{ + int ipc_sockets[2], ret; + int pidfd = -1; + struct pollfd pfd; + pid_t mid_pid, grandchild_pid; + char buf[32] = {}; + + /* Make ourselves a subreaper so reparented children come to us. */ + ret = prctl(PR_SET_CHILD_SUBREAPER, 1); + ASSERT_EQ(ret, 0); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + mid_pid = fork(); + ASSERT_GE(mid_pid, 0); + + if (mid_pid == 0) { + /* Middle child: create an autoreap grandchild. */ + int gc_pidfd = -1; + + close(ipc_sockets[0]); + + grandchild_pid = create_autoreap_child(&gc_pidfd); + if (grandchild_pid < 0) { + write_nointr(ipc_sockets[1], "E", 1); + close(ipc_sockets[1]); + _exit(1); + } + + if (grandchild_pid == 0) { + /* Grandchild: wait for signal to exit. */ + close(ipc_sockets[1]); + if (gc_pidfd >= 0) + close(gc_pidfd); + pause(); + _exit(0); + } + + /* Send grandchild PID to grandparent. */ + snprintf(buf, sizeof(buf), "%d", grandchild_pid); + write_nointr(ipc_sockets[1], buf, strlen(buf)); + close(ipc_sockets[1]); + if (gc_pidfd >= 0) + close(gc_pidfd); + + /* Middle child exits, grandchild gets reparented. */ + _exit(0); + } + + close(ipc_sockets[1]); + + /* Read grandchild's PID. */ + ret = read_nointr(ipc_sockets[0], buf, sizeof(buf) - 1); + close(ipc_sockets[0]); + ASSERT_GT(ret, 0); + + if (buf[0] == 'E') { + waitpid(mid_pid, NULL, 0); + prctl(PR_SET_CHILD_SUBREAPER, 0); + SKIP(return, "CLONE_AUTOREAP not supported"); + } + + grandchild_pid = atoi(buf); + ASSERT_GT(grandchild_pid, 0); + + /* Wait for the middle child to exit. */ + ret = waitpid(mid_pid, NULL, 0); + ASSERT_EQ(ret, mid_pid); + + /* + * Now the grandchild is reparented to us (subreaper). + * Open a pidfd for the grandchild and kill it. + */ + pidfd = sys_pidfd_open(grandchild_pid, 0); + ASSERT_GE(pidfd, 0); + + ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); + ASSERT_EQ(ret, 0); + + /* Wait for it to exit via pidfd poll. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + ASSERT_TRUE(pfd.revents & POLLIN); + + /* + * The grandchild should have been autoreaped even though + * we (the new parent) haven't set SA_NOCLDWAIT. + * waitpid should return -1/ECHILD. + */ + ret = waitpid(grandchild_pid, NULL, WNOHANG); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, ECHILD); + + close(pidfd); + + /* Clean up subreaper status. */ + prctl(PR_SET_CHILD_SUBREAPER, 0); +} + +static int thread_sock_fd; + +static void *thread_func(void *arg) +{ + /* Signal parent we're running. */ + write_nointr(thread_sock_fd, "1", 1); + + /* Give main thread time to call _exit() first. */ + usleep(200000); + + return NULL; +} + +/* + * Test that an autoreap child with multiple threads is properly + * autoreaped only after all threads have exited. + */ +TEST(autoreap_multithreaded) +{ + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; + int ipc_sockets[2], ret; + int pidfd = -1; + struct pollfd pfd; + pid_t pid; + char c; + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid = create_autoreap_child(&pidfd); + if (pid < 0 && errno == EINVAL) { + close(ipc_sockets[0]); + close(ipc_sockets[1]); + SKIP(return, "CLONE_AUTOREAP not supported"); + } + ASSERT_GE(pid, 0); + + if (pid == 0) { + pthread_t thread; + + close(ipc_sockets[0]); + + /* + * Create a sub-thread that outlives the main thread. + * The thread signals readiness, then sleeps. + * The main thread waits briefly, then calls _exit(). + */ + thread_sock_fd = ipc_sockets[1]; + pthread_create(&thread, NULL, thread_func, NULL); + pthread_detach(thread); + + /* Wait for thread to be running. */ + usleep(100000); + + /* Main thread exits; sub-thread is still alive. */ + _exit(99); + } + + close(ipc_sockets[1]); + + /* Wait for the sub-thread to signal readiness. */ + ret = read_nointr(ipc_sockets[0], &c, 1); + close(ipc_sockets[0]); + ASSERT_EQ(ret, 1); + + /* Wait for the process to fully exit via pidfd poll. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + ASSERT_TRUE(pfd.revents & POLLIN); + + /* Verify exit info. */ + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); + ASSERT_EQ(ret, 0); + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 99); + + /* No zombie. */ + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); + + close(pidfd); +} + +/* + * Test that autoreap is NOT inherited by grandchildren. + */ +TEST(autoreap_no_inherit) +{ + int ipc_sockets[2], ret; + int pidfd = -1; + pid_t pid; + char buf[2] = {}; + struct pollfd pfd; + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid = create_autoreap_child(&pidfd); + if (pid < 0 && errno == EINVAL) { + close(ipc_sockets[0]); + close(ipc_sockets[1]); + SKIP(return, "CLONE_AUTOREAP not supported"); + } + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t gc; + int status; + + close(ipc_sockets[0]); + + /* Autoreap child forks a grandchild (without autoreap). */ + gc = fork(); + if (gc < 0) { + write_nointr(ipc_sockets[1], "E", 1); + _exit(1); + } + if (gc == 0) { + /* Grandchild: exit immediately. */ + close(ipc_sockets[1]); + _exit(77); + } + + /* + * The grandchild should become a regular zombie + * since it was NOT created with CLONE_AUTOREAP. + * Wait for it to verify. + */ + ret = waitpid(gc, &status, 0); + if (ret == gc && WIFEXITED(status) && + WEXITSTATUS(status) == 77) { + write_nointr(ipc_sockets[1], "P", 1); + } else { + write_nointr(ipc_sockets[1], "F", 1); + } + close(ipc_sockets[1]); + _exit(0); + } + + close(ipc_sockets[1]); + + ret = read_nointr(ipc_sockets[0], buf, 1); + close(ipc_sockets[0]); + ASSERT_EQ(ret, 1); + + /* + * 'P' means the autoreap child was able to waitpid() its + * grandchild (correct - grandchild should be a normal zombie, + * not autoreaped). + */ + ASSERT_EQ(buf[0], 'P'); + + /* Wait for the autoreap child to exit. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + + /* Autoreap child itself should be autoreaped. */ + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); + + close(pidfd); +} + +/* + * Test that CLONE_NNP sets no_new_privs on the child. + * The child checks via prctl(PR_GET_NO_NEW_PRIVS) and reports back. + * The parent must NOT have no_new_privs set afterwards. + */ +TEST(nnp_sets_no_new_privs) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_AUTOREAP | CLONE_NNP, + .exit_signal = 0, + }; + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; + int pidfd = -1, ret; + struct pollfd pfd; + pid_t pid; + + /* Ensure parent does not already have no_new_privs. */ + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("Parent already has no_new_privs set, cannot run test"); + } + + args.pidfd = ptr_to_u64(&pidfd); + + pid = sys_clone3(&args, sizeof(args)); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_NNP not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* + * Child: check no_new_privs. Exit 0 if set, 1 if not. + */ + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + _exit(ret == 1 ? 0 : 1); + } + + ASSERT_GE(pidfd, 0); + + /* Parent must still NOT have no_new_privs. */ + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("Parent got no_new_privs after creating CLONE_NNP child"); + } + + /* Wait for child to exit. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + + /* Verify child exited with 0 (no_new_privs was set). */ + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); + ASSERT_EQ(ret, 0); + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) { + TH_LOG("Child did not have no_new_privs set"); + } + + close(pidfd); +} + +/* + * Test that CLONE_NNP with CLONE_THREAD fails with EINVAL. + */ +TEST(nnp_rejects_thread) +{ + struct __clone_args args = { + .flags = CLONE_NNP | CLONE_THREAD | + CLONE_SIGHAND | CLONE_VM, + .exit_signal = 0, + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Test that a plain CLONE_AUTOREAP child does NOT get no_new_privs. + * Only CLONE_NNP should set it. + */ +TEST(autoreap_no_new_privs_unset) +{ + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; + int pidfd = -1, ret; + struct pollfd pfd; + pid_t pid; + + pid = create_autoreap_child(&pidfd); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_AUTOREAP not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* + * Child: check no_new_privs. Exit 0 if NOT set, 1 if set. + */ + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + _exit(ret == 0 ? 0 : 1); + } + + ASSERT_GE(pidfd, 0); + + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); + ASSERT_EQ(ret, 0); + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) { + TH_LOG("Plain autoreap child unexpectedly has no_new_privs"); + } + + close(pidfd); +} + +/* + * Helper: create a child with CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP. + */ +static pid_t create_autokill_child(int *pidfd) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | + CLONE_AUTOREAP | CLONE_NNP, + .exit_signal = 0, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, sizeof(args)); +} + +/* + * Basic autokill test: child blocks in pause(), parent closes the + * clone3 pidfd, child should be killed and autoreaped. + */ +TEST(autokill_basic) +{ + int pidfd = -1, pollfd_fd = -1, ret; + struct pollfd pfd; + pid_t pid; + + pid = create_autokill_child(&pidfd); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pause(); + _exit(1); + } + + ASSERT_GE(pidfd, 0); + + /* + * Open a second pidfd via pidfd_open() so we can observe the + * child's death after closing the clone3 pidfd. + */ + pollfd_fd = sys_pidfd_open(pid, 0); + ASSERT_GE(pollfd_fd, 0); + + /* Close the clone3 pidfd — this should trigger autokill. */ + close(pidfd); + + /* Wait for the child to die via the pidfd_open'd fd. */ + pfd.fd = pollfd_fd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + ASSERT_TRUE(pfd.revents & POLLIN); + + /* Child should be autoreaped — no zombie. */ + usleep(100000); + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); + + close(pollfd_fd); +} + +/* + * CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must fail with EINVAL. + */ +TEST(autokill_requires_pidfd) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP, + .exit_signal = 0, + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must fail with EINVAL. + */ +TEST(autokill_requires_autoreap) +{ + int pidfd = -1; + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL, + .exit_signal = 0, + .pidfd = ptr_to_u64(&pidfd), + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * CLONE_PIDFD_AUTOKILL with CLONE_THREAD must fail with EINVAL. + */ +TEST(autokill_rejects_thread) +{ + int pidfd = -1; + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | + CLONE_AUTOREAP | CLONE_THREAD | + CLONE_SIGHAND | CLONE_VM, + .exit_signal = 0, + .pidfd = ptr_to_u64(&pidfd), + }; + pid_t pid; + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Test that only the clone3 pidfd triggers autokill, not pidfd_open(). + * Close the pidfd_open'd fd first — child should survive. + * Then close the clone3 pidfd — child should be killed and autoreaped. + */ +TEST(autokill_pidfd_open_no_effect) +{ + int pidfd = -1, open_fd = -1, ret; + struct pollfd pfd; + pid_t pid; + + pid = create_autokill_child(&pidfd); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pause(); + _exit(1); + } + + ASSERT_GE(pidfd, 0); + + /* Open a second pidfd via pidfd_open(). */ + open_fd = sys_pidfd_open(pid, 0); + ASSERT_GE(open_fd, 0); + + /* + * Close the pidfd_open'd fd — child should survive because + * only the clone3 pidfd has autokill. + */ + close(open_fd); + usleep(200000); + + /* Verify child is still alive by polling the clone3 pidfd. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 0); + ASSERT_EQ(ret, 0) { + TH_LOG("Child died after closing pidfd_open fd — should still be alive"); + } + + /* Open another observation fd before triggering autokill. */ + open_fd = sys_pidfd_open(pid, 0); + ASSERT_GE(open_fd, 0); + + /* Now close the clone3 pidfd — this triggers autokill. */ + close(pidfd); + + pfd.fd = open_fd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + ASSERT_TRUE(pfd.revents & POLLIN); + + /* Child should be autoreaped — no zombie. */ + usleep(100000); + ret = waitpid(pid, NULL, WNOHANG); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ECHILD); + + close(open_fd); +} + +/* + * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM + * for an unprivileged caller. + */ +TEST(autokill_requires_cap_sys_admin) +{ + int pidfd = -1, ret; + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | + CLONE_AUTOREAP, + .exit_signal = 0, + .pidfd = ptr_to_u64(&pidfd), + }; + pid_t pid; + + /* Drop all capabilities so we lack CAP_SYS_ADMIN. */ + ret = drop_all_caps(); + ASSERT_EQ(ret, 0); + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_EQ(pid, -1); + ASSERT_EQ(errno, EPERM); +} + +/* + * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with + * CAP_SYS_ADMIN. + */ +TEST(autokill_without_nnp_with_cap) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | + CLONE_AUTOREAP, + .exit_signal = 0, + }; + struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; + int pidfd = -1, ret; + struct pollfd pfd; + pid_t pid; + + if (geteuid() != 0) + SKIP(return, "Need root/CAP_SYS_ADMIN"); + + args.pidfd = ptr_to_u64(&pidfd); + + pid = sys_clone3(&args, sizeof(args)); + if (pid < 0 && errno == EINVAL) + SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); + ASSERT_GE(pid, 0); + + if (pid == 0) + _exit(0); + + ASSERT_GE(pidfd, 0); + + /* Wait for child to exit. */ + pfd.fd = pidfd; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 5000); + ASSERT_EQ(ret, 1); + + ret = ioctl(pidfd, PIDFD_GET_INFO, &info); + ASSERT_EQ(ret, 0); + ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); + + close(pidfd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c index 7822dd080258..1fdf49939524 100644 --- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -14,80 +14,8 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest_harness.h" - -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#endif - -static inline int sys_move_mount(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, - to_pathname, flags); -} - -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} +#include "kselftest_harness.h" +#include "../filesystems/wrappers.h" FIXTURE(pidfd_bind_mount) { char template[PATH_MAX]; diff --git a/tools/testing/selftests/pidfd/pidfd_exec_helper.c b/tools/testing/selftests/pidfd/pidfd_exec_helper.c new file mode 100644 index 000000000000..5516808c95f2 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_exec_helper.c @@ -0,0 +1,12 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +int main(int argc, char *argv[]) +{ + if (pause()) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); +} diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index f062a986e382..9935e9471c77 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -13,9 +13,10 @@ #include <syscall.h> #include <sys/wait.h> #include <sys/mman.h> +#include <sys/mount.h> #include "pidfd.h" -#include "../kselftest.h" +#include "kselftest.h" struct error { int code; diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c index 439b9c6c0457..68918734dcf3 100644 --- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -20,7 +20,7 @@ #include <sys/stat.h> #include "pidfd.h" -#include "../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(file_handle) { @@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags) ASSERT_EQ(close(pidfd), 0); } +/* + * That we decode a file handle without having to pass a pidfd. + */ +TEST_F(file_handle, decode_purely_based_on_file_handle) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(-EBADF, fh, 0); + ASSERT_LT(pidfd, 0); + + pidfd = open_by_handle_at(AT_FDCWD, fh, 0); + ASSERT_LT(pidfd, 0); + + free(fh); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index cd51d547b751..ea45b37001b0 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -19,7 +19,7 @@ #include <linux/kcmp.h> #include "pidfd.h" -#include "../kselftest_harness.h" +#include "kselftest_harness.h" /* * UNKNOWN_FD is an fd number that should never exist in the child, as it is diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c new file mode 100644 index 000000000000..597012ed195f --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_info_test.c @@ -0,0 +1,767 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> + +#include "pidfd.h" +#include "kselftest_harness.h" + +FIXTURE(pidfd_info) +{ + pid_t child_pid1; + int child_pidfd1; + + pid_t child_pid2; + int child_pidfd2; + + pid_t child_pid3; + int child_pidfd3; + + pid_t child_pid4; + int child_pidfd4; +}; + +FIXTURE_SETUP(pidfd_info) +{ + int ret; + int ipc_sockets[2]; + char c; + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid1 = create_child(&self->child_pidfd1, 0); + EXPECT_GE(self->child_pid1, 0); + + if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + /* SIGKILL but don't reap. */ + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid2 = create_child(&self->child_pidfd2, 0); + EXPECT_GE(self->child_pid2, 0); + + if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + /* SIGKILL and reap. */ + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); + + self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid3, 0); + + if (self->child_pid3 == 0) + _exit(EXIT_SUCCESS); + + self->child_pid4 = create_child(&self->child_pidfd4, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid4, 0); + + if (self->child_pid4 == 0) + _exit(EXIT_SUCCESS); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid4, NULL, WEXITED), 0); +} + +FIXTURE_TEARDOWN(pidfd_info) +{ + sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0); + if (self->child_pidfd1 >= 0) + EXPECT_EQ(0, close(self->child_pidfd1)); + + sys_waitid(P_PID, self->child_pid1, NULL, WEXITED); + + sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0); + if (self->child_pidfd2 >= 0) + EXPECT_EQ(0, close(self->child_pidfd2)); + + sys_waitid(P_PID, self->child_pid2, NULL, WEXITED); + sys_waitid(P_PID, self->child_pid3, NULL, WEXITED); + sys_waitid(P_PID, self->child_pid4, NULL, WEXITED); +} + +TEST_F(pidfd_info, sigkill_exit) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID, + }; + + /* Process has exited but not been reaped so this must work. */ + ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0); + + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS)); + /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */ + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT)); +} + +TEST_F(pidfd_info, sigkill_reaped) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID, + }; + + /* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */ + ASSERT_NE(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0); + ASSERT_EQ(errno, ESRCH); + + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); +} + +TEST_F(pidfd_info, success_exit) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID, + }; + + /* Process has exited but not been reaped so this must work. */ + ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0); + + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS)); + /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */ + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT)); +} + +TEST_F(pidfd_info, success_reaped) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID, + }; + + /* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */ + ASSERT_NE(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0); + ASSERT_EQ(errno, ESRCH); + + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); +} + +TEST_F(pidfd_info, success_reaped_poll) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT, + }; + struct pollfd fds = {}; + int nevents; + + fds.events = POLLIN; + fds.fd = self->child_pidfd2; + + nevents = poll(&fds, 1, -1); + ASSERT_EQ(nevents, 1); + ASSERT_TRUE(!!(fds.revents & POLLIN)); + ASSERT_TRUE(!!(fds.revents & POLLHUP)); + + ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); +} + +static void *pidfd_info_pause_thread(void *arg) +{ + pid_t pid_thread = gettid(); + int ipc_socket = *(int *)arg; + + /* Inform the grand-parent what the tid of this thread is. */ + if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread)) + return NULL; + + close(ipc_socket); + + /* Sleep until we're killed. */ + pause(); + return NULL; +} + +TEST_F(pidfd_info, thread_group) +{ + pid_t pid_leader, pid_poller, pid_thread; + pthread_t thread; + int nevents, pidfd_leader, pidfd_thread, pidfd_leader_thread, ret; + int ipc_sockets[2]; + struct pollfd fds = {}; + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT, + }, info2; + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + pid_leader = create_child(&pidfd_leader, 0); + EXPECT_GE(pid_leader, 0); + + if (pid_leader == 0) { + close(ipc_sockets[0]); + + /* The thread will outlive the thread-group leader. */ + if (pthread_create(&thread, NULL, pidfd_info_pause_thread, &ipc_sockets[1])) + syscall(__NR_exit, EXIT_FAILURE); + + /* Make the thread-group leader exit prematurely. */ + syscall(__NR_exit, EXIT_SUCCESS); + } + + /* + * Opening a PIDFD_THREAD aka thread-specific pidfd based on a + * thread-group leader must succeed. + */ + pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD); + ASSERT_GE(pidfd_leader_thread, 0); + + pid_poller = fork(); + ASSERT_GE(pid_poller, 0); + if (pid_poller == 0) { + /* + * We can't poll and wait for the old thread-group + * leader to exit using a thread-specific pidfd. The + * thread-group leader exited prematurely and + * notification is delayed until all subthreads have + * exited. + */ + fds.events = POLLIN; + fds.fd = pidfd_leader_thread; + nevents = poll(&fds, 1, 10000 /* wait 5 seconds */); + if (nevents != 0) + _exit(EXIT_FAILURE); + if (fds.revents & POLLIN) + _exit(EXIT_FAILURE); + if (fds.revents & POLLHUP) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + /* Retrieve the tid of the thread. */ + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread)); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + /* Opening a thread as a thread-group leader must fail. */ + pidfd_thread = sys_pidfd_open(pid_thread, 0); + ASSERT_LT(pidfd_thread, 0); + ASSERT_EQ(errno, ENOENT); + + /* Opening a thread as a PIDFD_THREAD must succeed. */ + pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD); + ASSERT_GE(pidfd_thread, 0); + + ASSERT_EQ(wait_for_pid(pid_poller), 0); + + /* + * Note that pidfd_leader is a thread-group pidfd, so polling on it + * would only notify us once all thread in the thread-group have + * exited. So we can't poll before we have taken down the whole + * thread-group. + */ + + /* Get PIDFD_GET_INFO using the thread-group leader pidfd. */ + ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS)); + /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */ + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_EQ(info.pid, pid_leader); + + /* + * Now retrieve the same info using the thread specific pidfd + * for the thread-group leader. + */ + info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0); + ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_CREDS)); + /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */ + ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_EXIT)); + ASSERT_EQ(info2.pid, pid_leader); + + /* Now try the thread-specific pidfd. */ + ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS)); + /* The thread hasn't exited, so no PIDFD_INFO_EXIT information yet. */ + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_EQ(info.pid, pid_thread); + + /* + * Take down the whole thread-group. The thread-group leader + * exited successfully but the thread will now be SIGKILLed. + * This must be reflected in the recorded exit information. + */ + EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0); + EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0); + + fds.events = POLLIN; + fds.fd = pidfd_leader; + nevents = poll(&fds, 1, -1); + ASSERT_EQ(nevents, 1); + ASSERT_TRUE(!!(fds.revents & POLLIN)); + /* The thread-group leader has been reaped. */ + ASSERT_TRUE(!!(fds.revents & POLLHUP)); + + /* + * Retrieve exit information for the thread-group leader via the + * thread-group leader pidfd. + */ + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); + + /* + * Retrieve exit information for the thread-group leader via the + * thread-specific pidfd. + */ + info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0); + ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT)); + + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info2.exit_code)); + ASSERT_EQ(WTERMSIG(info2.exit_code), SIGKILL); + + /* Retrieve exit information for the thread. */ + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + + /* The thread got SIGKILLed. */ + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); + + EXPECT_EQ(close(pidfd_leader), 0); + EXPECT_EQ(close(pidfd_thread), 0); +} + +static void *pidfd_info_thread_exec(void *arg) +{ + pid_t pid_thread = gettid(); + int ipc_socket = *(int *)arg; + + /* Inform the grand-parent what the tid of this thread is. */ + if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread)) + return NULL; + + if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread)) + return NULL; + + close(ipc_socket); + + sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0); + return NULL; +} + +TEST_F(pidfd_info, thread_group_exec) +{ + pid_t pid_leader, pid_poller, pid_thread; + pthread_t thread; + int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret; + int ipc_sockets[2]; + struct pollfd fds = {}; + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT, + }; + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + pid_leader = create_child(&pidfd_leader, 0); + EXPECT_GE(pid_leader, 0); + + if (pid_leader == 0) { + close(ipc_sockets[0]); + + /* The thread will outlive the thread-group leader. */ + if (pthread_create(&thread, NULL, pidfd_info_thread_exec, &ipc_sockets[1])) + syscall(__NR_exit, EXIT_FAILURE); + + /* Make the thread-group leader exit prematurely. */ + syscall(__NR_exit, EXIT_SUCCESS); + } + + /* Open a thread-specific pidfd for the thread-group leader. */ + pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD); + ASSERT_GE(pidfd_leader_thread, 0); + + pid_poller = fork(); + ASSERT_GE(pid_poller, 0); + if (pid_poller == 0) { + /* + * We can't poll and wait for the old thread-group + * leader to exit using a thread-specific pidfd. The + * thread-group leader exited prematurely and + * notification is delayed until all subthreads have + * exited. + * + * When the thread has execed it will taken over the old + * thread-group leaders struct pid. Calling poll after + * the thread execed will thus block again because a new + * thread-group has started. + */ + fds.events = POLLIN; + fds.fd = pidfd_leader_thread; + nevents = poll(&fds, 1, 10000 /* wait 5 seconds */); + if (nevents != 0) + _exit(EXIT_FAILURE); + if (fds.revents & POLLIN) + _exit(EXIT_FAILURE); + if (fds.revents & POLLHUP) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + /* Retrieve the tid of the thread. */ + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread)); + + /* Opening a thread as a PIDFD_THREAD must succeed. */ + pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD); + ASSERT_GE(pidfd_thread, 0); + + /* Now that we've opened a thread-specific pidfd the thread can exec. */ + ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread)); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + ASSERT_EQ(wait_for_pid(pid_poller), 0); + + /* Wait until the kernel has SIGKILLed the thread. */ + fds.events = POLLHUP; + fds.fd = pidfd_thread; + nevents = poll(&fds, 1, -1); + ASSERT_EQ(nevents, 1); + /* The thread has been reaped. */ + ASSERT_TRUE(!!(fds.revents & POLLHUP)); + + /* Retrieve thread-specific exit info from pidfd. */ + ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + /* + * While the kernel will have SIGKILLed the whole thread-group + * during exec it will cause the individual threads to exit + * cleanly. + */ + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); + + /* + * The thread-group leader is still alive, the thread has taken + * over its struct pid and thus its pid number. + */ + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_EQ(info.pid, pid_leader); + + /* Take down the thread-group leader. */ + EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0); + + /* + * Afte the exec we're dealing with an empty thread-group so now + * we must see an exit notification on the thread-specific pidfd + * for the thread-group leader as there's no subthread that can + * revive the struct pid. + */ + fds.events = POLLIN; + fds.fd = pidfd_leader_thread; + nevents = poll(&fds, 1, -1); + ASSERT_EQ(nevents, 1); + ASSERT_TRUE(!!(fds.revents & POLLIN)); + ASSERT_FALSE(!!(fds.revents & POLLHUP)); + + EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0); + + /* Retrieve exit information for the thread-group leader. */ + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + + EXPECT_EQ(close(pidfd_leader), 0); + EXPECT_EQ(close(pidfd_thread), 0); +} + +static void *pidfd_info_thread_exec_sane(void *arg) +{ + pid_t pid_thread = gettid(); + int ipc_socket = *(int *)arg; + + /* Inform the grand-parent what the tid of this thread is. */ + if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread)) + return NULL; + + if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread)) + return NULL; + + close(ipc_socket); + + sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0); + return NULL; +} + +TEST_F(pidfd_info, thread_group_exec_thread) +{ + pid_t pid_leader, pid_poller, pid_thread; + pthread_t thread; + int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret; + int ipc_sockets[2]; + struct pollfd fds = {}; + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT, + }; + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + pid_leader = create_child(&pidfd_leader, 0); + EXPECT_GE(pid_leader, 0); + + if (pid_leader == 0) { + close(ipc_sockets[0]); + + /* The thread will outlive the thread-group leader. */ + if (pthread_create(&thread, NULL, pidfd_info_thread_exec_sane, &ipc_sockets[1])) + syscall(__NR_exit, EXIT_FAILURE); + + /* + * Pause the thread-group leader. It will be killed once + * the subthread execs. + */ + pause(); + syscall(__NR_exit, EXIT_SUCCESS); + } + + /* Retrieve the tid of the thread. */ + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread)); + + /* Opening a thread as a PIDFD_THREAD must succeed. */ + pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD); + ASSERT_GE(pidfd_thread, 0); + + /* Open a thread-specific pidfd for the thread-group leader. */ + pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD); + ASSERT_GE(pidfd_leader_thread, 0); + + pid_poller = fork(); + ASSERT_GE(pid_poller, 0); + if (pid_poller == 0) { + /* + * The subthread will now exec. The struct pid of the old + * thread-group leader will be assumed by the subthread which + * becomes the new thread-group leader. So no exit notification + * must be generated. Wait for 5 seconds and call it a success + * if no notification has been received. + */ + fds.events = POLLIN; + fds.fd = pidfd_leader_thread; + nevents = poll(&fds, 1, 10000 /* wait 5 seconds */); + if (nevents != 0) + _exit(EXIT_FAILURE); + if (fds.revents & POLLIN) + _exit(EXIT_FAILURE); + if (fds.revents & POLLHUP) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + /* Now that we've opened a thread-specific pidfd the thread can exec. */ + ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread)); + EXPECT_EQ(close(ipc_sockets[0]), 0); + ASSERT_EQ(wait_for_pid(pid_poller), 0); + + /* Wait until the kernel has SIGKILLed the thread. */ + fds.events = POLLHUP; + fds.fd = pidfd_thread; + nevents = poll(&fds, 1, -1); + ASSERT_EQ(nevents, 1); + /* The thread has been reaped. */ + ASSERT_TRUE(!!(fds.revents & POLLHUP)); + + /* Retrieve thread-specific exit info from pidfd. */ + ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + /* + * While the kernel will have SIGKILLed the whole thread-group + * during exec it will cause the individual threads to exit + * cleanly. + */ + ASSERT_TRUE(WIFEXITED(info.exit_code)); + ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); + + /* + * The thread-group leader is still alive, the thread has taken + * over its struct pid and thus its pid number. + */ + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT)); + ASSERT_EQ(info.pid, pid_leader); + + /* Take down the thread-group leader. */ + EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0); + + /* + * Afte the exec we're dealing with an empty thread-group so now + * we must see an exit notification on the thread-specific pidfd + * for the thread-group leader as there's no subthread that can + * revive the struct pid. + */ + fds.events = POLLIN; + fds.fd = pidfd_leader_thread; + nevents = poll(&fds, 1, -1); + ASSERT_EQ(nevents, 1); + ASSERT_TRUE(!!(fds.revents & POLLIN)); + ASSERT_FALSE(!!(fds.revents & POLLHUP)); + + EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0); + + /* Retrieve exit information for the thread-group leader. */ + info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; + ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); + ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); + + EXPECT_EQ(close(pidfd_leader), 0); + EXPECT_EQ(close(pidfd_thread), 0); +} + +/* + * Test: PIDFD_INFO_SUPPORTED_MASK field + * + * Verify that when PIDFD_INFO_SUPPORTED_MASK is requested, the kernel + * returns the supported_mask field indicating which flags the kernel supports. + */ +TEST(supported_mask_field) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_SUPPORTED_MASK, + }; + int pidfd; + pid_t pid; + + pid = create_child(&pidfd, 0); + ASSERT_GE(pid, 0); + + if (pid == 0) + pause(); + + /* Request supported_mask field */ + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + + /* Verify PIDFD_INFO_SUPPORTED_MASK is set in the reply */ + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_SUPPORTED_MASK)); + + /* Verify supported_mask contains expected flags */ + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_PID)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_CREDS)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_CGROUPID)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_EXIT)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_SUPPORTED_MASK)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_SIGNAL)); + ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_CODE)); + + /* Clean up */ + sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); + sys_waitid(P_PIDFD, pidfd, NULL, WEXITED); + close(pidfd); +} + +/* + * Test: PIDFD_INFO_SUPPORTED_MASK always available + * + * Verify that supported_mask is returned even when other fields are requested. + */ +TEST(supported_mask_with_other_fields) +{ + struct pidfd_info info = { + .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_SUPPORTED_MASK, + }; + int pidfd; + pid_t pid; + + pid = create_child(&pidfd, 0); + ASSERT_GE(pid, 0); + + if (pid == 0) + pause(); + + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + + /* Both fields should be present */ + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CGROUPID)); + ASSERT_TRUE(!!(info.mask & PIDFD_INFO_SUPPORTED_MASK)); + ASSERT_NE(info.supported_mask, 0); + + /* Clean up */ + sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); + sys_waitid(P_PIDFD, pidfd, NULL, WEXITED); + close(pidfd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index ce413a221bac..318e6f09c8e0 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -20,33 +20,7 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest.h" - -#ifndef PIDFS_IOCTL_MAGIC -#define PIDFS_IOCTL_MAGIC 0xFF -#endif - -#ifndef PIDFD_GET_INFO -#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) -#define PIDFD_INFO_CGROUPID (1UL << 0) - -struct pidfd_info { - __u64 request_mask; - __u64 cgroupid; - __u32 pid; - __u32 tgid; - __u32 ppid; - __u32 ruid; - __u32 rgid; - __u32 euid; - __u32 egid; - __u32 suid; - __u32 sgid; - __u32 fsuid; - __u32 fsgid; - __u32 spare0[1]; -}; -#endif +#include "kselftest.h" static int safe_int(const char *numstr, int *converted) { @@ -148,7 +122,7 @@ out: int main(int argc, char **argv) { struct pidfd_info info = { - .request_mask = PIDFD_INFO_CGROUPID, + .mask = PIDFD_INFO_CGROUPID, }; int pidfd = -1, ret = 1; pid_t pid; @@ -227,7 +201,7 @@ int main(int argc, char **argv) getegid(), info.sgid); goto on_error; } - if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + if ((info.mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); goto on_error; } diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 55d74a50358f..232304f818c7 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -14,7 +14,7 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest.h" +#include "kselftest.h" static bool timeout; diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c new file mode 100644 index 000000000000..e8562a2992f3 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "pidfd.h" +#include "kselftest_harness.h" + +FIXTURE(pidfs_setattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_setattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_setattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); + EXPECT_EQ(close(self->child_pidfd), 0); +} + +TEST_F(pidfs_setattr, no_chown) +{ + ASSERT_LT(fchown(self->child_pidfd, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_chmod) +{ + ASSERT_LT(fchmod(self->child_pidfd, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_exec) +{ + char *const argv[] = { NULL }; + char *const envp[] = { NULL }; + + ASSERT_LT(execveat(self->child_pidfd, "", argv, envp, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 222f8131283b..107edecff224 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,54 +16,9 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> -#include <linux/ioctl.h> #include "pidfd.h" -#include "../kselftest_harness.h" - -#ifndef PIDFS_IOCTL_MAGIC -#define PIDFS_IOCTL_MAGIC 0xFF -#endif - -#ifndef PIDFD_GET_CGROUP_NAMESPACE -#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) -#endif - -#ifndef PIDFD_GET_IPC_NAMESPACE -#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) -#endif - -#ifndef PIDFD_GET_MNT_NAMESPACE -#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) -#endif - -#ifndef PIDFD_GET_NET_NAMESPACE -#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) -#endif - -#ifndef PIDFD_GET_PID_NAMESPACE -#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) -#endif - -#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE -#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) -#endif - -#ifndef PIDFD_GET_TIME_NAMESPACE -#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) -#endif - -#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE -#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) -#endif - -#ifndef PIDFD_GET_USER_NAMESPACE -#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) -#endif - -#ifndef PIDFD_GET_UTS_NAMESPACE -#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) -#endif +#include "kselftest_harness.h" enum { PIDFD_NS_USER, diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index e9728e86b4f2..932cbd8caa77 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -20,7 +20,7 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest.h" +#include "kselftest.h" #define str(s) _str(s) #define _str(s) #s @@ -42,12 +42,41 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) #endif } -static int signal_received; +static pthread_t signal_received; static void set_signal_received_on_sigusr1(int sig) { if (sig == SIGUSR1) - signal_received = 1; + signal_received = pthread_self(); +} + +static int send_signal(int pidfd) +{ + int ret = 0; + + if (sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0) < 0) { + ret = -EINVAL; + goto exit; + } + + if (signal_received != pthread_self()) { + ret = -EINVAL; + goto exit; + } + +exit: + signal_received = 0; + return ret; +} + +static void *send_signal_worker(void *arg) +{ + int pidfd = (int)(intptr_t)arg; + int ret; + + /* We forward any errors for the caller to handle. */ + ret = send_signal(pidfd); + return (void *)(intptr_t)ret; } /* @@ -56,8 +85,11 @@ static void set_signal_received_on_sigusr1(int sig) */ static int test_pidfd_send_signal_simple_success(void) { - int pidfd, ret; + int pidfd; const char *test_name = "pidfd_send_signal send SIGUSR1"; + pthread_t thread; + void *thread_res; + int err; if (!have_pidfd_send_signal) { ksft_test_result_skip( @@ -66,25 +98,45 @@ static int test_pidfd_send_signal_simple_success(void) return 0; } + signal(SIGUSR1, set_signal_received_on_sigusr1); + + /* Try sending a signal to ourselves via /proc/self. */ pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC); if (pidfd < 0) ksft_exit_fail_msg( "%s test: Failed to open process file descriptor\n", test_name); + err = send_signal(pidfd); + if (err) + ksft_exit_fail_msg( + "%s test: Error %d on sending pidfd signal\n", + test_name, err); + close(pidfd); - signal(SIGUSR1, set_signal_received_on_sigusr1); + /* Now try the same thing only using PIDFD_SELF_THREAD_GROUP. */ + err = send_signal(PIDFD_SELF_THREAD_GROUP); + if (err) + ksft_exit_fail_msg( + "%s test: Error %d on PIDFD_SELF_THREAD_GROUP signal\n", + test_name, err); - ret = sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0); - close(pidfd); - if (ret < 0) - ksft_exit_fail_msg("%s test: Failed to send signal\n", + /* + * Now try the same thing in a thread and assert thread ID is equal to + * worker thread ID. + */ + if (pthread_create(&thread, NULL, send_signal_worker, + (void *)(intptr_t)PIDFD_SELF_THREAD)) + ksft_exit_fail_msg("%s test: Failed to create thread\n", test_name); - - if (signal_received != 1) - ksft_exit_fail_msg("%s test: Failed to receive signal\n", + if (pthread_join(thread, &thread_res)) + ksft_exit_fail_msg("%s test: Failed to join thread\n", test_name); + err = (int)(intptr_t)thread_res; + if (err) + ksft_exit_fail_msg( + "%s test: Error %d on PIDFD_SELF_THREAD signal\n", + test_name, err); - signal_received = 0; ksft_test_result_pass("%s test: Sent signal\n", test_name); return 0; } diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 1e2d49751cde..4bf702d62c1c 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -17,7 +17,7 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c new file mode 100644 index 000000000000..fd57511af7e4 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "pidfd.h" +#include "kselftest_harness.h" + +FIXTURE(pidfs_xattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_xattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_xattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); +} + +TEST_F(pidfs_xattr, set_get_list_xattr_multiple) +{ + int ret, i; + char xattr_name[32]; + char xattr_value[32]; + char buf[32]; + const int num_xattrs = 10; + char list[PATH_MAX] = {}; + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + ret = fsetxattr(self->child_pidfd, xattr_name, xattr_value, strlen(xattr_value), 0); + ASSERT_EQ(ret, 0); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, strlen(xattr_value)); + ASSERT_EQ(strcmp(buf, xattr_value), 0); + } + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + bool found = false; + for (char *it = list; it < list + ret; it += strlen(it) + 1) { + if (strcmp(it, xattr_name)) + continue; + found = true; + break; + } + ASSERT_TRUE(found); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + ret = fremovexattr(self->child_pidfd, xattr_name); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); + } +} + +TEST_F(pidfs_xattr, set_get_list_xattr_persistent) +{ + int ret; + char buf[32]; + char list[PATH_MAX] = {}; + + ret = fsetxattr(self->child_pidfd, "trusted.persistent", "persistent value", strlen("persistent value"), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0) + + ASSERT_EQ(close(self->child_pidfd), 0); + self->child_pidfd = -EBADF; + sleep(2); + + self->child_pidfd = sys_pidfd_open(self->child_pid, 0); + ASSERT_GE(self->child_pidfd, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0); +} + +TEST_HARNESS_MAIN |
