diff options
author | Christian Brauner <christian@brauner.io> | 2019-04-07 21:18:11 +0200 |
---|---|---|
committer | Christian Brauner <christian@brauner.io> | 2019-05-07 14:31:04 +0200 |
commit | 43c6afee48d4d866d5eb984d3a5dbbc7d9b4e7bf (patch) | |
tree | 19fa00ede54949757cea719765db5ec86fb89f2a | |
parent | 2151ad1b067275730de1b38c7257478cae47d29e (diff) | |
download | lwn-43c6afee48d4d866d5eb984d3a5dbbc7d9b4e7bf.tar.gz lwn-43c6afee48d4d866d5eb984d3a5dbbc7d9b4e7bf.zip |
samples: show race-free pidfd metadata access
This is a sample program showing userspace how to get race-free access
to process metadata from a pidfd. It is rather easy to do and userspace
can actually simply reuse code that currently parses a process's status
file in procfs.
The program can easily be extended into a generic helper suitable for
inclusion in a libc to make it even easier for userspace to gain metadata
access.
Since this came up in a discussion because this API is going to be used
in various service managers: A lot of programs will have a whitelist
seccomp filter that returns <some-errno> for all new syscalls. This
means that programs might get confused if CLONE_PIDFD works but the
later pidfd_send_signal() syscall doesn't. Hence, here's an ahead-of-time
check that pidfd_send_signal() is supported:
/*
 * Probe whether pidfd_send_signal() is usable by the calling process.
 *
 * Opens /proc/self as a directory fd and sends the null signal (0) through
 * it, which performs the permission/existence checks without delivering
 * anything.
 *
 * Returns true if the syscall succeeded, false if /proc/self could not be
 * opened or the syscall failed for any reason.
 */
bool pidfd_send_signal_supported(void)
{
	int procfd, ret;

	procfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (procfd < 0)
		return false;

	/*
	 * A process is always allowed to signal itself so
	 * pidfd_send_signal() should never fail this test. If it does
	 * it must mean it is not available, blocked by an LSM, seccomp,
	 * or other.
	 */
	ret = pidfd_send_signal(procfd, 0, NULL, 0);

	/* The fd was only needed for the probe; close it so it is not leaked. */
	close(procfd);

	return ret == 0;
}
Signed-off-by: Christian Brauner <christian@brauner.io>
Co-developed-by: Jann Horn <jannh@google.com>
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | samples/Makefile | 2 | ||||
-rw-r--r-- | samples/pidfd/Makefile | 6 | ||||
-rw-r--r-- | samples/pidfd/pidfd-metadata.c | 112 |
3 files changed, 119 insertions, 1 deletions
diff --git a/samples/Makefile b/samples/Makefile
index b1142a958811..fadadb1c3b05 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,4 +3,4 @@
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
 			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
 			   configfs/ connector/ v4l/ trace_printk/ \
-			   vfio-mdev/ statx/ qmi/ binderfs/
+			   vfio-mdev/ statx/ qmi/ binderfs/ pidfd/
diff --git a/samples/pidfd/Makefile b/samples/pidfd/Makefile
new file mode 100644
index 000000000000..0ff97784177a
--- /dev/null
+++ b/samples/pidfd/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+hostprogs-y := pidfd-metadata
+always := $(hostprogs-y)
+HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
+all: pidfd-metadata
diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c
new file mode 100644
index 000000000000..640f5f757c57
--- /dev/null
+++ b/samples/pidfd/pidfd-metadata.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x00001000
+#endif
+
+static int do_child(void *args)
+{
+	printf("%d\n", getpid());
+	_exit(EXIT_SUCCESS);
+}
+
+static pid_t pidfd_clone(int flags, int *pidfd)
+{
+	size_t stack_size = 1024;
+	char *stack[1024] = { 0 };
+
+#ifdef __ia64__
+	return __clone2(do_child, stack, stack_size, flags | SIGCHLD, NULL, pidfd);
+#else
+	return clone(do_child, stack + stack_size, flags | SIGCHLD, NULL, pidfd);
+#endif
+}
+
+static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
+					unsigned int flags)
+{
+	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+}
+
+static int pidfd_metadata_fd(pid_t pid, int pidfd)
+{
+	int procfd, ret;
+	char path[100];
+
+	snprintf(path, sizeof(path), "/proc/%d", pid);
+	procfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+	if (procfd < 0) {
+		warn("Failed to open %s\n", path);
+		return -1;
+	}
+
+	/*
+	 * Verify that the pid has not been recycled and our /proc/<pid> handle
+	 * is still valid.
+	 */
+	ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
+	if (ret < 0) {
+		switch (errno) {
+		case EPERM:
+			/* Process exists, just not allowed to signal it. */
+			break;
+		default:
+			warn("Failed to signal process\n");
+			close(procfd);
+			procfd = -1;
+		}
+	}
+
+	return procfd;
+}
+
+int main(int argc, char *argv[])
+{
+	int pidfd = 0, ret = EXIT_FAILURE;
+	char buf[4096] = { 0 };
+	pid_t pid;
+	int procfd, statusfd;
+	ssize_t bytes;
+
+	pid = pidfd_clone(CLONE_PIDFD, &pidfd);
+	if (pid < 0)
+		exit(ret);
+
+	procfd = pidfd_metadata_fd(pid, pidfd);
+	close(pidfd);
+	if (procfd < 0)
+		goto out;
+
+	statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC);
+	close(procfd);
+	if (statusfd < 0)
+		goto out;
+
+	bytes = read(statusfd, buf, sizeof(buf));
+	if (bytes > 0)
+		bytes = write(STDOUT_FILENO, buf, bytes);
+	close(statusfd);
+	ret = EXIT_SUCCESS;
+
+out:
+	(void)wait(NULL);
+
+	exit(ret);
+}