diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 26 | ||||
-rw-r--r-- | kernel/pid.c | 71 | ||||
-rw-r--r-- | kernel/signal.c | 11 |
3 files changed, 108 insertions, 0 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 847dd147b068..187c02ce534c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1711,8 +1711,34 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) } #endif +/* + * Poll support for process exit notification. + * + * Registers the wait_pidfd wait queue of the struct pid held in + * file->private_data with the poll table, then reports + * POLLIN | POLLRDNORM when pid_task() finds no PIDTYPE_PID task for that + * pid, or when the task has a non-zero exit_state and its thread group is + * empty. Returns 0 otherwise. + */ +static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) +{ + struct task_struct *task; + struct pid *pid = file->private_data; + int poll_flags = 0; + + poll_wait(file, &pid->wait_pidfd, pts); + + rcu_read_lock(); + task = pid_task(pid, PIDTYPE_PID); + /* + * Inform pollers only when the whole thread group exits. + * If the thread group leader exits before all other threads in the + * group, then poll(2) should block, similar to the wait(2) family. + */ + if (!task || (task->exit_state && thread_group_empty(task))) + poll_flags = POLLIN | POLLRDNORM; + rcu_read_unlock(); + + return poll_flags; +} + const struct file_operations pidfd_fops = { .release = pidfd_release, + .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif diff --git a/kernel/pid.c b/kernel/pid.c index e5cad0c7d5dd..16263b526560 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -38,6 +38,8 @@ #include <linux/syscalls.h> #include <linux/proc_ns.h> #include <linux/proc_fs.h> +#include <linux/anon_inodes.h> +#include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/idr.h> @@ -214,6 +216,8 @@ struct pid *alloc_pid(struct pid_namespace *ns) for (type = 0; type < PIDTYPE_MAX; ++type) INIT_HLIST_HEAD(&pid->tasks[type]); + init_waitqueue_head(&pid->wait_pidfd); + upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) @@ -451,6 +455,73 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) return idr_get_next(&ns->idr, &nr); } +/** + * pidfd_create() - Create a new pid file descriptor. 
+ * + * @pid: struct pid that the pidfd will reference + * + * This creates a new pid file descriptor with the O_CLOEXEC flag set. + * The file takes its own reference on @pid via get_pid(); that reference + * is dropped again here if anon_inode_getfd() fails. + * + * Note that this function can only be called after the fd table has + * been unshared to avoid leaking the pidfd to the new process. + * + * Return: On success, a cloexec pidfd is returned. + * On error, a negative errno number will be returned. + */ +static int pidfd_create(struct pid *pid) +{ + int fd; + + fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), + O_RDWR | O_CLOEXEC); + if (fd < 0) + put_pid(pid); + + return fd; +} + +/** + * pidfd_open() - Open new pid file descriptor. + * + * @pid: pid for which to retrieve a pidfd + * @flags: flags to pass; any non-zero value is rejected with -EINVAL + * + * This creates a new pid file descriptor with the O_CLOEXEC flag set for + * the process identified by @pid. Currently, the process identified by + * @pid must be a thread-group leader. This restriction currently exists + * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot + * be used with CLONE_THREAD) and pidfd polling (only supports thread group + * leaders). + * + * Return: On success, a cloexec pidfd is returned. + * On error, a negative errno number will be returned: -EINVAL if + * @flags is non-zero, @pid is not positive, or no PIDTYPE_TGID task + * is attached to @pid; -ESRCH if find_get_pid() returns NULL. 
+ */ +SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) +{ + int fd, ret; + struct pid *p; + + if (flags) + return -EINVAL; + + if (pid <= 0) + return -EINVAL; + + p = find_get_pid(pid); + if (!p) + return -ESRCH; + + ret = 0; + rcu_read_lock(); + if (!pid_task(p, PIDTYPE_TGID)) + ret = -EINVAL; + rcu_read_unlock(); + + fd = ret ?: pidfd_create(p); + put_pid(p); + return fd; +} + void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ diff --git a/kernel/signal.c b/kernel/signal.c index 91cb8ca41954..dabe100d2091 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1881,6 +1881,14 @@ ret: return ret; } +static void do_notify_pidfd(struct task_struct *task) +{ + struct pid *pid; + + pid = task_pid(task); + wake_up_all(&pid->wait_pidfd); +} + /* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. @@ -1904,6 +1912,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig) BUG_ON(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); + /* Wake up all pidfd waiters sleeping on this task's struct pid */ + do_notify_pidfd(tsk); + if (sig != SIGCHLD) { /* * This is only possible if parent == real_parent. |