diff options
author | Oleg Nesterov <oleg@redhat.com> | 2024-01-31 14:26:02 +0100 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2024-02-02 13:12:28 +0100 |
commit | 64bef697d33b75fc06c5789b3f8108680271529f (patch) | |
tree | dacc56670ce9a76cb7252e69dd88fcdeb7d72f2f /kernel/fork.c | |
parent | 21e25205d7f9b6d7d3807546dd12ea93844b7c8e (diff) | |
download | lwn-64bef697d33b75fc06c5789b3f8108680271529f.tar.gz lwn-64bef697d33b75fc06c5789b3f8108680271529f.zip |
pidfd: implement PIDFD_THREAD flag for pidfd_open()
With this flag:
- pidfd_open() no longer requires the target task to be
a thread-group leader
- pidfd_poll() succeeds when the task exits and becomes a
zombie (i.e. once it has passed exit_notify()), even if it is
a leader and the thread-group is not empty.
This means that the behaviour of pidfd_poll(PIDFD_THREAD,
pid-of-group-leader) is not well defined if it races with
exec() from its sub-thread; pidfd_poll() can succeed or not
depending on whether pidfd_task_exited() is called before
or after exchange_tids().
Perhaps we can improve this behaviour later, pidfd_poll()
can probably take sig->group_exec_task into account. But
this doesn't really differ from the case when the leader
exits before other threads (so pidfd_poll() succeeds) and
then another thread execs and pidfd_poll() will block again.
thread_group_exited() is no longer used; perhaps it can be removed.
Co-developed-by: Tycho Andersen <tycho@tycho.pizza>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Link: https://lore.kernel.org/r/20240131132602.GA23641@redhat.com
Tested-by: Tycho Andersen <tandersen@netflix.com>
Reviewed-by: Tycho Andersen <tandersen@netflix.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 38 |
1 files changed, 31 insertions, 7 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 726a92043531..1a9b91055916 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -101,6 +101,7 @@ #include <linux/user_events.h> #include <linux/iommu.h> #include <linux/rseq.h> +#include <uapi/linux/pidfd.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) seq_put_decimal_ll(m, "Pid:\t", nr); + /* TODO: report PIDFD_THREAD */ + #ifdef CONFIG_PID_NS seq_put_decimal_ll(m, "\nNSpid:\t", nr); if (nr > 0) { @@ -2068,22 +2071,35 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) } #endif +static bool pidfd_task_exited(struct pid *pid, bool thread) +{ + struct task_struct *task; + bool exited; + + rcu_read_lock(); + task = pid_task(pid, PIDTYPE_PID); + exited = !task || + (READ_ONCE(task->exit_state) && (thread || thread_group_empty(task))); + rcu_read_unlock(); + + return exited; +} + /* * Poll support for process exit notification. */ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct pid *pid = file->private_data; + bool thread = file->f_flags & PIDFD_THREAD; __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); - /* - * Inform pollers only when the whole thread group exits. - * If the thread group leader exits before all other threads in the - * group, then poll(2) should block, similar to the wait(2) family. + * Depending on PIDFD_THREAD, inform pollers when the thread + * or the whole thread-group exits. */ - if (thread_group_exited(pid)) + if (pidfd_task_exited(pid, thread)) poll_flags = EPOLLIN | EPOLLRDNORM; return poll_flags; @@ -2141,6 +2157,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re return PTR_ERR(pidfd_file); } get_pid(pid); /* held by pidfd_file now */ + /* + * anon_inode_getfile() ignores everything outside of the + * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually. 
+ */ + pidfd_file->f_flags |= (flags & PIDFD_THREAD); *ret = pidfd_file; return pidfd; } @@ -2154,7 +2175,8 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re * Allocate a new file that stashes @pid and reserve a new pidfd number in the * caller's file descriptor table. The pidfd is reserved but not installed yet. * - * The helper verifies that @pid is used as a thread group leader. + * The helper verifies that @pid is still in use, without PIDFD_THREAD the + * task identified by @pid must be a thread-group leader. * * If this function returns successfully the caller is responsible to either * call fd_install() passing the returned pidfd and pidfd file as arguments in @@ -2173,7 +2195,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - if (!pid || !pid_has_task(pid, PIDTYPE_TGID)) + bool thread = flags & PIDFD_THREAD; + + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) return -EINVAL; return __pidfd_prepare(pid, flags, ret); |