diff options
author | Christian Brauner <brauner@kernel.org> | 2025-04-11 17:09:44 +0200 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2025-04-12 14:04:53 +0200 |
commit | a9d7de0f68b79e5e481967fc605698915a37ac13 (patch) | |
tree | c038848db5e8c37bcc784fb5602c370c7c560099 | |
parent | 1e940fff94374d04b6c34f896ed9fbad3d2fb706 (diff) | |
parent | 17f1b08acf50c0bfb02e21623e53e7e575612b67 (diff) |
Merge patch series "pidfs: ensure consistent ENOENT/ESRCH reporting"
Christian Brauner <brauner@kernel.org> says:
In a prior patch series we tried to cleanly differentiate between:
(1) The task has already been reaped.
(2) The caller requested a pidfd for a thread-group leader but the pid
actually references a struct pid that isn't used as a thread-group
leader.
as this was causing issues for non-threaded workloads.
But there are cases where the current simple logic is wrong. Specifically,
it is wrong if the pid was a leader pid and the check races with __unhash_process().
Stabilize this by using the pidfd waitqueue lock.
* patches from https://lore.kernel.org/20250411-work-pidfs-enoent-v2-0-60b2d3bb545f@kernel.org:
pidfs: ensure consistent ENOENT/ESRCH reporting
exit: move wake_up_all() pidfd waiters into __unhash_process()
Link: https://lore.kernel.org/20250411-work-pidfs-enoent-v2-0-60b2d3bb545f@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r-- | kernel/exit.c | 5 | ||||
-rw-r--r-- | kernel/fork.c | 34 | ||||
-rw-r--r-- | kernel/pid.c | 5 |
3 files changed, 21 insertions, 23 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 1b51dc099f1e..abcd93ce4e18 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -133,8 +133,13 @@ struct release_task_post {
 static void __unhash_process(struct release_task_post *post, struct task_struct *p,
 			     bool group_dead)
 {
+	struct pid *pid = task_pid(p);
+
 	nr_threads--;
+
 	detach_pid(post->pids, p, PIDTYPE_PID);
+	wake_up_all(&pid->wait_pidfd);
+
 	if (group_dead) {
 		detach_pid(post->pids, p, PIDTYPE_TGID);
 		detach_pid(post->pids, p, PIDTYPE_PGID);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4a2080b968c8..f7403e1fb0d4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2108,28 +2108,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
  */
 int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
 {
-	int err = 0;
-
-	if (!(flags & PIDFD_THREAD)) {
+	/*
+	 * While holding the pidfd waitqueue lock removing the task
+	 * linkage for the thread-group leader pid (PIDTYPE_TGID) isn't
+	 * possible. Thus, if there's still task linkage for PIDTYPE_PID
+	 * not having thread-group leader linkage for the pid means it
+	 * wasn't a thread-group leader in the first place.
+	 */
+	scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
+		/* Task has already been reaped. */
+		if (!pid_has_task(pid, PIDTYPE_PID))
+			return -ESRCH;
 		/*
-		 * If this is struct pid isn't used as a thread-group
-		 * leader pid but the caller requested to create a
-		 * thread-group leader pidfd then report ENOENT to the
-		 * caller as a hint.
+		 * If this struct pid isn't used as a thread-group
+		 * leader but the caller requested to create a
+		 * thread-group leader pidfd then report ENOENT.
 		 */
-		if (!pid_has_task(pid, PIDTYPE_TGID))
-			err = -ENOENT;
+		if (!(flags & PIDFD_THREAD) && !pid_has_task(pid, PIDTYPE_TGID))
+			return -ENOENT;
 	}
 
-	/*
-	 * If this wasn't a thread-group leader struct pid or the task
-	 * got reaped in the meantime report -ESRCH to userspace.
-	 */
-	if (!pid_has_task(pid, PIDTYPE_PID))
-		err = -ESRCH;
-	if (err)
-		return err;
-
 	return __pidfd_prepare(pid, flags, ret);
 }
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 4ac2ce46817f..26f1e136f017 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -359,11 +359,6 @@ static void __change_pid(struct pid **pids, struct task_struct *task,
 	hlist_del_rcu(&task->pid_links[type]);
 	*pid_ptr = new;
 
-	if (type == PIDTYPE_PID) {
-		WARN_ON_ONCE(pid_has_task(pid, PIDTYPE_PID));
-		wake_up_all(&pid->wait_pidfd);
-	}
-
 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
 		if (pid_has_task(pid, tmp))
 			return;