From 21e25205d7f9b6d7d3807546dd12ea93844b7c8e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 27 Jan 2024 14:24:07 +0100 Subject: pidfd: don't do_notify_pidfd() if !thread_group_empty() do_notify_pidfd() makes no sense until the whole thread group exits, change do_notify_parent() to check thread_group_empty(). This avoids the unnecessary do_notify_pidfd() when tsk is not a leader, or it exits before other threads, or it has a ptraced EXIT_ZOMBIE sub-thread. Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240127132407.GA29136@redhat.com Reviewed-by: Tycho Andersen Signed-off-by: Christian Brauner --- kernel/signal.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index c9c57d053ce4..9561a3962ca6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2050,9 +2050,11 @@ bool do_notify_parent(struct task_struct *tsk, int sig) WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); - - /* Wake up all pidfd waiters */ - do_notify_pidfd(tsk); + /* + * tsk is a group leader and has no threads, wake up the pidfd waiters. + */ + if (thread_group_empty(tsk)) + do_notify_pidfd(tsk); if (sig != SIGCHLD) { /* -- cgit v1.2.3 From 64bef697d33b75fc06c5789b3f8108680271529f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 31 Jan 2024 14:26:02 +0100 Subject: pidfd: implement PIDFD_THREAD flag for pidfd_open() With this flag: - pidfd_open() doesn't require that the target task must be a thread-group leader - pidfd_poll() succeeds when the task exits and becomes a zombie (iow, passes exit_notify()), even if it is a leader and thread-group is not empty. This means that the behaviour of pidfd_poll(PIDFD_THREAD, pid-of-group-leader) is not well defined if it races with exec() from its sub-thread; pidfd_poll() can succeed or not depending on whether pidfd_task_exited() is called before or after exchange_tids(). 
Perhaps we can improve this behaviour later, pidfd_poll() can probably take sig->group_exec_task into account. But this doesn't really differ from the case when the leader exits before other threads (so pidfd_poll() succeeds) and then another thread execs and pidfd_poll() will block again. thread_group_exited() is no longer used, perhaps it can die. Co-developed-by: Tycho Andersen Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240131132602.GA23641@redhat.com Tested-by: Tycho Andersen Reviewed-by: Tycho Andersen Signed-off-by: Christian Brauner --- fs/exec.c | 6 +++++- include/linux/pid.h | 3 ++- include/uapi/linux/pidfd.h | 3 ++- kernel/exit.c | 7 +++++++ kernel/fork.c | 38 +++++++++++++++++++++++++++++++------- kernel/pid.c | 14 +++----------- kernel/signal.c | 6 ++++-- 7 files changed, 54 insertions(+), 23 deletions(-) (limited to 'kernel/signal.c') diff --git a/fs/exec.c b/fs/exec.c index 8cdd5b2dd09c..b68f61bbcaa8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1143,7 +1143,11 @@ static int de_thread(struct task_struct *tsk) BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; - + /* + * leader and tsk exchanged their pids, the old pid dies, + * wake up the PIDFD_THREAD waiters. + */ + do_notify_pidfd(leader); /* * We are going to release_task()->ptrace_unlink() silently, * the tracer can sleep in do_wait(). 
EXIT_DEAD guarantees diff --git a/include/linux/pid.h b/include/linux/pid.h index e6a041cb8bac..8124d57752b9 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -70,10 +70,11 @@ extern const struct file_operations pidfd_fops; struct file; -extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 5406fbc13074..2e6461459877 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -7,6 +7,7 @@ #include /* Flags for pidfd_open(). */ -#define PIDFD_NONBLOCK O_NONBLOCK +#define PIDFD_NONBLOCK O_NONBLOCK +#define PIDFD_THREAD O_EXCL #endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 3988a02efaef..c038d10dfb38 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -739,6 +739,13 @@ static void exit_notify(struct task_struct *tsk, int group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); tsk->exit_state = EXIT_ZOMBIE; + /* + * sub-thread or delay_group_leader(), wake up the + * PIDFD_THREAD waiters. 
+ */ + if (!thread_group_empty(tsk)) + do_notify_pidfd(tsk); + if (unlikely(tsk->ptrace)) { int sig = thread_group_leader(tsk) && thread_group_empty(tsk) && diff --git a/kernel/fork.c b/kernel/fork.c index 726a92043531..1a9b91055916 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -101,6 +101,7 @@ #include #include #include +#include #include #include @@ -2050,6 +2051,8 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) seq_put_decimal_ll(m, "Pid:\t", nr); + /* TODO: report PIDFD_THREAD */ + #ifdef CONFIG_PID_NS seq_put_decimal_ll(m, "\nNSpid:\t", nr); if (nr > 0) { @@ -2068,22 +2071,35 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) } #endif +static bool pidfd_task_exited(struct pid *pid, bool thread) +{ + struct task_struct *task; + bool exited; + + rcu_read_lock(); + task = pid_task(pid, PIDTYPE_PID); + exited = !task || + (READ_ONCE(task->exit_state) && (thread || thread_group_empty(task))); + rcu_read_unlock(); + + return exited; +} + /* * Poll support for process exit notification. */ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct pid *pid = file->private_data; + bool thread = file->f_flags & PIDFD_THREAD; __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); - /* - * Inform pollers only when the whole thread group exits. - * If the thread group leader exits before all other threads in the - * group, then poll(2) should block, similar to the wait(2) family. + * Depending on PIDFD_THREAD, inform pollers when the thread + * or the whole thread-group exits. 
*/ - if (thread_group_exited(pid)) + if (pidfd_task_exited(pid, thread)) poll_flags = EPOLLIN | EPOLLRDNORM; return poll_flags; @@ -2141,6 +2157,11 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re return PTR_ERR(pidfd_file); } get_pid(pid); /* held by pidfd_file now */ + /* + * anon_inode_getfile() ignores everything outside of the + * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually. + */ + pidfd_file->f_flags |= (flags & PIDFD_THREAD); *ret = pidfd_file; return pidfd; } @@ -2154,7 +2175,8 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re * Allocate a new file that stashes @pid and reserve a new pidfd number in the * caller's file descriptor table. The pidfd is reserved but not installed yet. * - * The helper verifies that @pid is used as a thread group leader. + * The helper verifies that @pid is still in use, without PIDFD_THREAD the + * task identified by @pid must be a thread-group leader. * * If this function returns successfully the caller is responsible to either * call fd_install() passing the returned pidfd and pidfd file as arguments in @@ -2173,7 +2195,9 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - if (!pid || !pid_has_task(pid, PIDTYPE_TGID)) + bool thread = flags & PIDFD_THREAD; + + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) return -EINVAL; return __pidfd_prepare(pid, flags, ret); diff --git a/kernel/pid.c b/kernel/pid.c index c7a3e359f8f5..e11144466828 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -552,11 +552,6 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) * Return the task associated with @pidfd. The function takes a reference on * the returned task. The caller is responsible for releasing that reference. * - * Currently, the process identified by @pidfd is always a thread-group leader. 
- * This restriction currently exists for all aspects of pidfds including pidfd - * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling - * (only supports thread group leaders). - * * Return: On success, the task_struct associated with the pidfd. * On error, a negative errno number will be returned. */ @@ -615,11 +610,8 @@ static int pidfd_create(struct pid *pid, unsigned int flags) * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set for - * the process identified by @pid. Currently, the process identified by - * @pid must be a thread-group leader. This restriction currently exists - * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot - * be used with CLONE_THREAD) and pidfd polling (only supports thread group - * leaders). + * the task identified by @pid. Without PIDFD_THREAD flag the target task + * must be a thread-group leader. * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. @@ -629,7 +621,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) int fd; struct pid *p; - if (flags & ~PIDFD_NONBLOCK) + if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD)) return -EINVAL; if (pid <= 0) diff --git a/kernel/signal.c b/kernel/signal.c index 9561a3962ca6..9b40109f0c56 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2019,7 +2019,7 @@ ret: return ret; } -static void do_notify_pidfd(struct task_struct *task) +void do_notify_pidfd(struct task_struct *task) { struct pid *pid; @@ -2051,7 +2051,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* - * tsk is a group leader and has no threads, wake up the pidfd waiters. + * tsk is a group leader and has no threads, wake up the + * non-PIDFD_THREAD waiters. 
*/ if (thread_group_empty(tsk)) do_notify_pidfd(tsk); @@ -3926,6 +3927,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, prepare_kill_siginfo(sig, &kinfo); } + /* TODO: respect PIDFD_THREAD */ ret = kill_pid_info(sig, &kinfo, pid); err: -- cgit v1.2.3 From 9ed52108f6478a6a805c0c15a3f70aabba07247e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Feb 2024 15:13:48 +0100 Subject: pidfd: change do_notify_pidfd() to use __wake_up(poll_to_key(EPOLLIN)) rather than wake_up_all(). This way do_notify_pidfd() won't wakeup the POLLHUP-only waiters which wait for pid_task() == NULL. TODO: - as Christian pointed out, this asks for the new wake_up_all_poll() helper, it can already have other users. - we can probably discriminate the PIDFD_THREAD and non-PIDFD_THREAD waiters, but this needs more work. See https://lore.kernel.org/all/20240205140848.GA15853@redhat.com/ Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240205141348.GA16539@redhat.com Reviewed-by: Tycho Andersen Signed-off-by: Christian Brauner --- kernel/signal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 9b40109f0c56..c3fac06937e2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2021,11 +2021,12 @@ ret: void do_notify_pidfd(struct task_struct *task) { - struct pid *pid; + struct pid *pid = task_pid(task); WARN_ON(task->exit_state == 0); - pid = task_pid(task); - wake_up_all(&pid->wait_pidfd); + + __wake_up(&pid->wait_pidfd, TASK_NORMAL, 0, + poll_to_key(EPOLLIN | EPOLLRDNORM)); } /* -- cgit v1.2.3 From c044a9502649a66bf5c5e1a584cb82b2c538ae25 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 9 Feb 2024 14:06:20 +0100 Subject: signal: fill in si_code in prepare_kill_siginfo() So that do_tkill() can use this helper too. This also simplifies the next patch. 
TODO: perhaps we can kill prepare_kill_siginfo() and change the callers to use SEND_SIG_NOINFO, but this needs some changes in __send_signal_locked() and TP_STORE_SIGINFO(). Reviewed-by: Tycho Andersen Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240209130620.GA8039@redhat.com Signed-off-by: Christian Brauner --- kernel/signal.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index c3fac06937e2..1450689302d9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3793,12 +3793,13 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese, #endif #endif -static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info) +static void prepare_kill_siginfo(int sig, struct kernel_siginfo *info, + enum pid_type type) { clear_siginfo(info); info->si_signo = sig; info->si_errno = 0; - info->si_code = SI_USER; + info->si_code = (type == PIDTYPE_PID) ? 
SI_TKILL : SI_USER; info->si_pid = task_tgid_vnr(current); info->si_uid = from_kuid_munged(current_user_ns(), current_uid()); } @@ -3812,7 +3813,7 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct kernel_siginfo info; - prepare_kill_siginfo(sig, &info); + prepare_kill_siginfo(sig, &info, PIDTYPE_TGID); return kill_something_info(sig, &info, pid); } @@ -3925,7 +3926,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) goto err; } else { - prepare_kill_siginfo(sig, &kinfo); + prepare_kill_siginfo(sig, &kinfo, PIDTYPE_TGID); } /* TODO: respect PIDFD_THREAD */ @@ -3970,12 +3971,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) { struct kernel_siginfo info; - clear_siginfo(&info); - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_TKILL; - info.si_pid = task_tgid_vnr(current); - info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); + prepare_kill_siginfo(sig, &info, PIDTYPE_PID); return do_send_specific(tgid, pid, sig, &info); } -- cgit v1.2.3 From 81b9d8ac0640b285a3c369cd41a85f6c240d3a60 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 9 Feb 2024 14:06:50 +0100 Subject: pidfd: change pidfd_send_signal() to respect PIDFD_THREAD Turn kill_pid_info() into kill_pid_info_type(), this allows to pass any pid_type to group_send_sig_info(), despite its name it should work fine even if type = PIDTYPE_PID. Change pidfd_send_signal() to use PIDTYPE_PID or PIDTYPE_TGID depending on PIDFD_THREAD. While at it kill another TODO comment in pidfd_show_fdinfo(). As Christian explains, fdinfo reports f_flags, userspace can already detect PIDFD_THREAD. 
Reviewed-by: Tycho Andersen Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240209130650.GA8048@redhat.com Signed-off-by: Christian Brauner --- kernel/fork.c | 2 -- kernel/signal.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 18 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/fork.c b/kernel/fork.c index 4b6d994505ca..3f22ec90c5c6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2051,8 +2051,6 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) seq_put_decimal_ll(m, "Pid:\t", nr); - /* TODO: report PIDFD_THREAD */ - #ifdef CONFIG_PID_NS seq_put_decimal_ll(m, "\nNSpid:\t", nr); if (nr > 0) { diff --git a/kernel/signal.c b/kernel/signal.c index 1450689302d9..8b8169623850 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -47,6 +47,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -1436,7 +1437,8 @@ void lockdep_assert_task_sighand_held(struct task_struct *task) #endif /* - * send signal info to all the members of a group + * send signal info to all the members of a thread group or to the + * individual thread if type == PIDTYPE_PID. */ int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) @@ -1478,7 +1480,8 @@ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) return ret; } -int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) +static int kill_pid_info_type(int sig, struct kernel_siginfo *info, + struct pid *pid, enum pid_type type) { int error = -ESRCH; struct task_struct *p; @@ -1487,11 +1490,10 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) - error = group_send_sig_info(sig, info, p, PIDTYPE_TGID); + error = group_send_sig_info(sig, info, p, type); rcu_read_unlock(); if (likely(!p || error != -ESRCH)) return error; - /* * The task was unhashed in between, try again. 
If it * is dead, pid_task() will return NULL, if we race with @@ -1500,6 +1502,11 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) } } +int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) +{ + return kill_pid_info_type(sig, info, pid, PIDTYPE_TGID); +} + static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid) { int error; @@ -3873,14 +3880,10 @@ static struct pid *pidfd_to_pid(const struct file *file) * @info: signal info * @flags: future flags * - * The syscall currently only signals via PIDTYPE_PID which covers - * kill(, . It does not signal threads or process - * groups. - * In order to extend the syscall to threads and process groups the @flags - * argument should be used. In essence, the @flags argument will determine - * what is signaled and not the file descriptor itself. Put in other words, - * grouping is a property of the flags argument not a property of the file - * descriptor. + * Send the signal to the thread group or to the individual thread depending + * on PIDFD_THREAD. + * In the future extension to @flags may be used to override the default scope + * of @pidfd. * * Return: 0 on success, negative errno on failure */ @@ -3891,6 +3894,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, struct fd f; struct pid *pid; kernel_siginfo_t kinfo; + enum pid_type type; /* Enforce flags be set to 0 until we add an extension. 
*/ if (flags) @@ -3911,6 +3915,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (!access_pidfd_pidns(pid)) goto err; + if (f.file->f_flags & PIDFD_THREAD) + type = PIDTYPE_PID; + else + type = PIDTYPE_TGID; + if (info) { ret = copy_siginfo_from_user_any(&kinfo, info); if (unlikely(ret)) @@ -3926,12 +3935,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) goto err; } else { - prepare_kill_siginfo(sig, &kinfo, PIDTYPE_TGID); + prepare_kill_siginfo(sig, &kinfo, type); } - /* TODO: respect PIDFD_THREAD */ - ret = kill_pid_info(sig, &kinfo, pid); - + ret = kill_pid_info_type(sig, &kinfo, pid, type); err: fdput(f); return ret; -- cgit v1.2.3 From e1fb1dc08e73466830612bcf2f9f72180965c9ba Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Feb 2024 15:49:45 +0100 Subject: pidfd: allow to override signal scope in pidfd_send_signal() Right now we determine the scope of the signal based on the type of pidfd. There are use-cases where it's useful to override the scope of the signal. For example in [1]. Add flags to determine the scope of the signal: (1) PIDFD_SIGNAL_THREAD: send signal to specific thread referenced by @pidfd (2) PIDFD_SIGNAL_THREAD_GROUP: send signal to thread-group of @pidfd (3) PIDFD_SIGNAL_PROCESS_GROUP: send signal to process-group of @pidfd Since we now allow specifying PIDFD_SIGNAL_PROCESS_GROUP for pidfd_send_signal() to send signals to process groups we need to adjust the check restricting si_code emulation by userspace to account for PIDTYPE_PGID. 
Reviewed-by: Oleg Nesterov Link: https://github.com/systemd/systemd/issues/31093 [1] Link: https://lore.kernel.org/r/20240210-chihuahua-hinzog-3945b6abd44a@brauner Link: https://lore.kernel.org/r/20240214123655.GB16265@redhat.com Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 5 +++++ kernel/signal.c | 46 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 9 deletions(-) (limited to 'kernel/signal.c') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 2e6461459877..72ec000a97cd 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -10,4 +10,9 @@ #define PIDFD_NONBLOCK O_NONBLOCK #define PIDFD_THREAD O_EXCL +/* Flags for pidfd_send_signal(). */ +#define PIDFD_SIGNAL_THREAD (1UL << 0) +#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) +#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) + #endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/kernel/signal.c b/kernel/signal.c index 8b8169623850..bdca529f0f7b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1905,16 +1905,19 @@ int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno, return send_sig_info(info.si_signo, &info, t); } -int kill_pgrp(struct pid *pid, int sig, int priv) +static int kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) { int ret; - read_lock(&tasklist_lock); - ret = __kill_pgrp_info(sig, __si_special(priv), pid); + ret = __kill_pgrp_info(sig, info, pgrp); read_unlock(&tasklist_lock); - return ret; } + +int kill_pgrp(struct pid *pid, int sig, int priv) +{ + return kill_pgrp_info(sig, __si_special(priv), pid); +} EXPORT_SYMBOL(kill_pgrp); int kill_pid(struct pid *pid, int sig, int priv) @@ -3873,6 +3876,10 @@ static struct pid *pidfd_to_pid(const struct file *file) return tgid_pidfd_to_pid(file); } +#define PIDFD_SEND_SIGNAL_FLAGS \ + (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ + PIDFD_SIGNAL_PROCESS_GROUP) + /** * sys_pidfd_send_signal - Signal a process through a pidfd 
* @pidfd: file descriptor of the process @@ -3897,7 +3904,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, enum pid_type type; /* Enforce flags be set to 0 until we add an extension. */ - if (flags) + if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) + return -EINVAL; + + /* Ensure that only a single signal scope determining flag is set. */ + if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) return -EINVAL; f = fdget(pidfd); @@ -3915,10 +3926,24 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (!access_pidfd_pidns(pid)) goto err; - if (f.file->f_flags & PIDFD_THREAD) + switch (flags) { + case 0: + /* Infer scope from the type of pidfd. */ + if (f.file->f_flags & PIDFD_THREAD) + type = PIDTYPE_PID; + else + type = PIDTYPE_TGID; + break; + case PIDFD_SIGNAL_THREAD: type = PIDTYPE_PID; - else + break; + case PIDFD_SIGNAL_THREAD_GROUP: type = PIDTYPE_TGID; + break; + case PIDFD_SIGNAL_PROCESS_GROUP: + type = PIDTYPE_PGID; + break; + } if (info) { ret = copy_siginfo_from_user_any(&kinfo, info); @@ -3931,14 +3956,17 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, /* Only allow sending arbitrary signals to yourself. */ ret = -EPERM; - if ((task_pid(current) != pid) && + if ((task_pid(current) != pid || type > PIDTYPE_TGID) && (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) goto err; } else { prepare_kill_siginfo(sig, &kinfo, type); } - ret = kill_pid_info_type(sig, &kinfo, pid, type); + if (type == PIDTYPE_PGID) + ret = kill_pgrp_info(sig, &kinfo, pid); + else + ret = kill_pid_info_type(sig, &kinfo, pid, type); err: fdput(f); return ret; -- cgit v1.2.3 From 49fd5f5ac4b59dcd53b5788d56d4ae7a8a1e1434 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 Feb 2024 17:56:47 +0100 Subject: get_signal: don't abuse ksig->info.si_signo and ksig->sig Patch series "get_signal: minor cleanups and fix". Lets remove this clear_siginfo() right now. It is incomplete (and thus looks confusing) and unnecessary. 
Also, PF_USER_WORKER's already don't get a fully initialized ksig anyway. This patch (of 3): Cleanup and preparation for the next changes. get_signal() uses signr or ksig->info.si_signo or ksig->sig in a chaotic way, this looks confusing. Change it to always use signr. Link: https://lkml.kernel.org/r/20240226165612.GA20787@redhat.com Link: https://lkml.kernel.org/r/20240226165647.GA20826@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Eric W. Biederman Cc: Peter Collingbourne Cc: Wen Yang Signed-off-by: Andrew Morton --- kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index c9c57d053ce4..09a6dd07cf6b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2842,7 +2842,7 @@ relock: spin_lock_irq(&sighand->siglock); } - if (likely(do_signal_stop(ksig->info.si_signo))) { + if (likely(do_signal_stop(signr))) { /* It released the siglock. */ goto relock; } @@ -2866,7 +2866,7 @@ relock: if (sig_kernel_coredump(signr)) { if (print_fatal_signals) - print_fatal_signal(ksig->info.si_signo); + print_fatal_signal(signr); proc_coredump_connector(current); /* * If it was able to dump core, this kills all @@ -2890,7 +2890,7 @@ relock: /* * Death signals, no core dump. */ - do_group_exit(ksig->info.si_signo); + do_group_exit(signr); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); @@ -2900,7 +2900,7 @@ out: if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS)) hide_si_addr_tag_bits(ksig); - return ksig->sig > 0; + return signr > 0; } /** -- cgit v1.2.3 From dd69edd643a8263f9a96d0a3a82d8d50d9df9b48 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 Feb 2024 17:56:50 +0100 Subject: get_signal: hide_si_addr_tag_bits: fix the usage of uninitialized ksig ksig->ka and ksig->info are not initialized if get_signal() returns 0 or if the caller is PF_USER_WORKER. Check signr != 0 before SA_EXPOSE_TAGBITS and move the "out" label down. 
The latter means that ksig->sig won't be initialized if a PF_USER_WORKER thread gets a fatal signal but this is fine, PF_USER_WORKER's don't use ksig. And there is nothing new, in this case ksig->ka and ksig->info are not initialized anyway. Add a comment. Link: https://lkml.kernel.org/r/20240226165650.GA20829@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Eric W. Biederman Cc: Peter Collingbourne Cc: Wen Yang Signed-off-by: Andrew Morton --- kernel/signal.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 09a6dd07cf6b..a69d3069067a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2881,8 +2881,9 @@ relock: /* * PF_USER_WORKER threads will catch and exit on fatal signals - * themselves. They have cleanup that must be performed, so - * we cannot call do_exit() on their behalf. + * themselves. They have cleanup that must be performed, so we + * cannot call do_exit() on their behalf. Note that ksig won't + * be properly initialized, PF_USER_WORKER's shouldn't use it. */ if (current->flags & PF_USER_WORKER) goto out; @@ -2894,12 +2895,12 @@ relock: /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); -out: + ksig->sig = signr; - if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS)) + if (signr && !(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS)) hide_si_addr_tag_bits(ksig); - +out: return signr > 0; } -- cgit v1.2.3 From a436184e3bfb14b3c38e6ed0c2e7f6d810312c4f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 26 Feb 2024 17:56:53 +0100 Subject: get_signal: don't initialize ksig->info if SIGNAL_GROUP_EXIT/group_exec_task This initialization is incomplete and unnecessary, neither do_group_exit() nor PF_USER_WORKER need ksig->info. Link: https://lkml.kernel.org/r/20240226165653.GA20834@redhat.com Signed-off-by: Oleg Nesterov Cc: Christian Brauner Cc: Eric W. 
Biederman Cc: Peter Collingbourne Cc: Wen Yang Signed-off-by: Andrew Morton --- kernel/signal.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index a69d3069067a..9c6a5ccac328 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2727,12 +2727,15 @@ relock: /* Has this task already been marked for death? */ if ((signal->flags & SIGNAL_GROUP_EXIT) || signal->group_exec_task) { - clear_siginfo(&ksig->info); - ksig->info.si_signo = signr = SIGKILL; + signr = SIGKILL; sigdelset(&current->pending.signal, SIGKILL); trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, - &sighand->action[SIGKILL - 1]); + &sighand->action[SIGKILL-1]); recalc_sigpending(); + /* + * implies do_group_exit() or return to PF_USER_WORKER, + * no need to initialize ksig->info/etc. + */ goto fatal; } -- cgit v1.2.3