diff options
-rw-r--r-- | include/linux/posix-timers.h | 2 | ||||
-rw-r--r-- | include/linux/sched/signal.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/prctl.h | 11 | ||||
-rw-r--r-- | kernel/sys.c | 5 | ||||
-rw-r--r-- | kernel/time/posix-timers.c | 105 |
5 files changed, 98 insertions, 26 deletions
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 094ef5778c0f..dd48c64b605e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q); void posixtimer_send_sigqueue(struct k_itimer *tmr); bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); void posixtimer_free_timer(struct k_itimer *timer); +long posixtimer_create_prctl(unsigned long ctrl); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { } static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq) { return false; } static inline void posixtimer_free_timer(struct k_itimer *timer) { } +static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 72649d7fce2a..1ef1edbaaf79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -136,6 +136,7 @@ struct signal_struct { #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ + unsigned int timer_create_restore_ids:1; atomic_t next_posix_timer_id; struct hlist_head posix_timers; struct hlist_head ignored_posix_timers; diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5c6080680cb2..15c18ef4eb11 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -353,4 +353,15 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index cb366ff8703a..982e1c465f50 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2811,6 +2811,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = arch_lock_shadow_stack_status(me, arg2); break; + case PR_TIMER_CREATE_RESTORE_IDS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = posixtimer_create_prctl(arg2); + break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index b917a162d84a..2ca1c55d38a2 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -19,6 +19,7 @@ #include <linux/nospec.h> #include <linux/posix-clock.h> #include <linux/posix-timers.h> +#include <linux/prctl.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/syscalls.h> @@ -57,6 +58,8 @@ static const struct k_clock * const posix_clocks[]; static const struct k_clock *clockid_to_kclock(const clockid_t id); static const struct k_clock clock_realtime, clock_monotonic; +#define TIMER_ANY_ID INT_MIN + /* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) @@ -128,38 +131,60 @@ static bool posix_timer_hashed(struct timer_hash_bucket *bucket, struct signal_s return false; } -static int posix_timer_add(struct k_itimer *timer) +static bool posix_timer_add_at(struct k_itimer *timer, struct signal_struct *sig, unsigned int id) +{ + struct timer_hash_bucket *bucket = hash_bucket(sig, id); + + scoped_guard (spinlock, &bucket->lock) { + /* + * Validate under the lock as this could have raced against + * another thread ending up with the same ID, which is + * highly unlikely, but possible. + */ + if (!posix_timer_hashed(bucket, sig, id)) { + /* + * Set the timer ID and the signal pointer to make + * it identifiable in the hash table. The signal + * pointer has bit 0 set to indicate that it is not + * yet fully initialized. posix_timer_hashed() + * masks this bit out, but the syscall lookup fails + * to match due to it being set. This guarantees + * that there can't be duplicate timer IDs handed + * out. + */ + timer->it_id = (timer_t)id; + timer->it_signal = (struct signal_struct *)((unsigned long)sig | 1UL); + hlist_add_head_rcu(&timer->t_hash, &bucket->head); + return true; + } + } + return false; +} + +static int posix_timer_add(struct k_itimer *timer, int req_id) { struct signal_struct *sig = current->signal; + if (unlikely(req_id != TIMER_ANY_ID)) { + if (!posix_timer_add_at(timer, sig, req_id)) + return -EBUSY; + + /* + * Move the ID counter past the requested ID, so that after + * switching back to normal mode the IDs are outside of the + * exact allocated region. That avoids ID collisions on the + * next regular timer_create() invocations. + */ + atomic_set(&sig->next_posix_timer_id, req_id + 1); + return req_id; + } + for (unsigned int cnt = 0; cnt <= INT_MAX; cnt++) { /* Get the next timer ID and clamp it to positive space */ unsigned int id = atomic_fetch_inc(&sig->next_posix_timer_id) & INT_MAX; - struct timer_hash_bucket *bucket = hash_bucket(sig, id); - scoped_guard (spinlock, &bucket->lock) { - /* - * Validate under the lock as this could have raced - * against another thread ending up with the same - * ID, which is highly unlikely, but possible. - */ - if (!posix_timer_hashed(bucket, sig, id)) { - /* - * Set the timer ID and the signal pointer to make - * it identifiable in the hash table. The signal - * pointer has bit 0 set to indicate that it is not - * yet fully initialized. posix_timer_hashed() - * masks this bit out, but the syscall lookup fails - * to match due to it being set. This guarantees - * that there can't be duplicate timer IDs handed - * out. - */ - timer->it_id = (timer_t)id; - timer->it_signal = (struct signal_struct *)((unsigned long)sig | 1UL); - hlist_add_head_rcu(&timer->t_hash, &bucket->head); - return id; - } - } + if (posix_timer_add_at(timer, sig, id)) + return id; cond_resched(); } /* POSIX return code when no timer ID could be allocated */ @@ -364,6 +389,21 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +long posixtimer_create_prctl(unsigned long ctrl) +{ + switch (ctrl) { + case PR_TIMER_CREATE_RESTORE_IDS_OFF: + current->signal->timer_create_restore_ids = 0; + return 0; + case PR_TIMER_CREATE_RESTORE_IDS_ON: + current->signal->timer_create_restore_ids = 1; + return 0; + case PR_TIMER_CREATE_RESTORE_IDS_GET: + return current->signal->timer_create_restore_ids; + } + return -EINVAL; +} + static struct pid *good_sigevent(sigevent_t * event) { struct pid *pid = task_tgid(current); @@ -435,6 +475,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, timer_t __user *created_timer_id) { const struct k_clock *kc = clockid_to_kclock(which_clock); + timer_t req_id = TIMER_ANY_ID; struct k_itimer *new_timer; int error, new_timer_id; @@ -449,11 +490,20 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, spin_lock_init(&new_timer->it_lock); + /* Special case for CRIU to restore timers with a given timer ID. */ + if (unlikely(current->signal->timer_create_restore_ids)) { + if (copy_from_user(&req_id, created_timer_id, sizeof(req_id))) + return -EFAULT; + /* Valid IDs are 0..INT_MAX */ + if ((unsigned int)req_id > INT_MAX) + return -EINVAL; + } + /* * Add the timer to the hash table. The timer is not yet valid * after insertion, but has a unique ID allocated. */ - new_timer_id = posix_timer_add(new_timer); + new_timer_id = posix_timer_add(new_timer, req_id); if (new_timer_id < 0) { posixtimer_free_timer(new_timer); return new_timer_id; @@ -1041,6 +1091,9 @@ void exit_itimers(struct task_struct *tsk) struct hlist_node *next; struct k_itimer *timer; + /* Clear restore mode for exec() */ + tsk->signal->timer_create_restore_ids = 0; + if (hlist_empty(&tsk->signal->posix_timers)) return; |