From b74494872555d1f7888dfd9225700a363f4a84fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Jul 2019 20:30:49 +0200 Subject: hrtimer: Remove task argument from hrtimer_init_sleeper() All callers hand in 'current' and that's the only task pointer which actually makes sense. Remove the task argument and set current in the function. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185752.791885290@linutronix.de --- include/linux/hrtimer.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4971100a8cab..3c74f89367c4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -463,8 +463,7 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, - struct task_struct *tsk); +extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl); extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); -- cgit v1.2.3 From dbc1625fc9deefb352f6ff26a575ae4b3ddef23a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Jul 2019 20:30:50 +0200 Subject: hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls hrtimer_init_sleeper() calls require prior initialisation of the hrtimer object which is embedded into the hrtimer_sleeper. Combine the initialization and spare a function call. Fixup all call sites. This is also a preparatory change for PREEMPT_RT to do hrtimer sleeper specific initializations of the embedded hrtimer without modifying any of the call sites. No functional change. [ anna-maria: Minor cleanups ] [ tglx: Adopted to the removal of the task argument of hrtimer_init_sleeper() and trivial polishing. Folded a fix from Stephen Rothwell for the vsoc code ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185752.887468908@linutronix.de --- block/blk-mq.c | 3 +-- drivers/staging/android/vsoc.c | 6 ++---- include/linux/hrtimer.h | 17 ++++++++++++++--- include/linux/wait.h | 4 ++-- kernel/futex.c | 8 +++----- kernel/time/hrtimer.c | 43 +++++++++++++++++++++++++++++++----------- net/core/pktgen.c | 4 +--- 7 files changed, 55 insertions(+), 30 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5f647cb8c695..df3fafbfe9a9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3415,10 +3415,9 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, kt = nsecs; mode = HRTIMER_MODE_REL; - hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode); + hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode); hrtimer_set_expires(&hs.timer, kt); - hrtimer_init_sleeper(&hs); do { if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE) break; diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index ce480bcf20d2..2d6b3981afb8 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) return -EINVAL; wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec); - hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); + hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_set_expires_range_ns(&to->timer, wake_time, current->timer_slack_ns); - - hrtimer_init_sleeper(to); } while (1) { diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3c74f89367c4..0df373bed3d7 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -347,10 +347,15 @@ DECLARE_PER_CPU(struct tick_device, tick_cpu_device); /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); +extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, + clockid_t clock_id, + enum hrtimer_mode mode); extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else @@ -360,6 +365,14 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer, { hrtimer_init(timer, which_clock, mode); } + +static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, + clockid_t clock_id, + enum hrtimer_mode mode) +{ + hrtimer_init_sleeper(sl, clock_id, mode); +} + static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif @@ -463,10 +476,8 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl); - extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, - const enum hrtimer_mode mode); + const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, diff --git a/include/linux/wait.h b/include/linux/wait.h index d57832774ca6..4707543ef575 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -488,8 +488,8 @@ do { \ int __ret = 0; \ struct hrtimer_sleeper __t; \ \ - hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ - hrtimer_init_sleeper(&__t); \ + hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ if ((timeout) != KTIME_MAX) \ hrtimer_start_range_ns(&__t.timer, timeout, \ current->timer_slack_ns, \ diff --git a/kernel/futex.c b/kernel/futex.c index 5e9842ea4012..c8561aa5338e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -487,11 +487,9 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, if (!time) return NULL; - hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ? - CLOCK_REALTIME : CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(timeout); - + hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ? + CLOCK_REALTIME : CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); /* * If range_ns is 0, calling hrtimer_set_expires_range_ns() is * effectively the same as calling hrtimer_set_expires(). diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index de895d86800c..bb55d62f631e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -427,6 +427,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, } EXPORT_SYMBOL_GPL(hrtimer_init_on_stack); +static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, + clockid_t clock_id, enum hrtimer_mode mode); + +void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, + clockid_t clock_id, enum hrtimer_mode mode) +{ + debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); + __hrtimer_init_sleeper(sl, clock_id, mode); +} +EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); + void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); @@ -1639,11 +1650,27 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) return HRTIMER_NORESTART; } -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl) +static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, + clockid_t clock_id, enum hrtimer_mode mode) { + __hrtimer_init(&sl->timer, clock_id, mode); sl->timer.function = hrtimer_wakeup; sl->task = current; } + +/** + * hrtimer_init_sleeper - initialize sleeper to the given clock + * @sl: sleeper to be initialized + * @clock_id: the clock to be used + * @mode: timer mode abs/rel + */ +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_init(&sl->timer, clock_id, mode); + __hrtimer_init_sleeper(sl, clock_id, mode); + +} EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) @@ -1669,8 +1696,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod { struct restart_block *restart; - hrtimer_init_sleeper(t); - do { set_current_state(TASK_INTERRUPTIBLE); hrtimer_start_expires(&t->timer, mode); @@ -1707,10 +1732,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct hrtimer_sleeper t; int ret; - hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, - HRTIMER_MODE_ABS); + hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, + HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); - ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; @@ -1728,7 +1752,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, if (dl_task(current) || rt_task(current)) slack = 0; - hrtimer_init_on_stack(&t.timer, clockid, mode); + hrtimer_init_sleeper_on_stack(&t, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); ret = do_nanosleep(&t, mode); if (ret != -ERESTART_RESTARTBLOCK) @@ -1927,11 +1951,8 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } - hrtimer_init_on_stack(&t.timer, clock_id, mode); + hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); - - hrtimer_init_sleeper(&t); - hrtimer_start_expires(&t.timer, mode); if (likely(t.task)) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7f3cf2381f27..a5905975bc12 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2156,7 +2156,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) s64 remaining; struct hrtimer_sleeper t; - hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_set_expires(&t.timer, spin_until); remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); @@ -2170,8 +2170,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) end_time = ktime_get(); } while (ktime_compare(end_time, spin_until) < 0); } else { - /* see do_nanosleep */ - hrtimer_init_sleeper(&t); do { set_current_state(TASK_INTERRUPTIBLE); hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); -- cgit v1.2.3 From 01656464fce946f70b02a84ab218e562ceb1662e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 Jul 2019 21:03:53 +0200 Subject: hrtimer: Provide hrtimer_sleeper_start_expires() hrtimer_sleepers will gain a scheduling class dependent treatment on PREEMPT_RT. Create a wrapper around hrtimer_start_expires() to make that possible. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 3 +++ kernel/time/hrtimer.c | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0df373bed3d7..24072a0942c0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -408,6 +408,9 @@ static inline void hrtimer_start_expires(struct hrtimer *timer, hrtimer_start_range_ns(timer, soft, delta, mode); } +void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, + enum hrtimer_mode mode); + static inline void hrtimer_restart(struct hrtimer *timer) { hrtimer_start_expires(timer, HRTIMER_MODE_ABS); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bb55d62f631e..dab1ea1a99d0 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1650,6 +1650,21 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) return HRTIMER_NORESTART; } +/** + * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer + * @sl: sleeper to be started + * @mode: timer mode abs/rel + * + * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers + * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) + */ +void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, + enum hrtimer_mode mode) +{ + hrtimer_start_expires(&sl->timer, mode); +} +EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); + static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, enum hrtimer_mode mode) { @@ -1698,7 +1713,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod do { set_current_state(TASK_INTERRUPTIBLE); - hrtimer_start_expires(&t->timer, mode); + hrtimer_sleeper_start_expires(t, mode); if (likely(t->task)) freezable_schedule(); @@ -1953,7 +1968,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, hrtimer_init_sleeper_on_stack(&t, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); - hrtimer_start_expires(&t.timer, mode); + hrtimer_sleeper_start_expires(&t, mode); if (likely(t.task)) schedule(); -- cgit v1.2.3 From ae6683d815895c2be1e60e1942630fa99488055b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 26 Jul 2019 20:30:51 +0200 Subject: hrtimer: Introduce HARD expiry mode On PREEMPT_RT not all hrtimers can be expired in hard interrupt context even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer most hrtimers' expiry into soft interrupt context. But there are hrtimers which must be expired in hard interrupt context even when PREEMPT_RT is enabled: - hrtimers which must expiry in hard interrupt context, e.g. scheduler, perf, watchdog related hrtimers - latency critical hrtimers, e.g. nanosleep, ..., kvm lapic timer Add a new mode flag HRTIMER_MODE_HARD which allows to mark these timers so PREEMPT_RT will not move them into softirq expiry mode. [ tglx: Split out of a larger combo patch. Added changelog ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185752.981398465@linutronix.de --- include/linux/hrtimer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 24072a0942c0..15c2ba6b6316 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -38,6 +38,7 @@ enum hrtimer_mode { HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, HRTIMER_MODE_SOFT = 0x04, + HRTIMER_MODE_HARD = 0x08, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, @@ -48,6 +49,11 @@ enum hrtimer_mode { HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, + + HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, + HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, }; /* -- cgit v1.2.3 From 0ab6a3ddbad40ef5b6b8c2353fd53fa4ecf9c479 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 30 Jul 2019 20:15:25 +0200 Subject: hrtimer: Make enqueue mode check work on RT hrtimer_start_range_ns() has a WARN_ONCE() which verifies that a timer which is marker for softirq expiry is not queued in the hard interrupt base and vice versa. When PREEMPT_RT is enabled, timers which are not explicitely marked to expire in hard interrupt context are deferrred to the soft interrupt. So the regular check would trigger. Change the check, so when PREEMPT_RT is enabled, it is verified that the timers marked for hard interrupt expiry are not tried to be queued for soft interrupt expiry or any of the unmarked and softirq marked is tried to be expired in hard interrupt context. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 3 +++ kernel/time/hrtimer.c | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 15c2ba6b6316..7d0d0a36a8f4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -107,6 +107,8 @@ enum hrtimer_restart { * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative * @is_soft: Set if hrtimer will be expired in soft interrupt context. + * @is_hard: Set if hrtimer will be expired in hard interrupt context + * even on RT. * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -118,6 +120,7 @@ struct hrtimer { u8 state; u8 is_rel; u8 is_soft; + u8 is_hard; }; /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index dab1ea1a99d0..0ace301a56f4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1107,9 +1107,13 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft - * match. + * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard + * expiry mode because unmarked timers are moved to softirq expiry. */ - WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); + else + WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); base = lock_hrtimer_base(timer, &flags); @@ -1288,6 +1292,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base += hrtimer_clockid_to_base(clock_id); timer->is_soft = softtimer; + timer->is_hard = !softtimer; timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } -- cgit v1.2.3 From f61eff83cec9cfab31fd30a2ca8856be379cdcd5 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Fri, 26 Jul 2019 20:30:59 +0200 Subject: hrtimer: Prepare support for PREEMPT_RT When PREEMPT_RT is enabled, the soft interrupt thread can be preempted. If the soft interrupt thread is preempted in the middle of a timer callback, then calling hrtimer_cancel() can lead to two issues: - If the caller is on a remote CPU then it has to spin wait for the timer handler to complete. This can result in unbound priority inversion. - If the caller originates from the task which preempted the timer handler on the same CPU, then spin waiting for the timer handler to complete is never going to end. To avoid these issues, add a new lock to the timer base which is held around the execution of the timer callbacks. If hrtimer_cancel() detects that the timer callback is currently running, it blocks on the expiry lock. When the callback is finished, the expiry lock is dropped by the softirq thread which wakes up the waiter and the system makes progress. This addresses both the priority inversion and the life lock issues. The same issue can happen in virtual machines when the vCPU which runs a timer callback is scheduled out. If a second vCPU of the same guest calls hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back in. The expiry lock mechanism would avoid that. It'd be trivial to enable this when paravirt spinlocks are enabled in a guest, but it's not clear whether this is an actual problem in the wild, so for now it's an RT only mechanism. [ tglx: Refactored it for mainline ] Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de --- include/linux/hrtimer.h | 16 +++++++++ kernel/time/hrtimer.c | 95 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 105 insertions(+), 6 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7d0d0a36a8f4..5df4bcff96d5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -192,6 +192,10 @@ enum hrtimer_base_type { * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired + * @timer_waiters: A hrtimer_cancel() invocation waits for the timer + * callback to finish. * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue; it is the total first expiry time (hard * and soft hrtimer are taken into account) @@ -218,6 +222,10 @@ struct hrtimer_cpu_base { unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; +#endif +#ifdef CONFIG_PREEMPT_RT + spinlock_t softirq_expiry_lock; + atomic_t timer_waiters; #endif ktime_t expires_next; struct hrtimer *next_timer; @@ -350,6 +358,14 @@ extern void hrtimers_resume(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); +#ifdef CONFIG_PREEMPT_RT +void hrtimer_cancel_wait_running(const struct hrtimer *timer); +#else +static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) +{ + cpu_relax(); +} +#endif /* Exported timer functions: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index c101f88ae8aa..499122752649 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); +#ifdef CONFIG_PREEMPT_RT +static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) +{ + spin_lock_init(&base->softirq_expiry_lock); +} + +static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) +{ + spin_lock(&base->softirq_expiry_lock); +} + +static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) +{ + spin_unlock(&base->softirq_expiry_lock); +} + +/* + * The counterpart to hrtimer_cancel_wait_running(). + * + * If there is a waiter for cpu_base->expiry_lock, then it was waiting for + * the timer callback to finish. Drop expiry_lock and reaquire it. That + * allows the waiter to acquire the lock and make progress. + */ +static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, + unsigned long flags) +{ + if (atomic_read(&cpu_base->timer_waiters)) { + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + spin_unlock(&cpu_base->softirq_expiry_lock); + spin_lock(&cpu_base->softirq_expiry_lock); + raw_spin_lock_irq(&cpu_base->lock); + } +} + +/* + * This function is called on PREEMPT_RT kernels when the fast path + * deletion of a timer failed because the timer callback function was + * running. + * + * This prevents priority inversion, if the softirq thread on a remote CPU + * got preempted, and it prevents a life lock when the task which tries to + * delete a timer preempted the softirq thread running the timer callback + * function. + */ +void hrtimer_cancel_wait_running(const struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + + if (!timer->is_soft || !base || !base->cpu_base) { + cpu_relax(); + return; + } + + /* + * Mark the base as contended and grab the expiry lock, which is + * held by the softirq across the timer callback. Drop the lock + * immediately so the softirq can expire the next timer. In theory + * the timer could already be running again, but that's more than + * unlikely and just causes another wait loop. + */ + atomic_inc(&base->cpu_base->timer_waiters); + spin_lock_bh(&base->cpu_base->softirq_expiry_lock); + atomic_dec(&base->cpu_base->timer_waiters); + spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); +} +#else +static inline void +hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } +static inline void +hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } +static inline void +hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } +static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, + unsigned long flags) { } +#endif + /** * hrtimer_cancel - cancel a timer and wait for the handler to finish. * @timer: the timer to be cancelled @@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); */ int hrtimer_cancel(struct hrtimer *timer) { - for (;;) { - int ret = hrtimer_try_to_cancel(timer); + int ret; - if (ret >= 0) - return ret; - cpu_relax(); - } + do { + ret = hrtimer_try_to_cancel(timer); + + if (ret < 0) + hrtimer_cancel_wait_running(timer); + } while (ret < 0); + return ret; } EXPORT_SYMBOL_GPL(hrtimer_cancel); @@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, break; __run_hrtimer(cpu_base, base, timer, &basenow, flags); + if (active_mask == HRTIMER_ACTIVE_SOFT) + hrtimer_sync_wait_running(cpu_base, flags); } } } @@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) unsigned long flags; ktime_t now; + hrtimer_cpu_base_lock_expiry(cpu_base); raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); @@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) hrtimer_update_softirq_timer(cpu_base, true); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + hrtimer_cpu_base_unlock_expiry(cpu_base); } #ifdef CONFIG_HIGH_RES_TIMERS @@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; + hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } -- cgit v1.2.3 From a67e408241783575716fcf3f79d0878f6cef0273 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 23 Aug 2019 13:38:44 +0200 Subject: hrtimer: Add kernel doc annotation for HRTIMER_MODE_HARD Add kernel doc annotation for HRTIMER_MODE_HARD. Fixes: ae6683d815895 ("hrtimer: Introduce HARD expiry mode") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190823113845.12125-2-bigeasy@linutronix.de --- include/linux/hrtimer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5df4bcff96d5..1b9a51a1bccb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -32,6 +32,8 @@ struct hrtimer_cpu_base; * when starting the timer) * HRTIMER_MODE_SOFT - Timer callback function will be executed in * soft irq context + * HRTIMER_MODE_HARD - Timer callback function will be executed in + * hard irq context even on PREEMPT_RT. */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, -- cgit v1.2.3