summaryrefslogtreecommitdiff
path: root/kernel/time/timer.c
diff options
context:
space:
mode:
authorJames Morris <james.l.morris@oracle.com>2017-11-29 12:47:41 +1100
committerJames Morris <james.l.morris@oracle.com>2017-11-29 12:47:41 +1100
commitcf40a76e7d5874bb25f4404eecc58a2e033af885 (patch)
tree8fd81cbea03c87b3d41d7ae5b1d11eadd35d6ef5 /kernel/time/timer.c
parentab5348c9c23cd253f5902980d2d8fe067dc24c82 (diff)
parent4fbd8d194f06c8a3fd2af1ce560ddb31f7ec8323 (diff)
Merge tag 'v4.15-rc1' into next-seccomp
Linux 4.15-rc1
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r--kernel/time/timer.c174
1 files changed, 129 insertions, 45 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 71ce3f4eead3..ffebcf878fba 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
bool migration_enabled;
bool nohz_active;
bool is_idle;
+ bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -609,7 +610,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state)
}
/* Stub timer callback for improperly used timers. */
-static void stub_timer(unsigned long data)
+static void stub_timer(struct timer_list *unused)
{
WARN_ON(1);
}
@@ -625,7 +626,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- setup_timer(timer, stub_timer, 0);
+ timer_setup(timer, stub_timer, 0);
return true;
case ODEBUG_STATE_ACTIVE:
@@ -664,7 +665,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- setup_timer(timer, stub_timer, 0);
+ timer_setup(timer, stub_timer, 0);
return true;
default:
return false;
@@ -706,14 +707,18 @@ static inline void debug_timer_assert_init(struct timer_list *timer)
debug_object_assert_init(timer, &timer_debug_descr);
}
-static void do_init_timer(struct timer_list *timer, unsigned int flags,
+static void do_init_timer(struct timer_list *timer,
+ void (*func)(struct timer_list *),
+ unsigned int flags,
const char *name, struct lock_class_key *key);
-void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
+void init_timer_on_stack_key(struct timer_list *timer,
+ void (*func)(struct timer_list *),
+ unsigned int flags,
const char *name, struct lock_class_key *key)
{
debug_object_init_on_stack(timer, &timer_debug_descr);
- do_init_timer(timer, flags, name, key);
+ do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
@@ -754,10 +759,13 @@ static inline void debug_assert_init(struct timer_list *timer)
debug_timer_assert_init(timer);
}
-static void do_init_timer(struct timer_list *timer, unsigned int flags,
+static void do_init_timer(struct timer_list *timer,
+ void (*func)(struct timer_list *),
+ unsigned int flags,
const char *name, struct lock_class_key *key)
{
timer->entry.pprev = NULL;
+ timer->function = func;
timer->flags = flags | raw_smp_processor_id();
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
@@ -765,6 +773,7 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
/**
* init_timer_key - initialize a timer
* @timer: the timer to be initialized
+ * @func: timer callback function
* @flags: timer flags
* @name: name of the timer
* @key: lockdep class key of the fake lock used for tracking timer
@@ -773,11 +782,12 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
* init_timer_key() must be done to a timer prior calling *any* of the
* other timer functions.
*/
-void init_timer_key(struct timer_list *timer, unsigned int flags,
+void init_timer_key(struct timer_list *timer,
+ void (*func)(struct timer_list *), unsigned int flags,
const char *name, struct lock_class_key *key)
{
debug_init(timer);
- do_init_timer(timer, flags, name, key);
+ do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);
@@ -856,13 +866,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
- unsigned long jnow = READ_ONCE(jiffies);
+ unsigned long jnow;
/*
- * We only forward the base when it's idle and we have a delta between
- * base clock and jiffies.
+ * We only forward the base when we are idle or have just come out of
+ * idle (must_forward_clk logic), and have a delta between base clock
+ * and jiffies. In the common case, run_timers will take care of it.
*/
- if (!base->is_idle || (long) (jnow - base->clk) < 2)
+ if (likely(!base->must_forward_clk))
+ return;
+
+ jnow = READ_ONCE(jiffies);
+ base->must_forward_clk = base->is_idle;
+ if ((long)(jnow - base->clk) < 2)
return;
/*
@@ -922,8 +938,11 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
}
}
+#define MOD_TIMER_PENDING_ONLY 0x01
+#define MOD_TIMER_REDUCE 0x02
+
static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
struct timer_base *base, *new_base;
unsigned int idx = UINT_MAX;
@@ -938,7 +957,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* same array bucket then just return:
*/
if (timer_pending(timer)) {
- if (timer->expires == expires)
+ /*
+ * The downside of this optimization is that it can result in
+ * larger granularity than you would get from adding a new
+ * timer with this expiry.
+ */
+ long diff = timer->expires - expires;
+
+ if (!diff)
+ return 1;
+ if (options & MOD_TIMER_REDUCE && diff <= 0)
return 1;
/*
@@ -948,6 +976,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
+
+ if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
+ time_before_eq(timer->expires, expires)) {
+ ret = 1;
+ goto out_unlock;
+ }
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -958,16 +993,20 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* subsequent call will exit in the expires check above.
*/
if (idx == timer_get_idx(timer)) {
- timer->expires = expires;
+ if (!(options & MOD_TIMER_REDUCE))
+ timer->expires = expires;
+ else if (time_after(timer->expires, expires))
+ timer->expires = expires;
ret = 1;
goto out_unlock;
}
} else {
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
}
ret = detach_if_pending(timer, base, false);
- if (!ret && pending_only)
+ if (!ret && (options & MOD_TIMER_PENDING_ONLY))
goto out_unlock;
debug_activate(timer, expires);
@@ -991,12 +1030,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
raw_spin_lock(&base->lock);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
+ forward_timer_base(base);
}
}
- /* Try to forward a stale timer base clock */
- forward_timer_base(base);
-
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1030,7 +1067,7 @@ out_unlock:
*/
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
- return __mod_timer(timer, expires, true);
+ return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
}
EXPORT_SYMBOL(mod_timer_pending);
@@ -1056,20 +1093,35 @@ EXPORT_SYMBOL(mod_timer_pending);
*/
int mod_timer(struct timer_list *timer, unsigned long expires)
{
- return __mod_timer(timer, expires, false);
+ return __mod_timer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_timer);
/**
+ * timer_reduce - Modify a timer's timeout if it would reduce the timeout
+ * @timer: The timer to be modified
+ * @expires: New timeout in jiffies
+ *
+ * timer_reduce() is very similar to mod_timer(), except that it will only
+ * modify a running timer if that would reduce the expiration time (it will
+ * start a timer that isn't running).
+ */
+int timer_reduce(struct timer_list *timer, unsigned long expires)
+{
+ return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
+}
+EXPORT_SYMBOL(timer_reduce);
+
+/**
* add_timer - start a timer
* @timer: the timer to be added
*
- * The kernel will do a ->function(->data) callback from the
+ * The kernel will do a ->function(@timer) callback from the
* timer interrupt at the ->expires point in the future. The
* current time is 'jiffies'.
*
- * The timer's ->expires, ->function (and if the handler uses it, ->data)
- * fields must be set prior calling this function.
+ * The timer's ->expires, ->function fields must be set prior calling this
+ * function.
*
* Timers with an ->expires field in the past will be executed in the next
* timer tick.
@@ -1112,6 +1164,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ forward_timer_base(base);
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
@@ -1240,8 +1293,7 @@ int del_timer_sync(struct timer_list *timer)
EXPORT_SYMBOL(del_timer_sync);
#endif
-static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
- unsigned long data)
+static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))
{
int count = preempt_count();
@@ -1265,7 +1317,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
lock_map_acquire(&lockdep_map);
trace_timer_expire_entry(timer);
- fn(data);
+ fn(timer);
trace_timer_expire_exit(timer);
lock_map_release(&lockdep_map);
@@ -1287,8 +1339,7 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
while (!hlist_empty(head)) {
struct timer_list *timer;
- void (*fn)(unsigned long);
- unsigned long data;
+ void (*fn)(struct timer_list *);
timer = hlist_entry(head->first, struct timer_list, entry);
@@ -1296,15 +1347,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
detach_timer(timer, true);
fn = timer->function;
- data = timer->data;
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
- call_timer_fn(timer, fn, data);
+ call_timer_fn(timer, fn);
raw_spin_lock(&base->lock);
} else {
raw_spin_unlock_irq(&base->lock);
- call_timer_fn(timer, fn, data);
+ call_timer_fn(timer, fn);
raw_spin_lock_irq(&base->lock);
}
}
@@ -1495,12 +1545,18 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
base->is_idle = false;
} else {
if (!is_max_delta)
- expires = basem + (nextevt - basej) * TICK_NSEC;
+ expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
- * If we expect to sleep more than a tick, mark the base idle:
+ * If we expect to sleep more than a tick, mark the base idle.
+ * Also the tick is stopped so any added timer must forward
+ * the base clk itself to keep granularity small. This idle
+ * logic is only maintained for the BASE_STD base, deferrable
+ * timers may still see large granularity skew (by design).
*/
- if ((expires - basem) > TICK_NSEC)
+ if ((expires - basem) > TICK_NSEC) {
+ base->must_forward_clk = true;
base->is_idle = true;
+ }
}
raw_spin_unlock(&base->lock);
@@ -1541,8 +1597,11 @@ static int collect_expired_timers(struct timer_base *base,
* jiffies, otherwise forward to the next expiry time:
*/
if (time_after(next, jiffies)) {
- /* The call site will increment clock! */
- base->clk = jiffies - 1;
+ /*
+ * The call site will increment base->clk and then
+ * terminate the expiry loop immediately.
+ */
+ base->clk = jiffies;
return 0;
}
base->clk = next;
@@ -1611,6 +1670,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ /*
+ * must_forward_clk must be cleared before running timers so that any
+ * timer functions that call mod_timer will not try to forward the
+ * base. idle trcking / clock forwarding logic is only used with
+ * BASE_STD timers.
+ *
+ * The deferrable base does not do idle tracking at all, so we do
+ * not forward it. This can result in very large variations in
+ * granularity for deferrable timers, but they can be deferred for
+ * long periods due to idle.
+ */
+ base->must_forward_clk = false;
+
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
@@ -1636,9 +1708,20 @@ void run_local_timers(void)
raise_softirq(TIMER_SOFTIRQ);
}
-static void process_timeout(unsigned long __data)
+/*
+ * Since schedule_timeout()'s timer is defined on the stack, it must store
+ * the target task on the stack as well.
+ */
+struct process_timer {
+ struct timer_list timer;
+ struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
{
- wake_up_process((struct task_struct *)__data);
+ struct process_timer *timeout = from_timer(timeout, t, timer);
+
+ wake_up_process(timeout->task);
}
/**
@@ -1672,7 +1755,7 @@ static void process_timeout(unsigned long __data)
*/
signed long __sched schedule_timeout(signed long timeout)
{
- struct timer_list timer;
+ struct process_timer timer;
unsigned long expire;
switch (timeout)
@@ -1706,13 +1789,14 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
- setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
- __mod_timer(&timer, expire, false);
+ timer.task = current;
+ timer_setup_on_stack(&timer.timer, process_timeout, 0);
+ __mod_timer(&timer.timer, expire, 0);
schedule();
- del_singleshot_timer_sync(&timer);
+ del_singleshot_timer_sync(&timer.timer);
/* Remove the timer from the object tracker */
- destroy_timer_on_stack(&timer);
+ destroy_timer_on_stack(&timer.timer);
timeout = expire - jiffies;