summaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/alarmtimer.c3
-rw-r--r--kernel/time/posix-cpu-timers.c81
-rw-r--r--kernel/time/posix-timers.c4
-rw-r--r--kernel/time/tick-broadcast.c120
-rw-r--r--kernel/time/tick-common.c12
-rw-r--r--kernel/time/tick-sched.c151
-rw-r--r--kernel/time/tick-sched.h67
-rw-r--r--kernel/time/timekeeping.c4
8 files changed, 291 insertions, 151 deletions
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7e5dff602585..82b28ab0f328 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -81,8 +81,7 @@ struct rtc_device *alarmtimer_get_rtcdev(void)
}
EXPORT_SYMBOL_GPL(alarmtimer_get_rtcdev);
-static int alarmtimer_rtc_add_device(struct device *dev,
- struct class_interface *class_intf)
+static int alarmtimer_rtc_add_device(struct device *dev)
{
unsigned long flags;
struct rtc_device *rtc = to_rtc_device(dev);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 2f5e9b34022c..e9c6f9d0e42c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -846,6 +846,8 @@ static u64 collect_timerqueue(struct timerqueue_head *head,
return expires;
ctmr->firing = 1;
+ /* See posix_cpu_timer_wait_running() */
+ rcu_assign_pointer(ctmr->handling, current);
cpu_timer_dequeue(ctmr);
list_add_tail(&ctmr->elist, firing);
}
@@ -1161,7 +1163,49 @@ static void handle_posix_cpu_timers(struct task_struct *tsk);
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
static void posix_cpu_timers_work(struct callback_head *work)
{
+ struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work);
+
+ mutex_lock(&cw->mutex);
handle_posix_cpu_timers(current);
+ mutex_unlock(&cw->mutex);
+}
+
+/*
+ * Invoked from the posix-timer core when a cancel operation failed because
+ * the timer is marked firing. The caller holds rcu_read_lock(), which
+ * protects the timer and the task which is expiring it from being freed.
+ */
+static void posix_cpu_timer_wait_running(struct k_itimer *timr)
+{
+ struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling);
+
+ /* Has the handling task completed expiry already? */
+ if (!tsk)
+ return;
+
+ /* Ensure that the task cannot go away */
+ get_task_struct(tsk);
+ /* Now drop the RCU protection so the mutex can be locked */
+ rcu_read_unlock();
+ /* Wait on the expiry mutex */
+ mutex_lock(&tsk->posix_cputimers_work.mutex);
+ /* Release it immediately again. */
+ mutex_unlock(&tsk->posix_cputimers_work.mutex);
+ /* Drop the task reference. */
+ put_task_struct(tsk);
+ /* Relock RCU so the callsite is balanced */
+ rcu_read_lock();
+}
+
+static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
+{
+ /* Ensure that timr->it.cpu.handling task cannot go away */
+ rcu_read_lock();
+ spin_unlock_irq(&timr->it_lock);
+ posix_cpu_timer_wait_running(timr);
+ rcu_read_unlock();
+ /* @timr is on stack and is valid */
+ spin_lock_irq(&timr->it_lock);
}
/*
@@ -1177,6 +1221,7 @@ void clear_posix_cputimers_work(struct task_struct *p)
sizeof(p->posix_cputimers_work.work));
init_task_work(&p->posix_cputimers_work.work,
posix_cpu_timers_work);
+ mutex_init(&p->posix_cputimers_work.mutex);
p->posix_cputimers_work.scheduled = false;
}
@@ -1255,6 +1300,18 @@ static inline void __run_posix_cpu_timers(struct task_struct *tsk)
lockdep_posixtimer_exit();
}
+static void posix_cpu_timer_wait_running(struct k_itimer *timr)
+{
+ cpu_relax();
+}
+
+static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
+{
+ spin_unlock_irq(&timr->it_lock);
+ cpu_relax();
+ spin_lock_irq(&timr->it_lock);
+}
+
static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
return false;
@@ -1363,6 +1420,8 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
*/
if (likely(cpu_firing >= 0))
cpu_timer_fire(timer);
+ /* See posix_cpu_timer_wait_running() */
+ rcu_assign_pointer(timer->it.cpu.handling, NULL);
spin_unlock(&timer->it_lock);
}
}
@@ -1497,23 +1556,16 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
expires = cpu_timer_getexpires(&timer.it.cpu);
error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
if (!error) {
- /*
- * Timer is now unarmed, deletion can not fail.
- */
+ /* Timer is now unarmed, deletion can not fail. */
posix_cpu_timer_del(&timer);
+ } else {
+ while (error == TIMER_RETRY) {
+ posix_cpu_timer_wait_running_nsleep(&timer);
+ error = posix_cpu_timer_del(&timer);
+ }
}
- spin_unlock_irq(&timer.it_lock);
- while (error == TIMER_RETRY) {
- /*
- * We need to handle case when timer was or is in the
- * middle of firing. In other cases we already freed
- * resources.
- */
- spin_lock_irq(&timer.it_lock);
- error = posix_cpu_timer_del(&timer);
- spin_unlock_irq(&timer.it_lock);
- }
+ spin_unlock_irq(&timer.it_lock);
if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
/*
@@ -1623,6 +1675,7 @@ const struct k_clock clock_posix_cpu = {
.timer_del = posix_cpu_timer_del,
.timer_get = posix_cpu_timer_get,
.timer_rearm = posix_cpu_timer_rearm,
+ .timer_wait_running = posix_cpu_timer_wait_running,
};
const struct k_clock clock_process = {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0c8a87a11b39..808a247205a9 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -846,6 +846,10 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
rcu_read_lock();
unlock_timer(timer, *flags);
+ /*
+ * kc->timer_wait_running() might drop RCU lock. So @timer
+ * cannot be touched anymore after the function returns!
+ */
if (!WARN_ON_ONCE(!kc->timer_wait_running))
kc->timer_wait_running(timer);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 93bf2b4e47e5..771d1e040303 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -35,14 +35,15 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);
-static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
+static inline void
+tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
@@ -264,7 +265,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
- tick_broadcast_setup_oneshot(bc);
+ tick_broadcast_setup_oneshot(bc, false);
ret = 1;
} else {
/*
@@ -500,7 +501,7 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
- tick_broadcast_setup_oneshot(bc);
+ tick_broadcast_setup_oneshot(bc, false);
}
}
out:
@@ -1020,48 +1021,101 @@ static inline ktime_t tick_get_next_period(void)
/**
* tick_broadcast_setup_oneshot - setup the broadcast device
*/
-static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
+ bool from_periodic)
{
int cpu = smp_processor_id();
+ ktime_t nexttick = 0;
if (!bc)
return;
- /* Set it up only once ! */
- if (bc->event_handler != tick_handle_oneshot_broadcast) {
- int was_periodic = clockevent_state_periodic(bc);
-
- bc->event_handler = tick_handle_oneshot_broadcast;
-
+ /*
+ * When the broadcast device was switched to oneshot by the first
+ * CPU handling the NOHZ change, the other CPUs will reach this
+ * code via hrtimer_run_queues() -> tick_check_oneshot_change()
+ * too. Set up the broadcast device only once!
+ */
+ if (bc->event_handler == tick_handle_oneshot_broadcast) {
/*
- * We must be careful here. There might be other CPUs
- * waiting for periodic broadcast. We need to set the
- * oneshot_mask bits for those and program the
- * broadcast device to fire.
+ * The CPU which switched from periodic to oneshot mode
+ * set the broadcast oneshot bit for all other CPUs which
+ * are in the general (periodic) broadcast mask to ensure
+ * that CPUs which wait for the periodic broadcast are
+ * woken up.
+ *
+ * Clear the bit for the local CPU as the set bit would
+ * prevent the first tick_broadcast_enter() after this CPU
+ * switched to oneshot state to program the broadcast
+ * device.
+ *
+ * This code can also be reached via tick_broadcast_control(),
+ * but this cannot avoid the tick_broadcast_clear_oneshot()
+ * as that would break the periodic to oneshot transition of
+ * secondary CPUs. But that's harmless as the below only
+ * clears already cleared bits.
*/
+ tick_broadcast_clear_oneshot(cpu);
+ return;
+ }
+
+
+ bc->event_handler = tick_handle_oneshot_broadcast;
+ bc->next_event = KTIME_MAX;
+
+ /*
+ * When the tick mode is switched from periodic to oneshot it must
+ * be ensured that CPUs which are waiting for periodic broadcast
+ * get their wake-up at the next tick. This is achieved by ORing
+ * tick_broadcast_mask into tick_broadcast_oneshot_mask.
+ *
+ * For other callers, e.g. broadcast device replacement,
+ * tick_broadcast_oneshot_mask must not be touched as this would
+ * set bits for CPUs which are already NOHZ, but not idle. Their
+ * next tick_broadcast_enter() would observe the bit set and fail
+ * to update the expiry time and the broadcast event device.
+ */
+ if (from_periodic) {
cpumask_copy(tmpmask, tick_broadcast_mask);
+ /* Remove the local CPU as it is obviously not idle */
cpumask_clear_cpu(cpu, tmpmask);
- cpumask_or(tick_broadcast_oneshot_mask,
- tick_broadcast_oneshot_mask, tmpmask);
+ cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);
- if (was_periodic && !cpumask_empty(tmpmask)) {
- ktime_t nextevt = tick_get_next_period();
+ /*
+ * Ensure that the oneshot broadcast handler will wake the
+ * CPUs which are still waiting for periodic broadcast.
+ */
+ nexttick = tick_get_next_period();
+ tick_broadcast_init_next_event(tmpmask, nexttick);
- clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
- tick_broadcast_init_next_event(tmpmask, nextevt);
- tick_broadcast_set_event(bc, cpu, nextevt);
- } else
- bc->next_event = KTIME_MAX;
- } else {
/*
- * The first cpu which switches to oneshot mode sets
- * the bit for all other cpus which are in the general
- * (periodic) broadcast mask. So the bit is set and
- * would prevent the first broadcast enter after this
- * to program the bc device.
+ * If the underlying broadcast clock event device is
+ * already in oneshot state, then there is nothing to do.
+ * The device was already armed for the next tick
+ * in tick_handle_broadcast_periodic()
*/
- tick_broadcast_clear_oneshot(cpu);
+ if (clockevent_state_oneshot(bc))
+ return;
}
+
+ /*
+ * When switching from periodic to oneshot mode arm the broadcast
+ * device for the next tick.
+ *
+ * If the broadcast device has been replaced in oneshot mode and
+ * the oneshot broadcast mask is not empty, then arm it to expire
+ * immediately in order to reevaluate the next expiring timer.
+ * @nexttick is 0 and therefore in the past which will cause the
+ * clockevent code to force an event.
+ *
+ * For both cases the programming can be avoided when the oneshot
+ * broadcast mask is empty.
+ *
+ * tick_broadcast_set_event() implicitly switches the broadcast
+ * device to oneshot state.
+ */
+ if (!cpumask_empty(tick_broadcast_oneshot_mask))
+ tick_broadcast_set_event(bc, cpu, nexttick);
}
/*
@@ -1070,14 +1124,16 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
void tick_broadcast_switch_to_oneshot(void)
{
struct clock_event_device *bc;
+ enum tick_device_mode oldmode;
unsigned long flags;
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+ oldmode = tick_broadcast_device.mode;
tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
- tick_broadcast_setup_oneshot(bc);
+ tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 46789356f856..65b8658da829 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -218,9 +218,19 @@ static void tick_setup_device(struct tick_device *td,
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
+ ktime_t next_p;
+ u32 rem;
+
tick_do_timer_cpu = cpu;
- tick_next_period = ktime_get();
+ next_p = ktime_get();
+ div_u64_rem(next_p, TICK_NSEC, &rem);
+ if (rem) {
+ next_p -= rem;
+ next_p += TICK_NSEC;
+ }
+
+ tick_next_period = next_p;
#ifdef CONFIG_NO_HZ_FULL
/*
* The boot CPU may be nohz_full, in which case set
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946..52254679ec48 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -281,6 +281,11 @@ static bool check_tick_dependency(atomic_t *dep)
return true;
}
+ if (val & TICK_DEP_MASK_RCU_EXP) {
+ trace_tick_stop(0, TICK_DEP_MASK_RCU_EXP);
+ return true;
+ }
+
return false;
}
@@ -527,7 +532,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
tick_nohz_full_running = true;
}
-static int tick_nohz_cpu_down(unsigned int cpu)
+bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
{
/*
* The tick_do_timer_cpu CPU handles housekeeping duty (unbound
@@ -535,8 +540,13 @@ static int tick_nohz_cpu_down(unsigned int cpu)
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
- return -EBUSY;
- return 0;
+ return false;
+ return true;
+}
+
+static int tick_nohz_cpu_down(unsigned int cpu)
+{
+ return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
}
void __init tick_nohz_init(void)
@@ -637,43 +647,67 @@ static void tick_nohz_update_jiffies(ktime_t now)
touch_softlockup_watchdog_sched();
}
-/*
- * Updates the per-CPU time idle statistics counters
- */
-static void
-update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
+static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
ktime_t delta;
- if (ts->idle_active) {
- delta = ktime_sub(now, ts->idle_entrytime);
- if (nr_iowait_cpu(cpu) > 0)
- ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
- else
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
- ts->idle_entrytime = now;
- }
+ if (WARN_ON_ONCE(!ts->idle_active))
+ return;
- if (last_update_time)
- *last_update_time = ktime_to_us(now);
+ delta = ktime_sub(now, ts->idle_entrytime);
-}
+ write_seqcount_begin(&ts->idle_sleeptime_seq);
+ if (nr_iowait_cpu(smp_processor_id()) > 0)
+ ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
+ else
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
-{
- update_ts_time_stats(smp_processor_id(), ts, now, NULL);
+ ts->idle_entrytime = now;
ts->idle_active = 0;
+ write_seqcount_end(&ts->idle_sleeptime_seq);
sched_clock_idle_wakeup_event();
}
static void tick_nohz_start_idle(struct tick_sched *ts)
{
+ write_seqcount_begin(&ts->idle_sleeptime_seq);
ts->idle_entrytime = ktime_get();
ts->idle_active = 1;
+ write_seqcount_end(&ts->idle_sleeptime_seq);
+
sched_clock_idle_sleep_event();
}
+static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
+ bool compute_delta, u64 *last_update_time)
+{
+ ktime_t now, idle;
+ unsigned int seq;
+
+ if (!tick_nohz_active)
+ return -1;
+
+ now = ktime_get();
+ if (last_update_time)
+ *last_update_time = ktime_to_us(now);
+
+ do {
+ seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
+
+ if (ts->idle_active && compute_delta) {
+ ktime_t delta = ktime_sub(now, ts->idle_entrytime);
+
+ idle = ktime_add(*sleeptime, delta);
+ } else {
+ idle = *sleeptime;
+ }
+ } while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq));
+
+ return ktime_to_us(idle);
+
+}
+
/**
* get_cpu_idle_time_us - get the total idle time of a CPU
* @cpu: CPU number to query
@@ -681,7 +715,10 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
* counters if NULL.
*
* Return the cumulative idle time (since boot) for a given
- * CPU, in microseconds.
+ * CPU, in microseconds. Note this is partially broken due to
+ * the counter of iowait tasks that can be remotely updated without
+ * any synchronization. Therefore it is possible to observe backward
+ * values within two consecutive reads.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
@@ -691,27 +728,9 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- ktime_t now, idle;
-
- if (!tick_nohz_active)
- return -1;
-
- now = ktime_get();
- if (last_update_time) {
- update_ts_time_stats(cpu, ts, now, last_update_time);
- idle = ts->idle_sleeptime;
- } else {
- if (ts->idle_active && !nr_iowait_cpu(cpu)) {
- ktime_t delta = ktime_sub(now, ts->idle_entrytime);
-
- idle = ktime_add(ts->idle_sleeptime, delta);
- } else {
- idle = ts->idle_sleeptime;
- }
- }
-
- return ktime_to_us(idle);
+ return get_cpu_sleep_time_us(ts, &ts->idle_sleeptime,
+ !nr_iowait_cpu(cpu), last_update_time);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
@@ -722,7 +741,10 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
* counters if NULL.
*
* Return the cumulative iowait time (since boot) for a given
- * CPU, in microseconds.
+ * CPU, in microseconds. Note this is partially broken due to
+ * the counter of iowait tasks that can be remotely updated without
+ * any synchronization. Therefore it is possible to observe backward
+ * values within two consecutive reads.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
@@ -732,26 +754,9 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- ktime_t now, iowait;
-
- if (!tick_nohz_active)
- return -1;
-
- now = ktime_get();
- if (last_update_time) {
- update_ts_time_stats(cpu, ts, now, last_update_time);
- iowait = ts->iowait_sleeptime;
- } else {
- if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
- ktime_t delta = ktime_sub(now, ts->idle_entrytime);
- iowait = ktime_add(ts->iowait_sleeptime, delta);
- } else {
- iowait = ts->iowait_sleeptime;
- }
- }
-
- return ktime_to_us(iowait);
+ return get_cpu_sleep_time_us(ts, &ts->iowait_sleeptime,
+ nr_iowait_cpu(cpu), last_update_time);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
@@ -1084,10 +1089,16 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return true;
}
-static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
+/**
+ * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
+ *
+ * When the next event is more than a tick into the future, stop the idle tick
+ */
+void tick_nohz_idle_stop_tick(void)
{
- ktime_t expires;
+ struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
int cpu = smp_processor_id();
+ ktime_t expires;
/*
* If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
@@ -1119,16 +1130,6 @@ static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
}
}
-/**
- * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
- *
- * When the next event is more than a tick into the future, stop the idle tick
- */
-void tick_nohz_idle_stop_tick(void)
-{
- __tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
-}
-
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 504649513399..5ed5a9d41d5a 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -22,65 +22,82 @@ enum tick_nohz_mode {
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
- * @sched_timer: hrtimer to schedule the periodic tick in high
- * resolution mode
- * @check_clocks: Notification mechanism about clocksource changes
- * @nohz_mode: Mode - one state of tick_nohz_mode
+ *
* @inidle: Indicator that the CPU is in the tick idle mode
* @tick_stopped: Indicator that the idle tick has been stopped
* @idle_active: Indicator that the CPU is actively in the tick idle mode;
* it is reset during irq handling phases.
- * @do_timer_lst: CPU was the last one doing do_timer before going idle
+ * @do_timer_last: CPU was the last one doing do_timer before going idle
* @got_idle_tick: Tick timer function has run with @inidle set
+ * @stalled_jiffies: Number of stalled jiffies detected across ticks
+ * @last_tick_jiffies: Value of jiffies seen on last tick
+ * @sched_timer: hrtimer to schedule the periodic tick in high
+ * resolution mode
* @last_tick: Store the last tick expiry time when the tick
* timer is modified for nohz sleeps. This is necessary
* to resume the tick timer operation in the timeline
* when the CPU returns from nohz sleep.
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
+ * @idle_waketime: Time when the idle was interrupted
+ * @idle_entrytime: Time when the idle call was entered
+ * @nohz_mode: Mode - one state of tick_nohz_mode
+ * @last_jiffies: Base jiffies snapshot when next event was last computed
+ * @timer_expires_base: Base time clock monotonic for @timer_expires
+ * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
+ * @next_timer: Expiry time of next expiring timer for debugging purpose only
+ * @idle_expires: Next tick in idle, for debugging purpose only
* @idle_calls: Total number of idle calls
* @idle_sleeps: Number of idle calls, where the sched tick was stopped
- * @idle_entrytime: Time when the idle call was entered
- * @idle_waketime: Time when the idle was interrupted
* @idle_exittime: Time when the idle state was left
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
* @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
- * @timer_expires_base: Base time clock monotonic for @timer_expires
- * @next_timer: Expiry time of next expiring timer for debugging purpose only
* @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
- * @last_tick_jiffies: Value of jiffies seen on last tick
- * @stalled_jiffies: Number of stalled jiffies detected across ticks
+ * @check_clocks: Notification mechanism about clocksource changes
*/
struct tick_sched {
- struct hrtimer sched_timer;
- unsigned long check_clocks;
- enum tick_nohz_mode nohz_mode;
-
+ /* Common flags */
unsigned int inidle : 1;
unsigned int tick_stopped : 1;
unsigned int idle_active : 1;
unsigned int do_timer_last : 1;
unsigned int got_idle_tick : 1;
+ /* Tick handling: jiffies stall check */
+ unsigned int stalled_jiffies;
+ unsigned long last_tick_jiffies;
+
+ /* Tick handling */
+ struct hrtimer sched_timer;
ktime_t last_tick;
ktime_t next_tick;
unsigned long idle_jiffies;
- unsigned long idle_calls;
- unsigned long idle_sleeps;
- ktime_t idle_entrytime;
ktime_t idle_waketime;
- ktime_t idle_exittime;
- ktime_t idle_sleeptime;
- ktime_t iowait_sleeptime;
+
+ /* Idle entry */
+ seqcount_t idle_sleeptime_seq;
+ ktime_t idle_entrytime;
+
+ /* Tick stop */
+ enum tick_nohz_mode nohz_mode;
unsigned long last_jiffies;
- u64 timer_expires;
u64 timer_expires_base;
+ u64 timer_expires;
u64 next_timer;
ktime_t idle_expires;
+ unsigned long idle_calls;
+ unsigned long idle_sleeps;
+
+ /* Idle exit */
+ ktime_t idle_exittime;
+ ktime_t idle_sleeptime;
+ ktime_t iowait_sleeptime;
+
+ /* Full dynticks handling */
atomic_t tick_dep_mask;
- unsigned long last_tick_jiffies;
- unsigned int stalled_jiffies;
+
+ /* Clocksource changes */
+ unsigned long check_clocks;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5579ead449f2..09d594900ee0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -526,7 +526,7 @@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
* partially updated. Since the tk->offs_boot update is a rare event, this
* should be a rare occurrence which postprocessing should be able to handle.
*
- * The caveats vs. timestamp ordering as documented for ktime_get_fast_ns()
+ * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
* apply as well.
*/
u64 notrace ktime_get_boot_fast_ns(void)
@@ -576,7 +576,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
/**
* ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
*
- * See ktime_get_fast_ns() for documentation of the time stamp ordering.
+ * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
*/
u64 ktime_get_real_fast_ns(void)
{